In [5]:
import os
import sys
sys.path.append('../src')

from graph_model import load_topic_frequencies, keep_top_n_topics, create_user_user_graph, connect_on_IOU
from analyze_graphs import configuration_model, modularity_communities, top_down_communities, extract_topics_from_community
from analyze_graphs import community_topic_evolution, sample_topics, compute_betweenness_graph, compute_community_betweenness, determine_prototype
from utils import plot_graph, get_literal_topics, create_topic_map, load_graph

In [6]:
# Input files used by this notebook; all of them sit in the same processed-data directory
processed_data_dir = "../data/processed"
user_topic_graph_path = processed_data_dir + "/author_topic_long.txt"
topic_freqs_path = processed_data_dir + "/topic_freq_long.txt"
user_user_graph_path = processed_data_dir + "/user_user_long.txt"
topics_path = processed_data_dir + "/topics_long.txt"

## Create user-topic and user-user graphs

In [7]:
# Number of topics retained in the user-topic graph
top_topic_count = 20

# Read the user-topic graph and the topic frequencies from disk
user_topic_graph = load_graph(user_topic_graph_path)
topic_freqs = load_topic_frequencies(topic_freqs_path)

# Reduce the graph to its top topics (presumably ranked by topic_freqs -- confirm in graph_model)
user_topic_graph = keep_top_n_topics(
    user_topic_graph,
    topic_freqs,
    n=top_topic_count,
)

Number of nodes: 139292
Number of edges: 3634301


In [9]:
# Create user-user graph
# Reuse the copy stored at user_user_graph_path when it exists. Otherwise build the
# graph from the user-topic graph with connect_on_IOU; out_filename is passed so the
# result is presumably written to that path for later runs (confirm in graph_model).
# NOTE(review): the build step was previously commented out, presumably because it is
# slow -- it now only runs when the stored file is missing.
if os.path.exists(user_user_graph_path):
    user_user_graph = load_graph(user_user_graph_path)
else:
    user_user_graph = create_user_user_graph(user_topic_graph, connect_on_IOU, out_filename=user_user_graph_path)

Number of nodes: 2991
Number of edges: 4134418


In [None]:
# Draw crude graph: plot the user-user graph on its own, with no community
# information passed to plot_graph
plot_graph(user_user_graph)

In [None]:
# Build a configuration-model graph from the user-user graph (nothing is loaded
# from disk here; the graph is derived from user_user_graph).
# NOTE(review): presumably a randomized baseline for comparison with the real graph --
# confirm in analyze_graphs.configuration_model
config_user_user_graph = configuration_model(user_user_graph)

In [None]:
# Draw crude config graph: plot the configuration-model graph on its own, with no
# community information passed to plot_graph
plot_graph(config_user_user_graph)

### Detect communities in user-user graph

In [10]:
# Find the modularity-maximizing communities of the user-user graph
mod_communities = modularity_communities(user_user_graph)

# Count the communities and report the total
num_communities = len(mod_communities)
community_count_report = "{} communities".format(num_communities)
print(community_count_report)

2 communities


In [None]:
# Plot communities: draw the user-user graph again, this time passing the detected
# communities as the second argument
# NOTE(review): presumably used to colour or group nodes -- confirm in utils.plot_graph
plot_graph(user_user_graph, mod_communities)

In [None]:
# Find the modularity-maximizing communities of the configuration-model graph
config_mod_communities = modularity_communities(config_user_user_graph)

# Count the communities and report the total
config_num_communities = len(config_mod_communities)
config_community_count_report = "{} communities".format(config_num_communities)
print(config_community_count_report)

In [None]:
# Compute betweenness for the whole user-user graph. The per-community values are
# derived from this result in the following cell via compute_community_betweenness.
graph_betweenness = compute_betweenness_graph(user_user_graph)

In [None]:
for community in mod_communities:
    # Betweenness of this community, derived from the graph-wide betweenness result
    community_betweenness = compute_community_betweenness(graph_betweenness, community)
    betweenness_report = "Community Betweenness: {}".format(community_betweenness)
    print(betweenness_report)
    # Score the community's topics from the user-topic graph, then take 10 of them
    topic_scores = extract_topics_from_community(user_topic_graph, community)
    top_topics = sample_topics(topic_scores, n=10)
    topics_report = "Top 10 topics: {}".format(top_topics)
    print(topics_report)

### Analyze communities