In [1]:
import pandas as pd
import numpy as np
import pickle
import os
import json
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
import matplotlib.pyplot as plt

# Load the hashtag network from GEXF. Per the filename this is the k=10
# graph saved without theme attributes; themes are attached further down.
k_core = nx.read_gexf("../graphs/hashtag_network_k10_nothemes.gexf")

In [2]:
# Hashtag -> theme lookup table. add_community_info() below reads its
# 'hashtag' and 'theme' columns.
communities = pd.read_csv("../data/themes_10core_20241120_evaluated.csv")

# For k=10 graph
def add_community_info(graph, communities_df):
    """Attach a theme to every known node of ``graph`` and print a tally.

    Parameters
    ----------
    graph : networkx graph
        Graph whose node identifiers are hashtags. It is modified in place.
    communities_df : pandas.DataFrame
        Table with a 'hashtag' column (node identifier) and a 'theme'
        column (label to store).

    Returns
    -------
    The same ``graph`` object that was passed in, now carrying a
    'community' node attribute for every node listed in ``communities_df``.
    """
    theme_by_hashtag = communities_df.set_index('hashtag')['theme'].to_dict()

    # In-place annotation: only nodes that appear in the lookup receive the
    # 'community' attribute.
    nx.set_node_attributes(graph, theme_by_hashtag, 'community')

    # The tally is for display only. Nodes missing from the lookup are
    # counted under 'misc' here, but nothing is written to the graph for them.
    node_labels = pd.Series(
        [theme_by_hashtag.get(name, 'misc') for name in graph.nodes()]
    )

    print("Community distribution:")
    for theme, n_nodes in node_labels.value_counts().items():
        print(f"{theme}: {n_nodes} nodes")

    return graph

In [3]:
# Annotate the graph with themes (this also prints the theme distribution).
# add_community_info() returns the graph it was given, so k_core_theme is
# the same object as k_core, now carrying the 'community' node attribute.
k_core_theme = add_community_info(k_core, communities)

# Export the themed k=10 graph for Gephi
nx.write_gexf(k_core_theme, "../graphs/hashtag_network_k10_withtheme.gexf")

Community distribution:
misc: 569 nodes
awareness_and_advocacy: 279 nodes
health_conditions: 277 nodes
platform: 183 nodes
emotions_and_feelings: 172 nodes
identity_and_community: 155 nodes
commonly_misused_substances: 152 nodes
location: 116 nodes
occupation: 73 nodes
other_substances: 70 nodes
humor: 56 nodes
cannabis: 51 nodes
alcohol: 43 nodes
substance_effects: 41 nodes
consumption_method: 41 nodes
tobacco_nicotine: 31 nodes
cognitive_enhancement: 24 nodes


In [5]:
def create_node_list(graph, output_path='../data/k10node_list_with_weights.csv'):
    """Build a per-node table of community label and weighted degree.

    Parameters
    ----------
    graph : networkx-style graph
        Must support ``graph.nodes()``, ``graph.nodes[node]`` and
        ``graph.edges(node, data=True)``.
    output_path : str, path-like or None, optional
        Where to write the table as CSV (without the index). Defaults to the
        path this notebook has always used. Pass ``None`` to skip writing
        and only get the DataFrame back.

    Returns
    -------
    pandas.DataFrame
        Columns 'Node', 'Community' and 'Weighted_Degree', sorted by
        'Weighted_Degree' in descending order. Nodes without a 'community'
        attribute are labelled 'Unknown'.
    """
    columns = ['Node', 'Community', 'Weighted_Degree']

    node_data = []
    for node in graph.nodes():
        # Sum the weights of the edges reported for this node. An edge with
        # no 'weight' attribute counts as 1 instead of raising KeyError.
        weighted_degree = sum(
            attrs.get('weight', 1)
            for _, _, attrs in graph.edges(node, data=True)
        )
        node_data.append({
            'Node': node,
            'Community': graph.nodes[node].get('community', 'Unknown'),
            'Weighted_Degree': weighted_degree,
        })

    # Explicit columns keep the frame well-formed for an empty graph;
    # otherwise sorting on a column that does not exist raises KeyError.
    df = pd.DataFrame(node_data, columns=columns)
    df = df.sort_values('Weighted_Degree', ascending=False)

    if output_path is not None:
        df.to_csv(output_path, index=False)

    return df

In [7]:
# Build the node table and write it to CSV. This relies on k_core having
# been annotated in place by add_community_info() in an earlier cell; if
# that cell has not run, every node is labelled 'Unknown'.
node_list_df = create_node_list(k_core)