In [1]:
import pandas as pd
import networkx as nx
import random
import numpy as np
import os
import networkx.algorithms.community as nx_comm

In [2]:
# Directory layout: <out_dir>/followers holds the follower CSVs plus the
# "hochschulen_friends.csv" friend list; <out_dir>/static_users.csv holds
# additional user rows.
out_dir = "data"
in_dir = os.path.join(out_dir, "followers")
dfs = []

# os.listdir() returns entries in arbitrary order. Sort (case-insensitively)
# so the concat order -- and therefore which duplicate row survives
# drop_duplicates() below -- is the same on every machine and every run.
for dirent in sorted(os.listdir(in_dir), key=str.lower):
    path = os.path.join(in_dir, dirent)

    # Only plain follower CSVs are loaded here; "_lvl2" and "_friends" files
    # are skipped.
    is_follower_csv = (
        os.path.isfile(path)
        and dirent.endswith(".csv")
        and "_lvl2" not in dirent
        and "_friends" not in dirent
    )
    if not is_follower_csv:
        continue

    print(dirent)
    dfs.append(pd.read_csv(path, lineterminator='\n'))

df_hochschul_friends = pd.read_csv(os.path.join(in_dir, "hochschulen_friends.csv"))
# Swap the roles of the two columns so that friend rows fit the
# (id -> follower_of) edge schema used by the follower files.
# NOTE(review): presumably `friend_of` is the account that follows `id` -- confirm.
df_hochschul_friends_reverse = df_hochschul_friends.rename(
    columns={"id": "follower_of", "friend_of": "id"}
)
df = pd.concat(dfs)
df = pd.concat([df, df_hochschul_friends_reverse])

# Edge list: one row per (id, follower_of) pair, ordered by id.
edges = df[["id", "follower_of"]].sort_values("id", ascending=True)

static_df = pd.read_csv(os.path.join(out_dir, "static_users.csv"))

# Same column name as the follower files so the frames line up when concatenated.
df_hochschul_friends = df_hochschul_friends.rename(columns={"friend_of": "follower_of"})

# Node table: first occurrence of every id wins (static users take precedence).
nodes = pd.concat([
    static_df,
    df,
    df_hochschul_friends,
]).drop_duplicates(["id"])




fh_dortmund.csv
fh_muenster.csv
HochschuleBO.csv
hsduesseldorf.csv
HSNiederrhein.csv
hsrheinwaal.csv
RWTH.csv
th_koeln.csv


In [3]:

# Default appearance: white, fully opaque. Build one dict per row instead of
# repeating a single dict object, so that mutating one node's colour can never
# leak into every other node.
nodes['viz'] = [{'color': {'r': 255, 'g': 255, 'b': 255, 'a': 1}} for _ in range(len(nodes))]
nodes['gender'] = "unknown"
hochschul_ids = [103823788, 84606793, 124155166, 265859722, 2776187059, 928008620, 11053712, 3063800235]

# Rows without an id cannot become graph nodes.
nodes = nodes[nodes['id'].notna()]

nodes = nodes.drop(columns=['withheld_in_countries','url','follower_of', 'created_at', 'has_extended_profile','default_profile','default_profile_image'])
nodes = nodes.astype({'description': 'string', 'screen_name': 'string'})

# Strip backspace / unit-separator / record-separator characters
# (presumably because they are not valid in the XML-based GEXF export).
# regex=True is required: since pandas 2.0 the default is regex=False, which
# would treat the pattern as a literal string and silently remove nothing.
control_chars = r'\x08|\x1f|\x1e'
nodes['name'] = nodes['name'].str.replace(control_chars, '', regex=True)
nodes['location'] = nodes['location'].str.replace(control_chars, '', regex=True)

  nodes['name'] =nodes['name'].str.replace(r'\x08|\x1f|\x1e', '')
  nodes['location'] =nodes['location'].str.replace(r'\x08|\x1f|\x1e', '')


In [4]:
import re

# Pronoun markers (English and German) searched for in the profile description.
# Matching is case-sensitive. Compiled once instead of once per row.
female_pattern = re.compile(r'\bshe/her\b|\bSie/Ihr\b')
male_pattern = re.compile(r'\bhe/him\b|\bEr/Ihn\b')

females = []
male = []
# Walk the two needed columns directly; DataFrame.iterrows() would build a
# full Series object for every row.
for user_id, description in zip(nodes['id'], nodes['description']):
    if pd.isnull(description):
        continue
    # A description may match both patterns; the id is then recorded in both lists.
    if female_pattern.search(description) is not None:
        females.append(user_id)
    if male_pattern.search(description) is not None:
        male.append(user_id)

print(f"Number of females: {len(females)}")
print(f"Number of males: {len(male)}")

Number of females: 173
Number of males: 157


In [5]:
# Write the detected gender back onto the node table. 'male' is applied last,
# so an id present in both lists ends up as 'male'.
is_female = nodes['id'].isin(females)
is_male = nodes['id'].isin(male)
nodes.loc[is_female, 'gender'] = 'female'
nodes.loc[is_male, 'gender'] = 'male'

# The free-text description is no longer needed once the gender is derived.
nodes = nodes.drop(columns=['description'])

# Hilfsfunktionen

In [6]:
# Fixed seed so the generated colours are the same on every run.
random.seed(10)


def get_random_Color_hex():
    """Return a random opaque colour as a dict with keys 'r', 'g', 'b', 'a'.

    The three channels are drawn (in the order r, g, b) as integers from
    0..255 using the module-level ``random`` generator; 'a' is always 1.
    """
    red = random.randint(0, 255)
    green = random.randint(0, 255)
    blue = random.randint(0, 255)
    return {'r': red, 'g': green, 'b': blue, 'a': 1}

def lerp(a, b, t):
    """Linearly interpolate between ``a`` and ``b``.

    Returns ``a`` for ``t == 0`` and ``b`` for ``t == 1``; works for scalars
    and for numpy arrays (element-wise).
    """
    weight_a = 1 - t
    weight_b = t
    return a * weight_a + b * weight_b

In [7]:
def create_graph(edges, nodes, is_directed):
    """Build a networkx graph from an edge table and attach node attributes.

    Parameters
    ----------
    edges : DataFrame with columns 'id' (edge source) and 'follower_of'
        (edge target).
    nodes : DataFrame with an 'id' column; every other column of a row is
        passed to ``nx.set_node_attributes`` keyed by that id.
    is_directed : bool
        True builds an ``nx.DiGraph``, False an ``nx.Graph``.

    Returns
    -------
    The constructed graph.
    """
    graph_type = nx.DiGraph() if is_directed else nx.Graph()
    graph = nx.from_pandas_edgelist(
        edges,
        source='id',
        target='follower_of',
        create_using=graph_type,
    )
    # {node id: {column name: value}} -- the shape set_node_attributes expects.
    attributes_by_id = nodes.set_index('id').to_dict('index')
    nx.set_node_attributes(graph, attributes_by_id)
    return graph

In [8]:
def interpolate_edge_colors(graph):
    """Colour every edge with the midpoint of its two endpoint colours.

    Each node is expected to carry ``viz['color']`` with 'r', 'g' and 'b'
    entries (a missing entry raises ``KeyError``). For every edge the two RGB
    triples are blended 50/50 with ``lerp``, truncated to integers and stored
    on the edge as ``viz['color']`` with alpha 1.

    The graph is modified in place and also returned.
    """
    def rgb(node):
        # RGB channels of a node's current colour as a numeric vector.
        color = graph.nodes[node]["viz"]['color']
        return np.array([color['r'], color['g'], color['b']])

    updates = {}
    for source, target in graph.edges():
        blended = lerp(rgb(source), rgb(target), 0.5)
        # int() truncates like astype(int) did, but yields plain Python ints
        # instead of numpy scalars as attribute values.
        red, green, blue = (int(channel) for channel in blended)
        updates[(source, target)] = {
            "viz": {'color': {'r': red, 'g': green, 'b': blue, 'a': 1}}
        }

    # One batched call instead of one call per edge.
    nx.set_edge_attributes(graph, updates)
    return graph

# Louvain

In [14]:
# Undirected graph of the full follower network.
graph = create_graph(edges=edges, nodes=nodes, is_directed=False)

In [27]:
# Louvain community detection; the fixed seed keeps the partition reproducible.
communities = nx_comm.louvain_communities(
    graph,
    resolution=2.2,
    seed=123,
)

In [28]:
# Number of communities returned by Louvain.
print(len(communities))

37


In [29]:
# Give every community its own random colour. All members of a community
# share that colour. The member variable is deliberately not called `id`,
# which would shadow the builtin for the rest of the notebook.
for community in communities:
    color = get_random_Color_hex()
    # One batched attribute update per community instead of one call per node.
    nx.set_node_attributes(
        graph,
        {node_id: {"viz": {'color': color}} for node_id in community},
    )

In [30]:
# Colour each edge with the 50/50 blend of its two endpoint colours.
graph = interpolate_edge_colors(graph)

In [31]:
# Make sure the export directory exists, then write the coloured graph as GEXF.
os.makedirs("./graphs", exist_ok=True)
nx.write_gexf(
    graph,
    path="./graphs/level1_louvain_zuteilung.gexf",
)

# Gerichteter Graph mit Knotenfärbung


In [39]:
# Directed variant of the same network (edge direction: id -> follower_of).
graph2 = create_graph(edges=edges, nodes=nodes, is_directed=True)

In [40]:
# Colour the directed graph by the communities found earlier. Fresh random
# colours are drawn for each community here. The member variable is
# deliberately not called `id`, which would shadow the builtin for the rest
# of the notebook.
for community in communities:
    color = get_random_Color_hex()
    # One batched attribute update per community instead of one call per node.
    nx.set_node_attributes(
        graph2,
        {node_id: {"viz": {'color': color}} for node_id in community},
    )

In [41]:
# Colour each edge with the 50/50 blend of its two endpoint colours.
graph2 = interpolate_edge_colors(graph2)

In [42]:
# Make sure the export directory exists, then write the directed graph as GEXF.
os.makedirs("./graphs", exist_ok=True)
nx.write_gexf(
    graph2,
    path="./graphs/level1_louvain_zuteilung_gerichtet.gexf",
)

# Vereinfachung

In [9]:
# Directed graph (id -> follower_of) used to find followers that can be merged.
graph = nx.from_pandas_edgelist(
    edges,
    source='id',
    target='follower_of',
    create_using=nx.DiGraph(),
)

# Attach the node table's columns as node attributes, keyed by id.
node_attr = nodes.set_index('id').to_dict('index')
nx.set_node_attributes(graph, node_attr)

In [10]:
# Leaf followers: nodes with exactly one outgoing edge and no incoming edge.
out_degree_nodes = [
    node
    for node, out_deg in graph.out_degree()
    if out_deg == 1 and graph.in_degree(node) == 0
]
print(len(out_degree_nodes))

# Their edges, ordered by the account they point to.
edges_one_degree = edges[edges['id'].isin(out_degree_nodes)].sort_values(
    "follower_of", ascending=True
)
# Accounts that have at least one such leaf follower.
master_nodes = edges_one_degree.follower_of.unique()

40726


In [11]:
# For every master node (same order as `master_nodes`), the ids of the leaf
# followers that point to it.
nodes_id_to_unify = [
    edges_one_degree.loc[edges_one_degree['follower_of'] == master, 'id'].values.tolist()
    for master in master_nodes
]

# Node size in the exported graph defaults to the account's follower count.
nodes['size'] = nodes['followers_count']

In [12]:
# Shallow working copies; the simplified network is built on these so that
# `nodes` and `edges` keep referring to the full network.
nodes_copy, edges_copy = nodes.copy(deep=False), edges.copy(deep=False)

In [13]:
# Replace each master node's leaf followers by ONE placeholder node.
# Placeholders get negative ids (-1, -2, ...), and their 'size' is the number
# of followers they stand for.
#
# Rows are collected first and appended with a single concat each; growing a
# DataFrame with pd.concat inside the loop copies the whole frame every time.
new_node_rows = []
new_edge_rows = []
for position, (list_of_nodes, master_node) in enumerate(zip(nodes_id_to_unify, master_nodes), start=1):
    unified_node_id = -position

    # Look the master row up once; both the screen name and the colour come
    # from it. Raises IndexError if the master id is missing from `nodes`.
    master_rows = nodes[nodes['id'] == master_node]
    master_screen_name = master_rows['screen_name'].values[0]
    master_viz = master_rows['viz'].values[0]

    new_node_rows.append({
        'id': unified_node_id,
        'id_str': '',
        'name': 'follower_of_' + str(master_node),
        'screen_name': 'follower_of_' + master_screen_name,
        'location': '',
        'protected': 'False',
        'followers_count': 0,
        'friends_count': 0,
        'listed_count': 0,
        'favourites_count': 0,
        'verified': 'False',
        'statuses_count': 0,
        # The placeholder reuses the master node's viz entry unchanged.
        'viz': master_viz,
        'gender': 'unknown',
        'size': len(list_of_nodes),
    })
    # The placeholder follows the master node, like the nodes it replaces.
    new_edge_rows.append({
        'id': unified_node_id,
        'follower_of': master_node,
    })

# Nothing to append (and nothing to re-index) when there are no master nodes.
if new_node_rows:
    nodes_copy = pd.concat([nodes_copy, pd.DataFrame(new_node_rows)], ignore_index=True)
    edges_copy = pd.concat([edges_copy, pd.DataFrame(new_edge_rows)], ignore_index=True)



In [14]:
# All leaf-follower ids in one flat list.
flatten_node_to_remove = []
for list_of_nodes in nodes_id_to_unify:
    flatten_node_to_remove.extend(list_of_nodes)

# Drop those followers and their outgoing edges from the working copies.
keep_node_rows = ~nodes_copy['id'].isin(flatten_node_to_remove)
keep_edge_rows = ~edges_copy['id'].isin(flatten_node_to_remove)
nodes_copy = nodes_copy[keep_node_rows]
edges_copy = edges_copy[keep_edge_rows]

# Force the text columns to the string dtype.
nodes_copy = nodes_copy.astype({'screen_name': 'string', 'id_str': 'string'})

In [15]:
# Row counts before (nodes, edges) and after (nodes_copy, edges_copy) simplification.
for frame in (nodes, edges, nodes_copy, edges_copy):
    print(len(frame))

45319
51926
4601
11193


In [29]:
# Undirected graph of the simplified network.
graph = create_graph(edges=edges_copy, nodes=nodes_copy, is_directed=False)

In [30]:
# Louvain on the simplified graph; the fixed seed keeps the partition reproducible.
communities = nx_comm.louvain_communities(
    graph,
    resolution=1.7,
    seed=123,
)

In [31]:
# Number of communities returned by Louvain for the simplified graph.
print(len(communities))

29


In [32]:
# Give every community of the simplified graph its own random colour. The
# member variable is deliberately not called `id`, which would shadow the
# builtin for the rest of the notebook.
for community in communities:
    color = get_random_Color_hex()
    # One batched attribute update per community instead of one call per node.
    nx.set_node_attributes(
        graph,
        {node_id: {"viz": {'color': color}} for node_id in community},
    )

In [33]:
# Directed version of the simplified network, coloured by the communities
# found on the undirected simplified graph.
graph2 = create_graph(edges=edges_copy, nodes=nodes_copy, is_directed=True)

# The member variable is deliberately not called `id`, which would shadow the
# builtin for the rest of the notebook.
for community in communities:
    color = get_random_Color_hex()
    # One batched attribute update per community instead of one call per node.
    nx.set_node_attributes(
        graph2,
        {node_id: {"viz": {'color': color}} for node_id in community},
    )

# Colour each edge with the 50/50 blend of its endpoint colours, then export.
graph2 = interpolate_edge_colors(graph2)
os.makedirs("./graphs", exist_ok=True)
nx.write_gexf(graph2, path="./graphs/level1_louvain_zuteilung_gerichtet_vereinfacht.gexf")