In [3]:
! pip install Pillow



In [9]:
import os
import pandas as pd

# Merge the per-account follower CSVs and the friends CSV into two files:
#   data/direct_followings.csv - edge list with columns (id, follower_of)
#   data/direct_users.csv      - one row per distinct user id
out_dir = "data"
in_dir = "{}/followers".format(out_dir)
dfs = []

# os.listdir() returns entries in arbitrary order. The order matters here
# because drop_duplicates(["id"]) further down keeps the FIRST row it sees for
# each id, so iterate in a fixed (case-insensitive, then exact) order to make
# the generated files reproducible across machines and file systems.
for dirent in sorted(os.listdir(in_dir), key=lambda name: (name.lower(), name)):
    path = os.path.join(in_dir, dirent)

    # Only first-level follower dumps are wanted: skip directories, non-CSV
    # files, second-level dumps and the friends files.
    if not os.path.isfile(path) or not dirent.endswith(".csv") or "_lvl2" in dirent or "_friends" in dirent:
        continue

    print(dirent)
    dfs.append(pd.read_csv(path, lineterminator='\n'))

df_hochschul_friends = pd.read_csv(in_dir + "/hochschulen_friends.csv")
# Swap the two columns so that the friends rows use the same
# (id, follower_of) layout as the follower rows.
df_hochschul_friends_reverse = df_hochschul_friends.rename(columns={"id": "follower_of", "friend_of": "id"})

# A single concat also works when no follower CSV was found; the previous
# pd.concat(dfs) raised "No objects to concatenate" on an empty list.
df = pd.concat(dfs + [df_hochschul_friends_reverse])

df[["id", "follower_of"]].sort_values("id", ascending=True).to_csv(
    "{}/direct_followings.csv".format(out_dir),
    index=False,
)

static_df = pd.read_csv("{}/static_users.csv".format(out_dir))

# Static users get their own id in the follower_of column.
static_df['follower_of'] = static_df['id']

df_hochschul_friends = df_hochschul_friends.rename(columns={"friend_of": "follower_of"})

# static_df comes first so that its row is the one kept when an id occurs in
# several of the frames (drop_duplicates keeps the first occurrence).
pd.concat([
    static_df,
    df,
    df_hochschul_friends,
]).drop_duplicates(["id"]).to_csv(
    "{}/direct_users.csv".format(out_dir),
    index=False,
)


fh_dortmund.csv
fh_muenster.csv
HochschuleBO.csv
hsduesseldorf.csv
HSNiederrhein.csv
hsrheinwaal.csv
RWTH.csv
th_koeln.csv


In [10]:
import pandas as pd
import networkx as nx
import random
import numpy as np

# Build the directed follower graph and attach the user table as node attributes.
# NOTE(review): the recorded output of this cell shows a warning pointing at
# the read_csv call for direct_users.csv - presumably a pandas DtypeWarning
# about mixed column types; confirm and consider passing an explicit dtype=.
nodes = pd.read_csv('data/direct_users.csv')
edges = pd.read_csv('data/direct_followings.csv')

# Every node starts out white; only the seed accounts get a colour of their own.
nodes['community'] = '#FFFFFF'
hochschul_ids = [103823788, 84606793, 124155166, 265859722, 2776187059, 928008620, 11053712, 3063800235]

# Draw exactly one random colour per matching ROW. Sizing the list by
# len(hochschul_ids) raised a length-mismatch ValueError whenever a seed id was
# missing from (or duplicated in) the CSV.
# No seed is set in this cell, so the colours differ from run to run.
is_hochschule = nodes['id'].isin(hochschul_ids)
if is_hochschule.any():
    nodes.loc[is_hochschule, 'community'] = [
        '#%02x%02x%02x' % (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        for _ in range(int(is_hochschule.sum()))
    ]

# Rows without an id cannot be matched to a graph node.
nodes = nodes[nodes['id'].notna()]

# One edge per row of the edge list, pointing from `id` to `follower_of`.
graph = nx.from_pandas_edgelist(edges, source='id', target='follower_of', create_using=nx.DiGraph())
# {id: {column: value, ...}} - every remaining column becomes a node attribute.
node_attr = nodes.set_index('id').to_dict('index')
nx.set_node_attributes(graph, node_attr)


  nodes = pd.read_csv('data/direct_users.csv')


In [11]:
# Distinct total-degree values that occur in the graph, in ascending order,
# restricted to the open interval (1, 20).
degree_values = np.unique([deg for _, deg in graph.degree()])
amount_of_degrees = degree_values[(degree_values > 1) & (degree_values < 20)]
amount_of_degrees

array([ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 15])

In [12]:
# For each id in hochschul_ids, copy its colour onto every node whose single
# outgoing edge ends at that id or whose single incoming edge starts at it.
one_degree_nodes = []
for ids in hochschul_ids:
    single_out = [
        n for n, deg in graph.out_degree()
        if deg == 1 and next(iter(graph.successors(n))) == ids
    ]
    single_in = [
        n for n, deg in graph.in_degree()
        if deg == 1 and next(iter(graph.predecessors(n))) == ids
    ]
    one_degree_nodes = list(set(single_out + single_in))
    # The whole mapping is built before any attribute is written, so every
    # entry sees the colour the seed node had at the start of this iteration.
    recolored = {
        n: {"community": graph.nodes[ids]["community"]}
        for n in one_degree_nodes
    }
    nx.set_node_attributes(graph, recolored)

In [13]:
def lerp(a, b, t):
    """Linearly interpolate between ``a`` and ``b``.

    Returns ``a`` for ``t == 0`` and ``b`` for ``t == 1``; values in between
    weight the two inputs by ``1 - t`` and ``t``. Works for anything that
    supports ``*`` and ``+`` with a number, e.g. scalars or numpy arrays.
    """
    weight_a = 1 - t
    from_a = a * weight_a
    from_b = b * t
    return from_a + from_b

In [14]:
from PIL import ImageColor


def _blend_toward_neighbours(node, neighbours, t):
    """Pull ``node``'s community colour toward each neighbour's colour in turn.

    The neighbour colours are read first, then the node's own colour is
    interpolated toward them one after another with weight ``t`` and written
    back to the graph as a ``#rrggbb`` string (channels truncated to int).
    """
    neighbour_hexes = [graph.nodes[other]["community"] for other in neighbours]
    rgb = np.array(list(ImageColor.getcolor(graph.nodes[node]["community"], "RGB")))
    for hex_code in neighbour_hexes:
        rgb = lerp(rgb, np.array(list(ImageColor.getcolor(hex_code, "RGB"))), t)
    nx.set_node_attributes(graph, {node: {"community": '#%02x%02x%02x' % tuple(rgb.astype(int))}})


# Work through the degree values in the order given by amount_of_degrees. For
# each value, visit every node: first blend with its predecessors if its
# in-degree matches (weight 0.25), then with its successors if its out-degree
# matches (weight 0.5). The second step sees the result of the first.
for degree in amount_of_degrees:
    for node in graph.nodes():
        if graph.in_degree(node) == degree:
            _blend_toward_neighbours(node, list(graph.predecessors(node)), 0.25)

        if graph.out_degree(node) == degree:
            _blend_toward_neighbours(node, list(graph.successors(node)), 0.5)
                

    

In [15]:
# Translate each node's "#rrggbb" community colour into a "viz" colour dict and
# attach all of them in one call.
# NOTE(review): 'a' is written as 0 - if the consumer of the exported file
# treats it as opacity this would mean fully transparent; confirm.
node_viz = {}
for node, attrs in graph.nodes(data=True):
    rgb = ImageColor.getcolor(attrs["community"], 'RGB')
    node_viz[node] = {"viz": {'color': {'r': rgb[0], 'g': rgb[1], 'b': rgb[2], 'a': 0}}}
nx.set_node_attributes(graph, node_viz)

In [16]:
# Give every edge the midpoint between the colours of its two endpoints
# (channels truncated to int) and attach all "viz" dicts in one call.
# NOTE(review): 'a' is written as 0 - if the consumer of the exported file
# treats it as opacity this would mean fully transparent; confirm.
edge_viz = {}
for source, target in graph.edges():
    source_rgb = np.array(ImageColor.getcolor(graph.nodes[source]["community"], "RGB"))
    target_rgb = np.array(ImageColor.getcolor(graph.nodes[target]["community"], "RGB"))
    midpoint = lerp(source_rgb, target_rgb, 0.5).astype(int)
    edge_viz[(source, target)] = {
        "viz": {'color': {'r': midpoint[0], 'g': midpoint[1], 'b': midpoint[2], 'a': 0}}
    }
nx.set_edge_attributes(graph, edge_viz)
             



In [17]:
# Write the coloured graph to disk in GEXF format.
# GraphML alternative, currently disabled:
#nx.write_graphml(graph, path="./level_alle_hochschulen.graphml")
gexf_path = "./level_alle_hochschulen.gexf"
nx.write_gexf(graph, path=gexf_path)