# Libraries

In [1]:
import sys
import os
# Make the project root and its utils/ folder importable from this notebook.
directory_path = os.path.abspath(os.path.join('..'))
utils_path = os.path.abspath(os.path.join('../utils'))
# Check every path independently: previously utils_path was only appended when
# directory_path was missing, so it could be skipped (or duplicated on re-run
# setups where only one of the two was already present).
for extra_path in (directory_path, utils_path):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [2]:
import pandas as pd
import numpy as np
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import warnings
# NOTE(review): this silences every warning for the whole session, including
# deprecation warnings raised by pandas/networkx.
warnings.filterwarnings("ignore")
# Star imports from the project's utils package (made importable by the
# sys.path setup above); NetworkDraw is the helper used throughout this notebook.
from utils.Validator import *
from utils.Combinations import *
from utils.NetworkDraw import *
# Display options: show all DataFrame columns and use a larger default figure size.
pd.set_option('display.max_columns', None)
sns.set(rc={'figure.figsize':(11.7,8.27)})

# Digraph: User Followers

In [3]:
# Load the follower edge list (columns: UserId, FollowerUserId) and preview it.
df_user_follower = pd.read_csv("../data/processed/DiGraph_Followers.csv")
df_user_follower.head()

Unnamed: 0,UserId,FollowerUserId
0,368,993
1,368,1902
2,368,1950
3,368,3258
4,368,3429


In [4]:
# Directed follower graph: every edge points from the follower to the user
# being followed, so a node's in-degree is its number of followers.
GUF = nx.from_pandas_edgelist(
    df_user_follower,
    source="FollowerUserId",
    target="UserId",
    create_using=nx.DiGraph(),
)
# NOTE(review): nx.info was removed in NetworkX 3.0 - this cell needs networkx<3.
print(nx.info(GUF))

Name: 
Type: DiGraph
Number of nodes: 365493
Number of edges: 927918
Average in degree:   2.5388
Average out degree:   2.5388


In [5]:
# In-degree per node as a plain {node: degree} dict, then show the top nodes.
indeg = dict(GUF.in_degree())
NetworkDraw.get_top_nodes(indeg)

{5309: 16840, 71388: 13751, 708283: 13216, 1723677: 11607, 54836: 10725}

In [6]:
# Out-degree per node as a plain {node: degree} dict, then show the top nodes.
outdeg = dict(GUF.out_degree())
NetworkDraw.get_top_nodes(outdeg)

{4333519: 3310, 87114: 2430, 2357264: 1367, 4348347: 1213, 4956446: 905}

In [7]:
# In-degree centrality: each node's in-degree divided by n-1, the maximum
# possible degree in a simple graph with n nodes.
# Note that the variable is named degree_centrality but holds the *in*-degree variant.
degree_centrality =nx.in_degree_centrality(GUF)
NetworkDraw.get_top_nodes(degree_centrality)

{5309: 0.04607487988793188,
 71388: 0.03762325851181421,
 708283: 0.036159478182833,
 1723677: 0.03175719304389699,
 54836: 0.02934400752957657}

In [8]:
# PageRank on the follower graph, using networkx's default parameters.
pagerank_centrality =nx.pagerank(GUF)
NetworkDraw.get_top_nodes(pagerank_centrality)

{5309: 0.02708929779719563,
 417337: 0.00830070114376981,
 708283: 0.00753479591606326,
 1723677: 0.007476415943191747,
 71388: 0.007169006637838913}

In [9]:
# Store every follower metric on the nodes so it is written out with the graph.
nx.set_node_attributes(GUF, values=indeg, name='in_degree_followers')
nx.set_node_attributes(GUF, values=outdeg, name='out_degree_followers')
nx.set_node_attributes(GUF, values=degree_centrality, name='in_degree_cent_followers')
nx.set_node_attributes(GUF, values=pagerank_centrality, name='pagerank_cent_followers')

In [10]:
# Persist the annotated follower graph as GEXF, then drop the graph and the
# metric dicts from the kernel namespace before the next section reuses the names.
nx.write_gexf(GUF, r"../data/processed/GUF.gexf")
del GUF, df_user_follower, indeg, outdeg, degree_centrality, pagerank_centrality

# Graph: Team Members

In [3]:
# Load the weighted team edge list (columns: UserId_1, UserId_2, weight) and preview it.
df_user_team = pd.read_csv("../data/processed/Graph_Teams.csv")
df_user_team.head()

Unnamed: 0,UserId_1,UserId_2,weight
0,10000270,8243312,3
1,10000270,9829581,2
2,1000033,1003122,1
3,1000033,1214207,1
4,1000033,1581982,1


In [4]:
# Undirected team graph between users.
# Only the "weight" column is attached to the edges: edge_attr=True also copied
# the CSV's leftover "Unnamed: 0" row-index column onto every edge, which then
# ended up in the exported GEXF file.
GUT = nx.from_pandas_edgelist(
    df_user_team,
    source="UserId_1",
    target="UserId_2",
    edge_attr="weight",
    create_using=nx.Graph(),
)
# NOTE(review): nx.info was removed in NetworkX 3.0 - this cell needs networkx<3.
print(nx.info(GUT))

Name: 
Type: Graph
Number of nodes: 155730
Number of edges: 196736
Average degree:   2.5266


In [5]:
# Degree per node as a plain {node: degree} dict, then show the top nodes.
degrees = dict(GUT.degree())
NetworkDraw.get_top_nodes(degrees)

{54836: 133, 1192776: 110, 18463: 99, 111640: 92, 5309: 90}

In [6]:
# Degree centrality of the team graph (degree normalised by n-1).
degree_centrality =nx.degree_centrality(GUT)
NetworkDraw.get_top_nodes(degree_centrality)

{54836: 0.0008540477367734975,
 1192776: 0.0007063552710156746,
 18463: 0.0006357197439141072,
 111640: 0.0005907698630312915,
 5309: 0.0005779270399219156}

In [7]:
# Eigenvector centrality weighted by the "weight" edge attribute.
# NOTE(review): the two top scores shown below (~0.705 each) dwarf all others,
# which suggests the score mass sits on one strongly weighted pair; eigenvector
# centrality is not well-behaved on disconnected graphs - confirm the metric is
# meaningful for this network.
eigenvector_centrality = nx.eigenvector_centrality(GUT,weight="weight")
NetworkDraw.get_top_nodes(eigenvector_centrality)

{637722: 0.7050741444838826,
 2360956: 0.7050741444838826,
 43882: 0.017099873206441504,
 43896: 0.016979948853551405,
 43883: 0.0168582338863929}

In [8]:
# Store every team metric on the nodes so it is written out with the graph.
nx.set_node_attributes(GUT, values=degrees, name='degree_teams')
nx.set_node_attributes(GUT, values=degree_centrality, name='degree_cent_teams')
nx.set_node_attributes(GUT, values=eigenvector_centrality, name='eigenvector_cent_teams')

In [9]:
# Persist the annotated team graph as GEXF, then drop the graph and the metric
# dicts from the kernel namespace before the next section reuses the names.
nx.write_gexf(GUT, r"../data/processed/GUT.gexf")
del GUT, df_user_team, degrees, degree_centrality, eigenvector_centrality

# Graph: Competition Participant

In [18]:
# Load the weighted competition-participant edge list
# (columns: UserId_1, UserId_2, weight) and preview it.
df_comp_part = pd.read_csv("../data/processed/Graph_CompParticipant.csv")
df_comp_part.head()

Unnamed: 0,UserId_1,UserId_2,weight
0,1000025,10148801,1
1,1000025,10668599,1
2,1000025,1138223,1
3,1000025,1169450,1
4,1000025,1299826,1


In [19]:
# Undirected competition-participant graph between users.
# Only the "weight" column is attached to the edges: edge_attr=True also copied
# the CSV's leftover "Unnamed: 0" row-index column onto every one of the ~10M
# edges, which then ended up in the exported GEXF file.
GUP = nx.from_pandas_edgelist(
    df_comp_part,
    source="UserId_1",
    target="UserId_2",
    edge_attr="weight",
    create_using=nx.Graph(),
)
# NOTE(review): nx.info was removed in NetworkX 3.0 - this cell needs networkx<3.
print(nx.info(GUP))

Name: 
Type: Graph
Number of nodes: 157158
Number of edges: 10068094
Average degree: 128.1270


In [20]:
# Degree per node as a plain {node: degree} dict, then show the top nodes.
degrees = dict(GUP.degree())
NetworkDraw.get_top_nodes(degrees)

{5030586: 44655, 929585: 32448, 2121639: 10856, 18463: 6711, 1283757: 6060}

In [21]:
# Degree centrality of the participant graph (degree normalised by n-1).
degree_centrality =nx.degree_centrality(GUP)
NetworkDraw.get_top_nodes(degree_centrality)

{5030586: 0.2841426089833733,
 929585: 0.20646869054512365,
 2121639: 0.06907741939589072,
 18463: 0.042702520409526774,
 1283757: 0.03856016594870098}

In [22]:
# Eigenvector centrality weighted by the "weight" edge attribute.
eigenvector_centrality = nx.eigenvector_centrality(GUP,weight="weight")
NetworkDraw.get_top_nodes(eigenvector_centrality)

{5755222: 0.09584890581263975,
 5749234: 0.09511932730451683,
 5752077: 0.09324808386352273,
 5119531: 0.09323238260549731,
 5753055: 0.09259691999193165}

In [23]:
# Store every participant metric on the nodes so it is written out with the graph.
nx.set_node_attributes(GUP, values=degrees, name='degree_participant')
nx.set_node_attributes(GUP, values=degree_centrality, name='degree_cent_participant')
nx.set_node_attributes(GUP, values=eigenvector_centrality, name='eigenvector_cent_participant')

In [24]:
# Persist the annotated participant graph as GEXF, then drop the graph and the
# metric dicts from the kernel namespace before the next section reuses the names.
nx.write_gexf(GUP, r"../data/processed/GUP.gexf")
del GUP, df_comp_part, degrees, degree_centrality, eigenvector_centrality

# Graph: User Forum Messages

In [25]:
# Load the weighted forum-message edge list
# (columns: UserId_1, UserId_2, weight) and preview it.
df_user_forum_messages = pd.read_csv("../data/processed/Graph_ForumMessages.csv")
df_user_forum_messages.head()

Unnamed: 0,UserId_1,UserId_2,weight
0,10000014,10030651,7
1,10000014,10135089,1
2,10000014,10215702,1
3,10000014,10275991,4
4,10000014,10277092,1


In [26]:
# Undirected forum-message graph between users.
# Only the "weight" column is attached to the edges: edge_attr=True also copied
# the CSV's leftover "Unnamed: 0" row-index column onto every one of the ~8M
# edges, which then ended up in the exported GEXF file.
GUM = nx.from_pandas_edgelist(
    df_user_forum_messages,
    source="UserId_1",
    target="UserId_2",
    edge_attr="weight",
    create_using=nx.Graph(),
)
# NOTE(review): nx.info was removed in NetworkX 3.0 - this cell needs networkx<3.
print(nx.info(GUM))

Name: 
Type: Graph
Number of nodes: 209241
Number of edges: 8085584
Average degree:  77.2849


In [27]:
# Degree per node as a plain {node: degree} dict, then show the top nodes.
degrees = dict(GUM.degree())
NetworkDraw.get_top_nodes(degrees)

{3012786: 22855, 75976: 12485, 1723677: 12084, 1245336: 9336, 1106296: 8864}

In [28]:
# Degree centrality of the forum graph (degree normalised by n-1).
degree_centrality =nx.degree_centrality(GUM)
NetworkDraw.get_top_nodes(degree_centrality)

{3012786: 0.1092286369718983,
 75976: 0.059668323456318104,
 1723677: 0.05775186388835787,
 1245336: 0.044618619766774996,
 1106296: 0.04236283693366469}

In [29]:
# Eigenvector centrality weighted by the "weight" edge attribute.
eigenvector_centrality = nx.eigenvector_centrality(GUM,weight="weight")
NetworkDraw.get_top_nodes(eigenvector_centrality)

{3012786: 0.3870537519595844,
 3399844: 0.24673694975222174,
 3490494: 0.24550454055507087,
 5274255: 0.20054791210175402,
 4956446: 0.1922082501348615}

In [30]:
# Store every forum metric on the nodes so it is written out with the graph.
nx.set_node_attributes(GUM, values=degrees, name='degree_forums')
nx.set_node_attributes(GUM, values=degree_centrality, name='degree_cent_forums')
nx.set_node_attributes(GUM, values=eigenvector_centrality, name='eigenvector_cent_forums')

In [31]:
# Persist the annotated forum graph as GEXF, then drop the graph and the metric
# dicts from the kernel namespace.
nx.write_gexf(GUM, r"../data/processed/GUM.gexf")
del GUM, df_user_forum_messages, degrees, degree_centrality, eigenvector_centrality

# To Dataframe

In [8]:
def scaler_network(GName,list_metrics,scaler,df_user):
    """Export the scaled node metrics of a stored graph and return the raw ones.

    Reads ``../data/processed/<GName>.gexf``, turns the node attributes into a
    DataFrame, joins the user information, rescales every metric column with
    ``scaler`` and writes the result to
    ``../data/processed/df_<GName>_scaled.csv``.

    Parameters
    ----------
    GName : str
        Base name of the GEXF file (without extension); also used in the CSV name.
    list_metrics : list of str
        Columns kept in the exported CSV. The first two entries are treated as
        identifier columns; every remaining entry is scaled and additionally
        exported as ``<name>_scaled``.
    scaler : object
        Anything exposing ``fit_transform`` (e.g. ``MinMaxScaler``). It is
        re-fitted on every call.
    df_user : pandas.DataFrame
        User table with a string ``UserId`` column plus any user columns that
        appear in ``list_metrics``.

    Returns
    -------
    pandas.DataFrame
        The unscaled node table (``UserId`` plus the node attributes read from
        the graph) - not the frame that was written to CSV.
    """
    G = nx.read_gexf(r"../data/processed/"+GName+".gexf")
    df_G = pd.DataFrame.from_dict(dict(G.nodes(data=True)), orient='index').reset_index()
    df_G = df_G.rename(columns = {'index':'UserId'})
    df_G["UserId"] = df_G["UserId"].astype(str)
    # Drop the last column - presumably the "label" attribute that read_gexf
    # attaches to every node; TODO confirm it is always last.
    df_G = df_G.iloc[: , :-1]

    # Join with the users table. The merged frame used to be referenced before
    # it was ever assigned (UnboundLocalError); build it from df_G here.
    df_G_2 = df_G.merge(df_user, how='left', on='UserId')
    df_G_2 = df_G_2[list_metrics]

    # Scaling: everything after the two identifier columns is a metric. The
    # slice is taken from the merged/selected frame so no metric is skipped.
    metric_cols = list(list_metrics[2:])
    scaled_cols = [col + "_scaled" for col in metric_cols]
    scaled_values = scaler.fit_transform(df_G_2[metric_cols])
    # Flat column names + the original index keep the rows aligned; fillna is
    # applied to the frame that is actually kept (it used to run on a temporary
    # copy and had no effect).
    df_scaled = pd.DataFrame(scaled_values, columns=scaled_cols, index=df_G_2.index).fillna(10)
    df_G_2 = pd.concat([df_G_2, df_scaled], axis=1)

    # Write the scaled table to CSV
    df_G_2.to_csv(r"../data/processed/df_"+GName+"_scaled.csv",index=False)

    # Return the unscaled node table read from the graph
    return df_G

In [4]:
# Users dataframe: every column is read as a string, then only the needed ones are kept
df_user = pd.read_csv(r"../data/interim/Users.csv",dtype=str)[
    ["UserId","UserName","PerformanceTier_Cat","Seniority_Cat"]
]

# MinMax scaler object (scaled values fall in the range 20-100)
scaler = MinMaxScaler(feature_range=(20,100))

# Columns to export per network: two identifier columns followed by the metrics to scale
list_metrics_dir = [
    'UserId',
    'UserName',
    'in_degree_followers',
    'out_degree_followers',
    'in_degree_cent_followers',
    'pagerank_cent_followers',
]
list_metrics_nodir_GUT= [
    'UserId',
    'UserName',
    'degree_teams',
    'degree_cent_teams',
    'eigenvector_cent_teams',
]
list_metrics_nodir_GUP = [
    'UserId',
    'UserName',
    'degree_participant',
    'degree_cent_participant',
    'eigenvector_cent_participant',
]
list_metrics_nodir_GUM = [
    'UserId',
    'UserName',
    'degree_forums',
    'degree_cent_forums',
    'eigenvector_cent_forums',
]

In [5]:
# Export the scaled follower metrics to CSV; df_GUF is the value returned by scaler_network.
df_GUF = scaler_network("GUF",list_metrics_dir,scaler,df_user)
df_GUF.shape

(365493, 5)

In [6]:
# Export the scaled team metrics to CSV; df_GUT is the value returned by scaler_network.
df_GUT = scaler_network("GUT",list_metrics_nodir_GUT,scaler,df_user)
df_GUT.shape

(155730, 4)

In [9]:
# Export the scaled participant metrics to CSV; df_GUP is the value returned by scaler_network.
df_GUP = scaler_network("GUP",list_metrics_nodir_GUP,scaler,df_user)
df_GUP.shape

(157158, 4)

In [10]:
# Export the scaled forum metrics to CSV; df_GUM is the value returned by scaler_network.
df_GUM = scaler_network("GUM",list_metrics_nodir_GUM,scaler,df_user)
df_GUM.shape

(209241, 4)

In [11]:
# Left-join the metrics of all four networks onto the users table; users that
# do not appear in a network get NaN for that network's columns.
df_users_net = (
    df_user
    .merge(df_GUF, how="left", on=["UserId"])
    .merge(df_GUT, how="left", on=["UserId"])
    .merge(df_GUP, how="left", on=["UserId"])
    .merge(df_GUM, how="left", on=["UserId"])
)
df_users_net.head()

Unnamed: 0,UserId,UserName,PerformanceTier_Cat,Seniority_Cat,in_degree_followers,out_degree_followers,in_degree_cent_followers,pagerank_cent_followers,degree_teams,degree_cent_teams,eigenvector_cent_teams,degree_participant,degree_cent_participant,eigenvector_cent_participant,degree_forums,degree_cent_forums,eigenvector_cent_forums
0,368,antgoldbloom,Expert,4_mas,983.0,6.0,0.00269,0.001607,4.0,2.6e-05,9.451807999999999e-26,,,,1320.0,0.006309,0.00126
1,381,iguyon,Expert,4_mas,6.0,0.0,1.6e-05,3e-06,,,,163.0,0.001037,2.4e-05,164.0,0.000784,6e-05
2,383,davidstephan,Novice,4_mas,,,,,,,,,,,,,
3,384,gabewarren,Novice,4_mas,,,,,,,,,,,,,
4,385,demonjosh,Novice,4_mas,,,,,,,,,,,,,


In [12]:
# Save the combined per-user network metrics dataset.
df_users_net.to_csv(r"../data/interim/UserNetworksMetrics.csv",index=False)

In [13]:
# Drop the large intermediate frames from the kernel namespace.
del df_users_net,df_GUF,df_GUT,df_GUP,df_GUM