In [38]:
# Setup packages 
from collections import Counter
from dash_bootstrap_components._components.Row import Row
import pandas as pd
import numpy as np
import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
import networkx.algorithms
import networkx.utils # (algorithms to check out: "approximation", "eccentricity", "diameter", "radius", "periphery", "center", "barycenter", "Community" "degree_centrality", "constraint", "local_constraint", "effective_size") 

# Dash packages needed for building dash app 

import dash
import dash_core_components as dcc
import dash_bootstrap_components as dbc
import dash_html_components as html
import dash_cytoscape as cyto
import dash_table
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate

# Paths, Dash app object and shared styling constants.
path = "/home/teijehidde/Documents/Git Blog and Coding/"
data_file = "data_dump/data_new5.json"

# Local copy of https://codepen.io/chriddyp/pen/bWLwgP.css. Should appear in credits!
# NOTE(review): this value is not passed to dash.Dash below; only the Bootstrap theme is.
external_stylesheets = (
    path
    + 'Comparing Wikipedia Knowledge Networks (Network Analysis Page links)/Code/stylesheet.css'
)
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])

styles = {'pre': {'border': 'thin lightgrey solid', 'overflowX': 'scroll'}}

# Placeholder palette; use color coding later.
colors = {'background': 'white', 'text': 'gray'}

list_colours = [
    'red',
    'blue',
    'purple',
    'orange',
    'green',
    'olive',
    'maroon',
    'brown',
    'lime',
    'teal',
]

In [40]:
# Read the page-link dump and collect the titles of all English ego pages.
network_data_df = pd.read_json(path + data_file, orient='split')
all_networks = network_data_df.loc[
    (network_data_df['ego'] == True) & (network_data_df['lang'] == 'en'),
    'title',
].tolist()

In [41]:
# setting up classes WikiNode and WikiNetwork. 
class WikiNode:
    """A single wiki page looked up in the page-link dataframe.

    Parameters
    ----------
    node_title : value matched against the 'title' column.
    lang : value matched against the 'lang' column.
    network_data : pandas DataFrame providing at least the columns
        'title', 'lang', 'uniqueid' and 'links'.

    Attributes
    ----------
    node_title, node_ID, node_links, node_lang :
        The 'title', 'uniqueid', 'links' and 'lang' values of the first
        row that matches both ``node_title`` and ``lang``.

    Raises
    ------
    IndexError
        If no row matches ``node_title`` and ``lang``.
    """

    def __init__(self, node_title, lang, network_data):

        # One combined mask instead of two chained .loc calls: the second chained
        # call relied on pandas re-aligning a full-length mask to a filtered frame.
        matches = (network_data['title'] == node_title) & (network_data['lang'] == lang)
        node_data = network_data.loc[matches]

        if node_data.empty:
            # Same exception type the positional lookup used to raise, with a
            # message that says what was actually missing.
            raise IndexError(
                "no page titled {!r} with lang {!r} in network_data".format(node_title, lang)
            )

        # Take the first match: the same wikipage can appear twice in the dataframe,
        # once as central node (with langlinks) and once as a normal node of another
        # network (without langlinks).
        self.node_title = node_data['title'].iloc[0]
        self.node_ID = node_data['uniqueid'].iloc[0]
        self.node_links = node_data['links'].iloc[0]
        self.node_lang = node_data['lang'].iloc[0]

In [42]:
class WikiNetwork(WikiNode):
    """Ego network around one wiki page, built from the pages it links to.

    The central page is looked up as a ``WikiNode``. Every page in its
    ``node_links`` (plus the central page itself) is then looked up in the same
    language, and an edge is recorded from that page to each of its own links
    that is also one of the central page's links.

    Parameters
    ----------
    node_title : title of the central page.
    lang : language code used for the central page and for every linked page.
    threshold : minimum number of occurrences in ``links_count`` a title needs
        to be returned by ``getNodes`` / ``getEdges`` (default 0 keeps all).
    network_data : optional DataFrame to look pages up in. When omitted, the
        module-level ``network_data_df`` is used.

    Attributes
    ----------
    network_nodes : dict mapping ``node_ID`` to the ``WikiNode`` of every page
        that could be looked up.
    network_links : list holding the central title followed by every kept link
        target (with repeats).
    network_edges : list of ``(source_title, target_title)`` tuples.
    network_status : empty list; not filled in by this class.
    links_count : ``Counter`` over ``network_links``.
    """

    def __init__(self, node_title, lang, threshold = 0, network_data = None):

        if network_data is None:
            network_data = network_data_df

        WikiNode.__init__(self, node_title, lang, network_data = network_data)
        self.threshold = threshold
        self.network_nodes = {}
        self.network_links = [node_title]
        self.network_edges = []
        self.network_status = []

        # Set for O(1) membership tests; the list itself is still iterated in order.
        ego_links = set(self.node_links)

        # Go through node_links of the central node (node_title) to build network.
        for link in self.node_links + [self.node_title]:
            try:
                Node2 = WikiNode(link, lang, network_data = network_data)
                purged_links = [x for x in Node2.node_links if x in ego_links]
            except (IndexError, KeyError, TypeError):
                # NB: the links are not always in the same language as the network,
                # so the lookup can fail (IndexError), or a page can have no usable
                # list of links (TypeError). Such pages are skipped on purpose.
                continue

            self.network_nodes[Node2.node_ID] = Node2
            self.network_links.extend(purged_links)
            self.network_edges.extend((link, purged_link) for purged_link in purged_links)

        self.links_count = Counter(self.network_links)

    def _selected_nodes(self):
        """Return the titles whose count in ``links_count`` reaches ``threshold``."""
        return [k for k, v in self.links_count.items() if float(v) >= self.threshold]

    def _build_graph(self):
        """Return an undirected networkx graph of the threshold-filtered edges."""
        G = nx.Graph()
        G.add_edges_from(self.getEdges(type = 'networkx'))
        return G

    def getNodes(self, type="cytoscape"):
        """Return the threshold-filtered nodes.

        ``type='networkx'`` gives ``(title, {"name": title})`` tuples,
        ``type='cytoscape'`` gives ``{'data': {'id': ..., 'label': ...}}`` dicts.
        Any other value returns ``None``.
        """
        selected_nodes = self._selected_nodes()

        if type == 'networkx':
            return [(i, {"name": i}) for i in selected_nodes]
        if type == 'cytoscape':
            return [{'data': {'id': i, "label": i}} for i in selected_nodes]
        return None

    def getEdges(self,type="cytoscape"):
        """Return the edges whose two endpoints both pass the threshold filter.

        ``type='networkx'`` gives ``(source, target)`` tuples,
        ``type='cytoscape'`` gives ``{'data': {'source': ..., 'target': ...}}`` dicts.
        Any other value returns ``None``.
        """
        selected_nodes = set(self._selected_nodes())
        edges_network = [
            (a, b) for a, b in self.network_edges
            if a in selected_nodes and b in selected_nodes
        ]

        if type == 'networkx':
            return edges_network
        if type == 'cytoscape':
            return [{'data': {'source': a, "target": b}} for a,b in edges_network]
        return None

    def getCommunities(self):
        """Return a list of single-entry dicts ``{title: community_index}``.

        Communities come from ``greedy_modularity_communities`` on the filtered graph;
        the index is the position of the community in that result.
        """
        communities = greedy_modularity_communities(self._build_graph())

        result = []
        for number, members in enumerate(communities):
            result.extend({i: number} for i in members)

        return result

    def getStatsNodes(self):
        """Return a DataFrame of per-node statistics, indexed by title.

        Columns: 'degree_centrality', 'eccentricity', 'community',
        'normalized_centrality' (min-max scaled degree centrality) and
        'degree_centrality_rounded' (two decimals).

        NOTE(review): networkx documents that ``eccentricity`` raises
        ``NetworkXError`` on a graph that is not connected; a high ``threshold``
        could produce such a graph -- confirm callers handle that.
        """
        G = self._build_graph()
        communities = greedy_modularity_communities(G)
        degree_centrality_nodes = nx.degree_centrality(G)
        eccentricity_nodes = nx.eccentricity(G)
        dict_communities = {
            key: value
            for value, members in enumerate(communities)
            for key in members
        }

        df = pd.DataFrame({
            'degree_centrality': pd.Series(degree_centrality_nodes),
            'eccentricity': pd.Series(eccentricity_nodes),
            'community': pd.Series(dict_communities),
        })

        val_min = df['degree_centrality'].min()
        val_range = df['degree_centrality'].max() - val_min
        if val_range > 0:
            df['normalized_centrality'] = (df['degree_centrality'] - val_min) / val_range
        else:
            # All nodes share one centrality value: avoid 0/0 producing NaN.
            df['normalized_centrality'] = 0.0
        df['degree_centrality_rounded'] = df['degree_centrality'].round(2)

        return df

In [80]:
# Inspect the loaded page-link dataframe (the rendered table below is truncated by pandas).
network_data_df

Unnamed: 0,title,lang,pageid,uniqueid,lastrevid,links,langlinks,ego
0,Cgroups,en,28942492,en28942492,1032278529,[Adaptive Domain Environment for Operating Sys...,"[{'lang': 'ar', '*': 'سيغروبس'}, {'lang': 'fa'...",
1,Linux kernel,en,21347315,en21347315,1037151397,"[/sys, 0 A.D. (video game), 64-bit, AArch64, A...","[{'lang': 'ang', '*': 'Linux (cyrnel)'}, {'lan...",
2,PearPC,en,656612,en656612,1019446604,"[3Com, 64-bit, Adaptive Domain Environment for...","[{'lang': 'ca', '*': 'PearPC'}, {'lang': 'de',...",
3,Containerd,en,64656490,en64656490,972183911,[Cloud Native Computing Foundation],[],
4,GLinux,en,56356171,en56356171,1035653157,"[/e/ (operating system), 4MLinux, ALT Linux, A...","[{'lang': 'es', '*': 'GLinux'}, {'lang': 'ru',...",
...,...,...,...,...,...,...,...,...
3524,Google Reader,nl,1038487,nl1038487,51250412,"[Android (besturingssysteem), Atom (bestandsfo...","[{'lang': 'ar', '*': 'قارىء جوجل'}, {'lang': '...",
3525,Flock (webbrowser),nl,366647,nl366647,55848852,"[2011, 26 april, Besturingssysteem, Californië...","[{'lang': 'ar', '*': 'فلوك (متصفح ويب)'}, {'la...",
3526,Besturingssysteem,nl,167,nl167,59492976,"[Android (besturingssysteem), Applicatie, Appl...","[{'lang': 'ace', '*': 'OS'}, {'lang': 'af', '*...",
3527,Google Drive,nl,2811223,nl2811223,58828884,"[Android (besturingssysteem), Application prog...","[{'lang': 'ar', '*': 'جوجل درايف'}, {'lang': '...",


In [50]:
# Titles and languages of every ego page, and a title -> language lookup built from them.
all_networks_keys, all_networks_values = (
    network_data_df.loc[network_data_df['ego'] == True, column].tolist()
    for column in ('title', 'lang')
)
all_networks_langs = {
    title: language
    for title, language in zip(all_networks_keys, all_networks_values)
}

In [75]:
# Build the English ego network around the 'Kubernetes' page (default threshold).
kub_network = WikiNetwork(node_title='Kubernetes', lang='en')

In [81]:
# Per-node statistics DataFrame returned by WikiNetwork.getStatsNodes for the Kubernetes network.
stats_nodes = kub_network.getStatsNodes()

In [86]:
# Show the community index of every node as an integer.
stats_nodes['community'].map(int)

A-B testing                                          0
API                                                  0
Accelerated Mobile Pages                             0
Adaptive Domain Environment for Operating Systems    1
Amazon EC2 Container Service                         0
                                                    ..
Xen                                                  1
XenServer                                            1
XtratuM                                              1
Z/VM                                                 1
ZeroVM                                               1
Name: community, Length: 209, dtype: int64

In [72]:
# NOTE(review): node_title_langlinks is not assigned in any cell visible here;
# presumably it was left over from an earlier kernel session -- confirm it is
# defined before this cell runs.
node_title_langlinks

[{'lang': 'ar', '*': 'كوبيرنيتيس'},
 {'lang': 'bs', '*': 'Kubernetes'},
 {'lang': 'ca', '*': 'Kubernetes'},
 {'lang': 'cs', '*': 'Kubernetes'},
 {'lang': 'de', '*': 'Kubernetes'},
 {'lang': 'es', '*': 'Kubernetes'},
 {'lang': 'et', '*': 'Kubernetes'},
 {'lang': 'fa', '*': 'کوبرنتیز'},
 {'lang': 'fi', '*': 'Kubernetes'},
 {'lang': 'fr', '*': 'Kubernetes'},
 {'lang': 'he', '*': 'Kubernetes'},
 {'lang': 'hu', '*': 'Kubernetes'},
 {'lang': 'id', '*': 'Kubernetes'},
 {'lang': 'is', '*': 'Kubernetes'},
 {'lang': 'it', '*': 'Kubernetes'},
 {'lang': 'ja', '*': 'Kubernetes'},
 {'lang': 'ko', '*': '쿠버네티스'},
 {'lang': 'lmo', '*': 'Kubernetes'},
 {'lang': 'ms', '*': 'Kubernetes'},
 {'lang': 'nb', '*': 'Kubernetes'},
 {'lang': 'pl', '*': 'Kubernetes'},
 {'lang': 'pt', '*': 'Kubernetes'},
 {'lang': 'ro', '*': 'Kubernetes'},
 {'lang': 'ru', '*': 'Kubernetes'},
 {'lang': 'ta', '*': 'கூபெர்னெற்றிசு'},
 {'lang': 'te', '*': 'క్యూబెర్\u200cనెట్స్'},
 {'lang': 'uk', '*': 'Kubernetes'},
 {'lang': 'zh', '*':