In [2]:
import re
import datetime
import json
import pandas as pd
import numpy as np
import operator
import warnings
import matplotlib.pyplot as plt

from scipy.cluster import hierarchy
from scipy.spatial import distance
from itertools import combinations

import networkx as nx
from networkx import betweenness_centrality 
from networkx import closeness_centrality
from networkx import degree_centrality

import community
from community import modularity
from community import generate_dendrogram

## Functions

In [3]:
def find_gc(network):
    '''
    Find the network giant component
    '''
    gc_nodes = max(nx.connected_components(network), key=len)
    network_gc = network.subgraph(gc_nodes).copy()
    return network_gc

In [4]:
def take_edges(suc_authors):
    '''
    Create author nodes and collaboration edges
    '''
    
    for i in suc_authors:
        for name in i:
            if name == '':
                i.remove('')
    
    edges = []
    nodes = []
    
    for i in suc_authors:
        if len(i) > 1:
            edges.extend(list(combinations(i,2)))
        else:
            nodes.append(i[0].strip())
            
    for pair in edges:
        if pair[0] == pair[1]:
            edges.remove(pair)
    
    return edges, list(set(nodes))

In [5]:
def build_table(network, school='test'):
    '''
    Compute for network properties for individual SUCs
    '''
    N = network.order()
    L = network.size() 
    
    avg_deg = 2*float(L) / N 
    
    network_table.loc[school]['Nodes'] = N
    network_table.loc[school]['Edges'] = L
    
    degrees = [k for node, k in nx.degree(network)]
    k_min = np.min(degrees)
    k_max = np.max(degrees)
    k_avg = np.mean(degrees)
    
    network_table.loc[school]['Average Degree'] = k_avg
    
    degree_sequence = sorted(([d, n] for n, d in network.degree()))
    dmax = max(degree_sequence)
    
    components = nx.number_connected_components(network)
    network_table.loc[school]['Node-Component Ratio'] = N/components
    
    network_table.loc[school]['Maximum Degree'] = [{'name' : dmax[1], 'value' : dmax[0]}]
    
    network_table.loc[school]['degree_distrib'] = school + '_degree.png'
    network_table.loc[school]['network_plot'] = school + '.png'
    network_table.loc[school]['Profile'] = 1

    if N/components > 4: #waived must be 4
    
        network0 = find_gc(network)
        
        N = network0.order()
        L = network0.size() 
        
        avg_deg = 2*float(L) / N 
        
        network_table.loc[school]['GC Nodes'] = N
        network_table.loc[school]['GC Edges'] = L
        
        degrees = [k for node, k in nx.degree(network0)]
        k_min = np.min(degrees)
        k_max = np.max(degrees)
        k_avg = np.mean(degrees)
        
        network_table.loc[school]['GC Average Degree'] = k_avg
        
        degree_sequence = sorted(([d, n] for n, d in network0.degree()))
        dmax = max(degree_sequence)
        
        network_table.loc[school]['GC Maximum Degree'] = [{'name' : dmax[1], 'value' : dmax[0]}]
        network_table.loc[school]['gc_degree_distrib'] = school + '_gc_degree.png'
        network_table.loc[school]['gc_network_plot'] = school + '_gc.png'
        
        sp = nx.average_shortest_path_length(network0)
        network_table.loc[school]['Average Shortest Path'] = sp
    
        cc = nx.clustering(network0)
        avg_clust = sum(cc.values()) / len(cc)
        network_table.loc[school]['Clustering Coefficient'] = avg_clust
        
        deg_c = degree_centrality(network0)
        sorted_deg_c = sorted(deg_c.items(), key=operator.itemgetter(1), reverse=True)
        dc_list = []
        for i in range(3):
            dc_list.append({"name" : sorted_deg_c[i][0], 'value' : sorted_deg_c[i][1]})
        network_table.loc[school]['Top 3 Degree Centrality'] = dc_list
                    
        bet_c = betweenness_centrality(network0)
        sorted_bet_c = sorted(bet_c.items(), key=operator.itemgetter(1), reverse=True)
        bc_list = []
        for i in range(3):
            bc_list.append({"name" : sorted_bet_c[i][0], 'value' : sorted_bet_c[i][1]})
        network_table.loc[school]['Top 3 Betweenness Centrality'] = bc_list
        
        clo_c = closeness_centrality(network0)
        sorted_clo_c = sorted(clo_c.items(), key=operator.itemgetter(1), reverse=True)
        cc_list = []
        for i in range(3):
            cc_list.append({"name" : sorted_clo_c[i][0], 'value' : sorted_clo_c[i][1]})
        network_table.loc[school]['Top 3 Closeness Centrality'] = cc_list
        
        network_table.loc[school]['dc_plot'] = school + '_dc.png'
        network_table.loc[school]['bc_plot'] = school + '_bc.png'
        network_table.loc[school]['cc_plot'] = school + '_cc.png'
        network_table.loc[school]['Profile'] = 2
    
        if sp > 3: #waived must be 3
        
            partition_mod = community.best_partition(network0)
            
            mod = modularity(partition_mod, network0)
            network_table.loc[school]['Modularity'] = mod
            
            no_com = len(set(partition_mod.values()))
            network_table.loc[school]['Number of Communities'] = no_com
            
            com_list = {}
            
            for k, v in sorted(partition_mod.items()):
                com_list.setdefault(v, []).append(k)
        
            network_table.loc[school]['Community List'] = com_list
            network_table.loc[school]['community_plot'] = school + '_communities.png'
            network_table.loc[school]['Profile'] = 3
        
        else:
            pass
        
    else:
        pass

In [32]:
def bipart_build_table(network, school='test'):
    '''
    Compute for network properties for author-topic bipartite network
    '''
    N = network.order()
    L = network.size() 
    
    avg_deg = 2*float(L) / N 
    
    network_table.loc[school]['Nodes'] = N
    network_table.loc[school]['Edges'] = L
    
    degrees = [k for node, k in nx.degree(network)]
    k_min = np.min(degrees)
    k_max = np.max(degrees)
    k_avg = np.mean(degrees)
    
    network_table.loc[school]['Average Degree'] = k_avg
    
    degree_sequence = sorted(([d, n] for n, d in network.degree()))
    dmax = max(degree_sequence)
    
    components = nx.number_connected_components(network)
    network_table.loc[school]['Node-Component Ratio'] = N/components
    
    network_table.loc[school]['Maximum Degree'] = [{'name' : dmax[1], 'value' : dmax[0]}]
    
    network_table.loc[school]['degree_distrib'] = school + '_degree.png'
    network_table.loc[school]['network_plot'] = school + '.png'
    network_table.loc[school]['Profile'] = 1

    if N/components > 4:
    
        network0 = find_gc(network)
        
        N = network0.order()
        L = network0.size() 
        
        avg_deg = 2*float(L) / N 
        
        network_table.loc[school]['GC Nodes'] = N
        network_table.loc[school]['GC Edges'] = L
        
        degrees = [k for node, k in nx.degree(network0)]
        k_min = np.min(degrees)
        k_max = np.max(degrees)
        k_avg = np.mean(degrees)
        
        network_table.loc[school]['GC Average Degree'] = k_avg
        
        degree_sequence = sorted(([d, n] for n, d in network0.degree()))
        dmax = max(degree_sequence)
        
        network_table.loc[school]['GC Maximum Degree'] = [{'name' : dmax[1], 'value' : dmax[0]}]
        network_table.loc[school]['gc_degree_distrib'] = school + '_gc_degree.png'
        network_table.loc[school]['gc_network_plot'] = school + '_gc.png'
        
        sp = nx.average_shortest_path_length(network0)
        network_table.loc[school]['Average Shortest Path'] = sp
    
        cc = nx.clustering(network0)
        avg_clust = sum(cc.values()) / len(cc)
        network_table.loc[school]['Clustering Coefficient'] = avg_clust
        
        deg_c = degree_centrality(network0)
        sorted_deg_c = sorted(deg_c.items(), key=operator.itemgetter(1), reverse=True)
        dc_list = []
        for i in range(3):
            dc_list.append({"name" : sorted_deg_c[i][0], 'value' : sorted_deg_c[i][1]})
        network_table.loc[school]['Top 3 Degree Centrality'] = dc_list
                    
        bet_c = betweenness_centrality(network0)
        sorted_bet_c = sorted(bet_c.items(), key=operator.itemgetter(1), reverse=True)
        bc_list = []
        for i in range(3):
            bc_list.append({"name" : sorted_bet_c[i][0], 'value' : sorted_bet_c[i][1]})
        network_table.loc[school]['Top 3 Betweenness Centrality'] = bc_list
        
        clo_c = closeness_centrality(network0)
        sorted_clo_c = sorted(clo_c.items(), key=operator.itemgetter(1), reverse=True)
        cc_list = []
        for i in range(3):
            cc_list.append({"name" : sorted_clo_c[i][0], 'value' : sorted_clo_c[i][1]})
        network_table.loc[school]['Top 3 Closeness Centrality'] = cc_list
        
        network_table.loc[school]['dc_plot'] = school + '_dc.png'
        network_table.loc[school]['bc_plot'] = school + '_bc.png'
        network_table.loc[school]['cc_plot'] = school + '_cc.png'
        network_table.loc[school]['Profile'] = 2
    
        if sp > 1:
        
            partition_mod = community.best_partition(network0)
            
            mod = modularity(partition_mod, network0)
            network_table.loc[school]['Modularity'] = mod
            
            no_com = len(set(partition_mod.values()))
            network_table.loc[school]['Number of Communities'] = no_com
            
            com_list = {}
            
            for k, v in sorted(partition_mod.items()):
                com_list.setdefault(v, []).append(k)
        
            network_table.loc[school]['Community List'] = com_list
            network_table.loc[school]['community_plot'] = school + '_communities.png'
            network_table.loc[school]['Profile'] = 3
        
        else:
            pass
        
    else:
        pass

## Build Network Property Table

In [8]:
column_list = ['Profile', 'Nodes', 'Edges', 'Average Degree', 'Maximum Degree', 'Node-Component Ratio',
               'degree_distrib', 'network_plot', 'GC Nodes', 'GC Edges', 
               'GC Average Degree', 'GC Maximum Degree', 'gc_degree_distrib', 
               'gc_network_plot', 'Average Shortest Path', 'Clustering Coefficient', 
               'Top 3 Degree Centrality', 'Top 3 Betweenness Centrality', 
               'Top 3 Closeness Centrality', 'dc_plot', 'bc_plot', 'cc_plot', 
               'Modularity', 'Number of Communities', 'Community List', 'community_plot']

school_list = ['ASSIST', 'ASCAT', 'BSU', 'BulSU', 'DNSC', 'MinSU', 'PSU', 'TSU', 'UPB', 'RSU', 'QSU', 'Authors', 'Topics']

In [9]:
network_table = pd.DataFrame(columns=column_list, index=school_list)

In [10]:
#empty table

network_table

Unnamed: 0,Profile,Nodes,Edges,Average Degree,Maximum Degree,Node-Component Ratio,degree_distrib,network_plot,GC Nodes,GC Edges,...,Top 3 Degree Centrality,Top 3 Betweenness Centrality,Top 3 Closeness Centrality,dc_plot,bc_plot,cc_plot,Modularity,Number of Communities,Community List,community_plot
ASSIST,,,,,,,,,,,...,,,,,,,,,,
ASCAT,,,,,,,,,,,...,,,,,,,,,,
BSU,,,,,,,,,,,...,,,,,,,,,,
BulSU,,,,,,,,,,,...,,,,,,,,,,
DNSC,,,,,,,,,,,...,,,,,,,,,,
MinSU,,,,,,,,,,,...,,,,,,,,,,
PSU,,,,,,,,,,,...,,,,,,,,,,
TSU,,,,,,,,,,,...,,,,,,,,,,
UPB,,,,,,,,,,,...,,,,,,,,,,
RSU,,,,,,,,,,,...,,,,,,,,,,


## Import Data (Individual SUCs)

In [11]:
df_rp = pd.read_excel('research_profile.xlsx')

In [12]:
suc_list = [('Abra State Institute of Science and Technology', 'ASSIST'),
            ('Agusan del Sur State College of Agriculture and Technology', 'ASCAT'),
            ('Benguet State University', 'BSU'), 
            ('Bulacan State University', 'BulSU'),
            ('Davao del Norte State College', 'DNSC'),
            ('Mindoro State University', 'MinSU'),
            ('Palawan State University', 'PSU'),
            ('Tarlac State University', 'TSU'),
            ('University of the Philippines Baguio', 'UPB'),  
            ('Romblon State University', 'RSU'),
            ('Quirino State University', 'QSU')]

In [13]:
#compute network properties for each SUC

for suc in suc_list:
    sucs = df_rp[df_rp['University (Full Name)'] == suc[0]]
    suc_authors = sucs['Author'].copy().str.split(';')

    edges, nodes = take_edges(suc_authors)

    network = nx.Graph()
    
    for i in edges:
        network.add_edge(i[0], i[1])
    
    for i in nodes:
        network.add_node(i)
    
    build_table(network, school=suc[1])

In [16]:
#table preview

network_table.head()

Unnamed: 0,Profile,Nodes,Edges,Average Degree,Maximum Degree,Node-Component Ratio,degree_distrib,network_plot,GC Nodes,GC Edges,...,Top 3 Degree Centrality,Top 3 Betweenness Centrality,Top 3 Closeness Centrality,dc_plot,bc_plot,cc_plot,Modularity,Number of Communities,Community List,community_plot
ASSIST,3,265,483,3.645283,"[{'name': 'Vasquez, Reymond S.', 'value': 20}]",7.361111,ASSIST_degree.png,ASSIST.png,193.0,422.0,...,"[{'name': 'Vasquez, Reymond S.', 'value': 0.10...","[{'name': 'Gannapao, Jubert S.', 'value': 0.43...","[{'name': 'Atmosfera, Rynheart P.', 'value': 0...",ASSIST_dc.png,ASSIST_bc.png,ASSIST_cc.png,0.785399,11.0,"{6: ['Abalos, Michael Vene', 'Abat, John Lord'...",ASSIST_communities.png
ASCAT,1,26,30,2.307692,"[{'name': 'Bongolan Jr., Rodrigo L.', 'value':...",3.714286,ASCAT_degree.png,ASCAT.png,,,...,,,,,,,,,,
BSU,3,437,1259,5.762014,"[{'name': 'Tayaben, Jude L.', 'value': 38}]",5.675325,BSU_degree.png,BSU.png,157.0,469.0,...,"[{'name': 'Lee, Young Jin', 'value': 0.1538461...","[{'name': 'Dolipas, Bretel B.', 'value': 0.515...","[{'name': 'Dolipas, Bretel B.', 'value': 0.228...",BSU_dc.png,BSU_bc.png,BSU_cc.png,0.802101,9.0,"{3: ['Abadillos, Mg', 'Alfonso, Gerry P.', 'Ba...",BSU_communities.png
BulSU,1,251,321,2.557769,"[{'name': 'Vigonte, Florinda G.', 'value': 14}]",2.918605,BulSU_degree.png,BulSU.png,,,...,,,,,,,,,,
DNSC,3,100,208,4.16,"[{'name': 'Decano, Ronald S.', 'value': 20}]",11.111111,DNSC_degree.png,DNSC.png,83.0,196.0,...,"[{'name': 'Decano, Ronald S.', 'value': 0.2439...","[{'name': 'Decano, Ronald S.', 'value': 0.4214...","[{'name': 'Decano, Ronald S.', 'value': 0.3744...",DNSC_dc.png,DNSC_bc.png,DNSC_cc.png,0.695531,9.0,"{6: ['Agod, Joena Marie M.', 'Albite, Carlo Ja...",DNSC_communities.png


## Import Data (Author-Topic)

In [18]:
df_topics = pd.read_csv('title_topic.csv')
df_topics = df_topics[df_topics['Topic Name'].notna()]
df_topics = df_topics.reset_index(drop=True)

In [22]:
#topic nodes

topic = df_topics['Topic Name'].copy().str.split(';')
topic = ['None' if x is np.nan else x for x in topic]
topic = [x[0] for x in topic]

topic_nodes = list(set(topic))

In [23]:
#author nodes

authors = df_topics['Author'].copy().str.split(';')
authors = [x for x in authors]

author_nodes = []

for list_ in authors:
    for name in list_:
        author_nodes.append(name.strip())
        
author_nodes = set(author_nodes)
author_nodes.remove('')

In [24]:
#build edges

edges = []

for i in range(len(topic)):
    for name in authors[i]:
        if name == '':
            pass
        else:
            edges.append((name.strip(), topic[i]))

In [25]:
#build network

G = nx.Graph()

G.add_nodes_from(author_nodes)
G.add_nodes_from(topic_nodes)
G.add_edges_from(edges)

In [26]:
#conectivity check

nx.is_connected(G)

True

In [29]:
top = nx.bipartite.sets(G)[0]
aut = nx.bipartite_layout(G, top)

In [30]:
#build projected networks

A = nx.bipartite.projected_graph(G, aut, multigraph=False)
T = nx.bipartite.projected_graph(G, top, multigraph=False)

In [33]:
#compute network properties for projected networks

bipart_build_table(A, school='Authors')
bipart_build_table(T, school='Topics')

In [34]:
#update property table

network_table.tail()

Unnamed: 0,Profile,Nodes,Edges,Average Degree,Maximum Degree,Node-Component Ratio,degree_distrib,network_plot,GC Nodes,GC Edges,...,Top 3 Degree Centrality,Top 3 Betweenness Centrality,Top 3 Closeness Centrality,dc_plot,bc_plot,cc_plot,Modularity,Number of Communities,Community List,community_plot
ASSIST,3,265,483,3.645283,"[{'name': 'Vasquez, Reymond S.', 'value': 20}]",7.361111,ASSIST_degree.png,ASSIST.png,193.0,422.0,...,"[{'name': 'Vasquez, Reymond S.', 'value': 0.10...","[{'name': 'Gannapao, Jubert S.', 'value': 0.43...","[{'name': 'Atmosfera, Rynheart P.', 'value': 0...",ASSIST_dc.png,ASSIST_bc.png,ASSIST_cc.png,0.785399,11.0,"{6: ['Abalos, Michael Vene', 'Abat, John Lord'...",ASSIST_communities.png
ASCAT,1,26,30,2.307692,"[{'name': 'Bongolan Jr., Rodrigo L.', 'value':...",3.714286,ASCAT_degree.png,ASCAT.png,,,...,,,,,,,,,,
BSU,3,437,1259,5.762014,"[{'name': 'Tayaben, Jude L.', 'value': 38}]",5.675325,BSU_degree.png,BSU.png,157.0,469.0,...,"[{'name': 'Lee, Young Jin', 'value': 0.1538461...","[{'name': 'Dolipas, Bretel B.', 'value': 0.515...","[{'name': 'Dolipas, Bretel B.', 'value': 0.228...",BSU_dc.png,BSU_bc.png,BSU_cc.png,0.802101,9.0,"{3: ['Abadillos, Mg', 'Alfonso, Gerry P.', 'Ba...",BSU_communities.png
BulSU,1,251,321,2.557769,"[{'name': 'Vigonte, Florinda G.', 'value': 14}]",2.918605,BulSU_degree.png,BulSU.png,,,...,,,,,,,,,,
DNSC,3,100,208,4.16,"[{'name': 'Decano, Ronald S.', 'value': 20}]",11.111111,DNSC_degree.png,DNSC.png,83.0,196.0,...,"[{'name': 'Decano, Ronald S.', 'value': 0.2439...","[{'name': 'Decano, Ronald S.', 'value': 0.4214...","[{'name': 'Decano, Ronald S.', 'value': 0.3744...",DNSC_dc.png,DNSC_bc.png,DNSC_cc.png,0.695531,9.0,"{6: ['Agod, Joena Marie M.', 'Albite, Carlo Ja...",DNSC_communities.png


In [31]:
#export to json for dashboard

network_json = network_table.to_json(orient='index')

with open('suc_network.json', 'w') as f:
    json.dump(network_json, f)