In [1]:
import pandas as pd
import numpy as np
import networkx as nx

# Load the graph from a GML file in the working directory.
# Later cells look nodes up by course title and read a 'weight' attribute from edges.
G = nx.read_gml('./graph.gml')

In [16]:
# Every course that shares an edge with the Power BI course.
analytics_cluster = list(G.neighbors("Data Analytics Using Power BI"))

# Map each neighbouring course to the weight stored on the connecting edge.
neighbours_dict = {
    course: G.get_edge_data("Data Analytics Using Power BI", course)['weight']
    for course in analytics_cluster
}

# Strongest connections first; equal weights keep their neighbour-iteration order.
analytics_sorted = sorted(
    neighbours_dict.items(),
    key=lambda item: item[1],
    reverse=True,
)

In [17]:
# Show the (course, weight) pairs ranked by edge weight; this prints the whole list.
analytics_sorted

[('Data Analytics @ Work', 58),
 ('Data Analytics Using Advanced Power BI', 43),
 ('Tableau: Unlocking Insights with Analytics', 25),
 ('DATA ANALYSIS FUNDAMENTALS USING MICROSOFT EXCEL (IBF)', 16),
 ('Data Analytics for Managers', 10),
 ('Enhance Teaching Effectiveness with Learning Analytics and Data Visualisation (I) - Bootcamp for Educationists, Teachers and School Leaders',
  10),
 ('CERTIFIED SCRUM MASTER (CSM)', 8),
 ('PROJECT MANAGEMENT FOR ALL', 8),
 ('ICAGILE - AGILE TEAM FACILITATION', 8),
 ('ACCOUNTANCY', 8),
 ('ICAGILE - AGILE COACHING', 7),
 ('BUSINESS ANALYTICS WITH QLIK SENSE', 7),
 ('Professional Certificate in\\nMachine Learning', 7),
 ('Data Driven Design: Harnessing the Power of Predictive Analytics', 7),
 ('Data Analytics Using Power BI', 7),
 ('MACHINE LEARNING AND ADVANCED ANALYTICS USING PYTHON', 6),
 ('Lean Six Sigma Yellow Belt', 6),
 ('BUSINESS', 6),
 ('BUILDING AN AGILE BUSINESS STRATEGY', 5),
 ('HR Analytics Certificate - Using Analytics to Improve People D

### Degree Centrality

In [3]:
print("Nodes with Highest Degree Centrality")
print()

# Degree centrality score for every node in G.
centrality = nx.degree_centrality(G)

# Rank the (course, score) pairs from highest to lowest and keep the first ten.
top_degree_centrality = sorted(
    centrality.items(),
    key=lambda item: item[1],
    reverse=True,
)[:10]

# One row per course, in ranked order.
degree_df = pd.DataFrame(top_degree_centrality, columns=["course", "degree_centrality"])

Nodes with Highest Degree Centrality



In [4]:
# Display the ten courses with the highest degree centrality.
degree_df

Unnamed: 0,course,degree_centrality
0,DATA ANALYSIS FUNDAMENTALS USING MICROSOFT EXC...,0.397054
1,PROJECT MANAGEMENT FOR ALL,0.389525
2,ICAGILE - AGILE TEAM FACILITATION,0.379705
3,BLOCKCHAIN FOR CROSS BORDER PAYMENT,0.361702
4,CYBER SECURITY PROTECTION CERTIFICATE (CSPC),0.345336
5,BLOCKCHAIN FOR CORPORATE COMPLIANCE,0.336825
6,ICAGILE - AGILE COACHING,0.315221
7,AGILE INNOVATION POWERED BY ESSEC,0.314239
8,BUSINESS ANALYTICS WITH QLIK SENSE,0.295908
9,BUSINESS MODEL CANVAS - 9 STEPS TO A SUCCESSFU...,0.272995


### Betweenness Centrality

In [3]:
# High betweenness centrality suggests that a course acts as an important broker
# between other courses.
print("Nodes with High Betweeness Centrality")
print()

# Raw (un-normalised) betweenness score for every node in G.
betweenness = nx.betweenness_centrality(G, normalized=False)

# All (course, score) pairs, highest score first; no cut-off is applied here.
top_betweenness = sorted(
    betweenness.items(),
    key=lambda item: item[1],
    reverse=True,
)

# One row per course, in ranked order.
between_df = pd.DataFrame(top_betweenness, columns=["course", "betweenness"])

Nodes with High Betweeness Centrality



### Closeness Centrality

In [4]:
# High closeness suggests that, on average, a course has short paths to many other
# nodes, which can be helpful for disseminating resources quickly.
print("Nodes with High Closeness Centrality")
print()

# Closeness score for every node in G.
closeness = nx.closeness_centrality(G)

# All (course, score) pairs, highest score first; no cut-off is applied here.
top_closeness = sorted(
    closeness.items(),
    key=lambda item: item[1],
    reverse=True,
)

# One row per course, in ranked order.
close_df = pd.DataFrame(top_closeness, columns=["course", "closeness"])

Nodes with High Closeness Centrality



In [5]:
# Number of triangles each node takes part in.
triangles = nx.triangles(G)

# All (course, count) pairs, largest count first.
top_triangles = sorted(
    triangles.items(),
    key=lambda item: item[1],
    reverse=True,
)

# One row per course, in ranked order.
triangle_df = pd.DataFrame(top_triangles, columns=["course", "triangle_count"])

### Clustering Coefficient

In [6]:
# Clustering coefficient for every node in G.
clustering = nx.clustering(G)

# All (course, coefficient) pairs, highest coefficient first.
top_clusters = sorted(
    clustering.items(),
    key=lambda item: item[1],
    reverse=True,
)

# One row per course, in ranked order.
cluster_df = pd.DataFrame(top_clusters, columns=["course", "cluster_coef"])

In [7]:
# Courses whose clustering coefficient is exactly 1.0.
cluster_df.loc[cluster_df["cluster_coef"].eq(1.0)]

Unnamed: 0,course,cluster_coef
0,Nurturing Learners and Learning,1.0
1,P-Series: Cybersecurity Practice Module 4: Too...,1.0
2,Building Organisational Resilency through Busi...,1.0
3,SGUnited Skills Programme in Cyber Security Sp...,1.0
4,Advanced Diploma in Procurement Management & S...,1.0
...,...,...
226,Basic Portrait Drawing,1.0
227,Probability and Statistics,1.0
228,"AI for Biomedical image, Signal Analysis and H...",1.0
229,International Construction Management,1.0


In [8]:
# Courses whose clustering coefficient is anything other than 1.0.
cluster_df.loc[cluster_df["cluster_coef"].ne(1.0)]

Unnamed: 0,course,cluster_coef
231,Data Analytics - Optimizing Marketing Campaign...,0.996829
232,SGUS NETWORK ENGINEER,0.996377
233,CCNA PART-1,0.996377
234,NETWORK ENGINEER - CAPSTONE PROJECT 2,0.996377
235,NETWORK ENGINEER - SUMMATIVE CAPSTONE PROJECT,0.996377
...,...,...
3051,Housekeeping Operation (Outpatient),0.000000
3052,Practical Business Finance for Engineering and...,0.000000
3053,Advanced Diploma in Logistics and Supply Chain...,0.000000
3054,Preparatory Course for Undergraduate Physics,0.000000


In [12]:
# NOTE(review): disabled all-pairs shortest-path-length computation; presumably
# switched off because of its cost on this graph. Confirm, then delete or restore.
# shortest_paths = dict(nx.shortest_path_length(G))
# shortest_paths

In [26]:
from networkx.algorithms.community import greedy_modularity_communities

# Detect communities by greedy modularity maximisation, then order them from
# largest to smallest so that c[0] is the biggest community.
c = list(greedy_modularity_communities(G))
c.sort(key=len, reverse=True)


In [30]:
# Largest detected community (c is sorted by size, largest first).
community = c[0]
# Subgraph of G induced on that community's nodes.
G2 = G.subgraph( community )

In [31]:
# Echo the subgraph object; this shows only its repr, not its nodes or edges.
G2

<networkx.classes.graph.Graph at 0x1d0483d0a88>