In [1]:
import pandas as pd
import numpy as np
import networkx as nx

# Load the graph from the GML file at the relative path ./graph.gml.
# Later cells read a 'weight' attribute from its edges.
G = nx.read_gml('./graph.gml')

In [2]:
# Course whose direct neighbourhood is inspected in this cell.
source_course = "Data Analytics Using Power BI"
analytics_cluster = list(G.neighbors(source_course))

# Map every neighbouring course to the weight of the edge linking it to the
# source course.
# NOTE(review): the displayed result lists the source course itself, so the
# graph appears to hold a self-loop on this node - confirm whether it should
# be filtered out.
neighbours_dict = {
    neighbour: G.get_edge_data(source_course, neighbour)['weight']
    for neighbour in analytics_cluster
}

# Heaviest edges first.
analytics_sorted = sorted(
    neighbours_dict.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

In [3]:
# Display the (course, edge weight) pairs, heaviest edge first.
analytics_sorted

[('Data Analytics @ Work', 58),
 ('Data Analytics Using Advanced Power BI', 43),
 ('Tableau: Unlocking Insights with Analytics', 25),
 ('DATA ANALYSIS FUNDAMENTALS USING MICROSOFT EXCEL (IBF)', 16),
 ('Data Analytics for Managers', 10),
 ('Enhance Teaching Effectiveness with Learning Analytics and Data Visualisation (I) - Bootcamp for Educationists, Teachers and School Leaders',
  10),
 ('CERTIFIED SCRUM MASTER (CSM)', 8),
 ('PROJECT MANAGEMENT FOR ALL', 8),
 ('ICAGILE - AGILE TEAM FACILITATION', 8),
 ('ACCOUNTANCY', 8),
 ('ICAGILE - AGILE COACHING', 7),
 ('BUSINESS ANALYTICS WITH QLIK SENSE', 7),
 ('Professional Certificate in\\nMachine Learning', 7),
 ('Data Driven Design: Harnessing the Power of Predictive Analytics', 7),
 ('Data Analytics Using Power BI', 7),
 ('MACHINE LEARNING AND ADVANCED ANALYTICS USING PYTHON', 6),
 ('Lean Six Sigma Yellow Belt', 6),
 ('BUSINESS', 6),
 ('BUILDING AN AGILE BUSINESS STRATEGY', 5),
 ('HR Analytics Certificate - Using Analytics to Improve People D

In [4]:
# Count the isolated nodes (nodes without any edge) with networkx's built-in
# helper instead of a hand-written counter over nx.isolates(G).
count = nx.number_of_isolates(G)

print(count)

306


In [5]:
# Density of G as reported by networkx.
graph_density = nx.density(G)
print(graph_density)

0.01985887010394084


### Degree Centrality

In [6]:
print("Nodes with Highest Degree Centrality")
print()

# Degree centrality of every node, ordered from the highest score downwards.
centrality = nx.degree_centrality(G)
top_degree_centrality = sorted(
    centrality.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

# One row per node; the row order follows the ranking above.
degree_df = pd.DataFrame(
    data={
        "certificate": [name for name, _ in top_degree_centrality],
        "degree_centrality": [score for _, score in top_degree_centrality],
    }
)

Nodes with Highest Degree Centrality



In [7]:
# Look up the degree-centrality row of one specific certificate.
degree_df.loc[degree_df["certificate"] == "Preparatory Course for Undergraduate Physics"]

Unnamed: 0,certificate,degree_centrality
3054,Preparatory Course for Undergraduate Physics,0.0


### Betweenness Centrality

In [8]:
# High betweenness centrality suggests that a course acted as an important
# broker between other courses.
print("Nodes with High Betweeness Centrality")
print()

# Raw (unnormalised) betweenness of every node, highest score first.
betweenness = nx.betweenness_centrality(G, normalized=False)
top_betweenness = sorted(
    betweenness.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

# One row per node; the row order follows the ranking above.
between_df = pd.DataFrame(
    data={
        "certificate": [name for name, _ in top_betweenness],
        "betweenness": [score for _, score in top_betweenness],
    }
)
between_df

Nodes with High Betweeness Centrality



Unnamed: 0,certificate,betweenness
0,DATA ANALYSIS FUNDAMENTALS USING MICROSOFT EXC...,245646.983764
1,PROJECT MANAGEMENT FOR ALL,199078.896099
2,ICAGILE - AGILE TEAM FACILITATION,185257.545378
3,CYBER SECURITY PROTECTION CERTIFICATE (CSPC),141984.123965
4,BLOCKCHAIN FOR CROSS BORDER PAYMENT,106575.302195
...,...,...
3051,Housekeeping Operation (Outpatient),0.000000
3052,Practical Business Finance for Engineering and...,0.000000
3053,Advanced Diploma in Logistics and Supply Chain...,0.000000
3054,Preparatory Course for Undergraduate Physics,0.000000


In [9]:
# Rows whose betweenness is exactly zero.
between_df.loc[between_df["betweenness"].eq(0)]

Unnamed: 0,certificate,betweenness
2352,Nurturing Learners and Learning,0.0
2353,P-Series: Cybersecurity Practice Module 4: Too...,0.0
2354,Building Organisational Resilency through Busi...,0.0
2355,SGUnited Skills Programme in Cyber Security Sp...,0.0
2356,Advanced Diploma in Procurement Management & S...,0.0
...,...,...
3051,Housekeeping Operation (Outpatient),0.0
3052,Practical Business Finance for Engineering and...,0.0
3053,Advanced Diploma in Logistics and Supply Chain...,0.0
3054,Preparatory Course for Undergraduate Physics,0.0


In [10]:
# Put both centrality scores side by side, matching rows on the certificate name.
btwn_deg_df = degree_df.merge(between_df, on="certificate")

# Certificates with a non-zero degree centrality but a betweenness of zero.
zero_betweenness = btwn_deg_df["betweenness"] == 0
nonzero_degree = btwn_deg_df["degree_centrality"] != 0
btwn_deg_df[zero_betweenness & nonzero_degree]

Unnamed: 0,certificate,degree_centrality,betweenness
1290,NICF - IDENTITY WITH WINDOWS SERVER (SF) (SYNC...,0.013093,0.0
1515,Where is the Market Heading? Macroeconomics Vi...,0.008838,0.0
1516,SMU-BOS Advanced Certificate in Private Banking,0.008838,0.0
1545,Advanced Certificate in Healthcare Management ...,0.008511,0.0
1562,SGUS APPLIED VR AND GAME SPECIALIST,0.008183,0.0
...,...,...,...
2745,Diploma in Fashion Marketing and Management,0.000327,0.0
2746,Neural Networks and Deep Learning,0.000327,0.0
2747,BUILDING CONSTRUCTION SUPEVISORS SAFETY COURSE,0.000327,0.0
2748,Advanced Diploma in Cyber Security,0.000327,0.0


### Closeness Centrality

In [11]:
# High closeness suggests that a node has, on average, short paths to many
# other nodes, which can help resources spread quickly.
print("Nodes with High Closeness Centrality")
print()

# Closeness centrality of every node, highest score first.
closeness = nx.closeness_centrality(G)
top_closeness = sorted(
    closeness.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

# One row per node; the row order follows the ranking above.
close_df = pd.DataFrame(
    data={
        "course": [name for name, _ in top_closeness],
        "closeness": [score for _, score in top_closeness],
    }
)

Nodes with High Closeness Centrality



In [12]:
# Triangle count of every node as returned by nx.triangles, largest count first.
triangles = nx.triangles(G)
top_triangles = sorted(
    triangles.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

# One row per node; the row order follows the ranking above.
triangle_df = pd.DataFrame(
    data={
        "course": [name for name, _ in top_triangles],
        "triangle_count": [total for _, total in top_triangles],
    }
)

### Clustering Coefficient

In [13]:
# Clustering coefficient of every node as returned by nx.clustering, highest first.
clustering = nx.clustering(G)
top_clusters = sorted(
    clustering.items(),
    key=lambda entry: entry[1],
    reverse=True,
)

# One row per node; the row order follows the ranking above.
cluster_df = pd.DataFrame(
    data={
        "course": [name for name, _ in top_clusters],
        "cluster_coef": [coef for _, coef in top_clusters],
    }
)

In [14]:
# Courses whose clustering coefficient is exactly 1.0.
cluster_df.loc[cluster_df["cluster_coef"].eq(1.0)]

Unnamed: 0,course,cluster_coef
0,Nurturing Learners and Learning,1.0
1,P-Series: Cybersecurity Practice Module 4: Too...,1.0
2,Building Organisational Resilency through Busi...,1.0
3,SGUnited Skills Programme in Cyber Security Sp...,1.0
4,Advanced Diploma in Procurement Management & S...,1.0
...,...,...
226,Basic Portrait Drawing,1.0
227,Probability and Statistics,1.0
228,"AI for Biomedical image, Signal Analysis and H...",1.0
229,International Construction Management,1.0


In [15]:
# All remaining courses, i.e. those whose clustering coefficient differs from 1.0.
cluster_df.loc[cluster_df["cluster_coef"].ne(1.0)]

Unnamed: 0,course,cluster_coef
231,Data Analytics - Optimizing Marketing Campaign...,0.996829
232,SGUS NETWORK ENGINEER,0.996377
233,CCNA PART-1,0.996377
234,NETWORK ENGINEER - CAPSTONE PROJECT 2,0.996377
235,NETWORK ENGINEER - SUMMATIVE CAPSTONE PROJECT,0.996377
...,...,...
3051,Housekeeping Operation (Outpatient),0.000000
3052,Practical Business Finance for Engineering and...,0.000000
3053,Advanced Diploma in Logistics and Supply Chain...,0.000000
3054,Preparatory Course for Undergraduate Physics,0.000000


In [16]:
# shortest_paths = dict(nx.shortest_path_length(G))
# shortest_paths

In [17]:
from networkx.algorithms.community import greedy_modularity_communities

# Detect communities with greedy_modularity_communities and order them from
# the largest to the smallest.
c = sorted(greedy_modularity_communities(G), key=len, reverse=True)

# Keep the largest community and the subgraph of G restricted to its nodes.
community = c[0]
G2 = G.subgraph(community)

# Number of neighbours each member of that community has.
# NOTE(review): neighbours are counted in the full graph G, not in the
# community subgraph G2 - confirm this is intended.
community_dict = {cert: len(G[cert]) for cert in G2.nodes}

# Rank the community members by neighbour count, most connected first.
community_sorted = sorted(
    community_dict.items(),
    key=lambda entry: entry[1],
    reverse=True,
)


In [18]:
# Display the (certificate, neighbour count) pairs, highest count first.
community_sorted

[('DATA ANALYSIS FUNDAMENTALS USING MICROSOFT EXCEL (IBF)', 1212),
 ('PROJECT MANAGEMENT FOR ALL', 1189),
 ('ICAGILE - AGILE TEAM FACILITATION', 1160),
 ('BLOCKCHAIN FOR CROSS BORDER PAYMENT', 1104),
 ('CYBER SECURITY PROTECTION CERTIFICATE (CSPC)', 1055),
 ('BLOCKCHAIN FOR CORPORATE COMPLIANCE', 1029),
 ('ICAGILE - AGILE COACHING', 963),
 ('AGILE INNOVATION POWERED BY ESSEC', 959),
 ('BUSINESS ANALYTICS WITH QLIK SENSE', 904),
 ('BUSINESS MODEL CANVAS - 9 STEPS TO A SUCCESSFUL BUSINESS', 833),
 ('CERTIFIED SCRUM MASTER (CSM)', 819),
 ('BUILDING AN AGILE BUSINESS STRATEGY', 818),
 ('CYBINT CYBER SECURITY PROTECTION (CSPC) PROGRAM', 801),
 ('LEADING WITH AN EXPERIMENTAL MINDSET', 775),
 ('DEVELOPING AN EXPERIMENTAL MINDSET WITHIN TEAMS', 739),
 ('KNOW YOUR CUSTOMERS THROUGH HUMAN - CENTERED APPROACH', 727),
 ('CULTIVATING AN EXPERIMENTAL MINDSET', 715),
 ('BLOCKCHAIN FOR CAPITAL MARKETS', 694),
 ('HFI UX 1 - USABILITY ENGINEERING COURSE', 655),
 ('MACHINE LEARNING AND ADVANCED ANALYTICS

In [19]:
# Number of entries in community_sorted (one per certificate in the selected community).
len(community_sorted)

1078

In [31]:
# Rank every edge of G from the heaviest to the lightest.
weights = sorted(G.edges(data=True), key=lambda x: x[2]['weight'], reverse=True)

# Print the 20 heaviest edges that join two distinct nodes; self-loops are skipped.
count = 20
for item in weights:
    if count <= 0:
        # Enough edges were printed - stop instead of scanning the remaining edges.
        break
    if item[0] != item[1]:
        print(item)
        count -= 1

('SAFE MANAGEMENT OFFICER COURSE FOR CONSTRUCTION', 'SAFE MANAGEMENT OFFICER TRAINING', {'weight': 13317})
('SAFE MANAGEMENT OFFICER COURSE FOR CONSTRUCTION', 'UNDERSTANDING & APPLYING WICA & WSHA', {'weight': 6562})
('SAFE MANAGEMENT OFFICER COURSE FOR CONSTRUCTION', 'MAINTENANCE SAFETY COURSE LOCK-OUT PROCEDURES', {'weight': 5018})
('SAFE MANAGEMENT MEASURES - PREPARING WORKPLACE FOR COVID-19', 'SAFE MANAGEMENT OFFICER COURSE FOR CONSTRUCTION', {'weight': 3878})
('CPR(HANDS-ONLY)+AED PROVIDER', 'OCCUPATIONAL FIRST AID COURSE', {'weight': 3596})
('CPR(HANDS-ONLY)+AED PROVIDER', 'SAFE MANAGEMENT OFFICER COURSE FOR CONSTRUCTION', {'weight': 2631})
('OCCUPATIONAL FIRST AID COURSE', 'SAFE MANAGEMENT OFFICER COURSE FOR CONSTRUCTION', {'weight': 2586})
('SAFE MANAGEMENT OFFICER TRAINING', 'UNDERSTANDING & APPLYING WICA & WSHA', {'weight': 2346})
('SAFE MANAGEMENT OFFICER TRAINING', 'MAINTENANCE SAFETY COURSE LOCK-OUT PROCEDURES', {'weight': 1794})
('BLOCKCHAIN FOR CROSS BORDER PAYMENT', 'BL