In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# https://stackoverflow.com/questions/48298146/changing-directory-in-google-colab-breaking-out-of-the-python-interpreter
%cd '/content/drive/MyDrive/Bt4103/'

/content/drive/MyDrive/Bt4103


In [3]:
import pandas as pd
import numpy as np
log_problem = pd.read_csv('Log_Problem.csv')

In [4]:
import networkx as nx
from networkx.algorithms import community #This part of networkx, for community detection, needs to be imported separately.
from operator import itemgetter

# Create the features

In [5]:
log_problem.sort_values(by=['timestamp_TW'], inplace=True)
user_to_content = log_problem.groupby(['uuid'], sort=False)['ucid'].apply(lambda x: x.tolist()).reset_index()

In [6]:
user_to_content['to'] = user_to_content['ucid'].apply(lambda x: x[1:])
user_to_content['from'] = user_to_content['ucid'].apply(lambda x: x[:-1])
user_to_content['unique_ucid'] = user_to_content['ucid'].apply(lambda x: [content for index, content in enumerate(x[:-1]) if x[index]!=x[index+1]] + [x[-1]])

# Drop those rows with empty list
user_to_content['to_keep'] = user_to_content['unique_ucid'].apply(lambda x: np.nan if len(x)==0 else 1)
user_to_content = user_to_content.dropna(subset=['to_keep'])

# Create new source node list and the corresponding target list without any repeated consecutive contents
user_to_content.loc[:,'to2'] = user_to_content['unique_ucid'].apply(lambda x: x[1:])
user_to_content.loc[:,'from2'] = user_to_content['unique_ucid'].apply(lambda x: x[:-1])
user_to_content['to2']

0        [hv7kHCAIdj7thZUmlqz553leG5bFNYgzXmLfB5m4Xvw=,...
1        [suDCkQs0H6kriJ78abNYmrtyOrBj80FflKWAiW3yfD8=,...
2        [NoskBq4Br1/gsHaS0Pkuhdx67DlKfRiCcj2jMAWpRS8=,...
3        [4p305X/NqL1Zx9j6BDJEpiQx090kFF3o/LTTGJt5s/E=,...
4        [5gAwpmADo34wxzAw/auRsRgPsj+RvWPNWhOj3SdP0Ss=,...
                               ...                        
72753       [P9MBH2y9lTr+ueuFDr6VDrx3GSS1/MvPLRyklqdSyVM=]
72754    [OnztoLe/KRAGCzT1fhys2SSvoBd4VIDTcFpP5fMpU68=,...
72755                                                   []
72756    [vhz7ZImgcFLciakjMFdmn0S/He85wqQkhLiYXRPpu18=,...
72757                                                   []
Name: to2, Length: 72758, dtype: object

#Represent exercise as undirected graphs

In [10]:
G = nx.Graph()
for index, row in user_to_content.iterrows():
    user = row['uuid']
    from_list = row['from2']
    to_list = row['to2']
    # always check that the number 
    assert len(from_list) == len(to_list)
    for src, tgt in zip(from_list, to_list):
        if G.has_edge(src, tgt):
            G[src][tgt]['weight'] += 1
        else:
            G.add_edge(src, tgt, weight=1)

### Measure Density of the learning paths

In [11]:
# DENSITY
# Density is the ratio of the actual edges in the network to all possible edges in the network
# Interpretation: 0 to 1, the bigger the more dense your network. 1 is a perfectly connect network
# This mean that 
density = nx.density(G)
print("Network density:", density)

Network density: 0.27179601013119326


### Look at the number of Connected Components in our learning path

---




In [12]:
# COMPONENTS, AND COMPONENTS AND THIER DIAMETERS
# LOOK AT THE NUMBER OF COMPONENTS
# If we have more than one components, then this means that some course contents are very different from the 
# Eg interpretation: There is a learning path frokm length 8 between the two furthest apart courses in the network.

# If your Graph has more than one component, this will return False:
# print(nx.is_strongly_connected(G))
print(nx.is_connected(G))


# Next, use nx.connected_components to get the list of components,
# then use the max() command to find the largest one:
components = nx.connected_components(G)
largest_component = max(components, key=len)

# Create a "subgraph" of just the largest component
# Then calculate the diameter of the subgraph, just like you did with density.
# This ensures that nx.diameter will not throw an error
subgraph = G.subgraph(largest_component)
diameter = nx.diameter(subgraph)
print("Network diameter of largest component:", diameter)

True
Network diameter of largest component: 3


### Look at the Triadic Closure of our learning paths

In [13]:
# TRANSITIVITY
# Clustering coefficient is one way to measure Triadic closure as it measures the clustering tendency.
# Transitivity is the ratio of all triangles over all possible triangles
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)

Triadic closure: 0.5780489273369539


## Look at the degree of our exercises


In [14]:
## DEGREE
# Course with the highest degree are those most connected to other courses. These nodes are call Hubs.
degree_dict = dict(G.degree(G.nodes()))
nx.set_node_attributes(G, degree_dict, 'degree') # store all the degrees in the node
### Find the top few nodes with the highest degree
#  Eg print(G.nodes['William Penn'])
sorted_degree = sorted(degree_dict.items(), key=itemgetter(1), reverse=True)
print("Top 20 nodes by degree:")
for d in sorted_degree[:20]:
    print(d)

Top 20 nodes by degree:
('CPI+5YCeEmhqdk6znJeii6jJUNl1QWGEvwCUJ6uLflg=', 1244)
('hv7kHCAIdj7thZUmlqz553leG5bFNYgzXmLfB5m4Xvw=', 1207)
('PqQyQvOjvvozBCtlR/0pL8uxhe0K63IPiKYCi1tDG1o=', 1197)
('TwyqyV1uJYlDAX8wX/PtTCVZEBo/APIVfTzzleGkNCQ=', 1192)
('T7V7D8OgJF2BeuK/agj56BY9ibTmoqRbGdtwrt8a1SQ=', 1162)
('svTmVFY7kPYUWAdDc4l5Zaim90TY7RGkFLs/eaU4FVc=', 1117)
('jImuGvSym6odgRoYw7vgkq4HUbmJ1tDW9fq6bo/yN5s=', 1108)
('OVGQBBcCHSOyJ89uwY+z15n2iuZYrv8Tajf37jiGzqc=', 1074)
('w3a6Wk12CqIrUktZei2Z0W3Rf8xdgEdjIW9QX0MWIf8=', 1061)
('lvobiTtwUsd1ylmrciW7NzJnmmcqFC1J9PmoZ6R8xK0=', 1054)
('dp0m1kS6Bygoksx9S/gkBqTcWPoglbf1HT3Ma3U3N8U=', 1049)
('O/KjGB9rr8dvio3EBiuDvoL1cg/22EZaZV1FxmkURKc=', 1004)
('vhz7ZImgcFLciakjMFdmn0S/He85wqQkhLiYXRPpu18=', 989)
('dVKfHzCsi1dOBhd343bsUNF4dowsiANnvRBkWqToitg=', 982)
('tOR47i7wGbuHkGxkhX9tHItMHHuJtAttsvHDOjDRVOw=', 976)
('zX5HAUpRYC4F7YPNs5tUeBqVi++rEmPUfsAtHpoZmEQ=', 968)
('+cLbiKkV7+VhNPr7OGG3B3kPpZ8er0mvFvqPbM/gm+E=', 953)
('JcrFXI7wzq/14kZRPCdix3feq0/yQh1gHU06bGQwNBE=

## Betweenness and eigenvector centrality of our exercises

In [15]:
## EIGENVECTOR CENTRALITY
# Eigenvector centrality useful for understanding which nodes can get information to other nodes quickly.
# Example: if you know a lot of well-connected people, you can spread a message very efficiently.
# So these are courses that are related to many of the important courses, and should be placed into the learning path?


## BETWEENNESS CENTRALITY
# Betweenness centrality looks at all the shortest paths that pass through a particular node. (Thus takes very long to calculate)
# Tells us which nodes are important not because they have lots of connections themselves, but because they stand between groups, 
# Giving the network connectivity and cohesion

betweenness_dict = nx.betweenness_centrality(G) # Run betweenness centrality
eigenvector_dict = nx.eigenvector_centrality(G) # Run eigenvector centrality

# Assign each to an attribute in your network
nx.set_node_attributes(G, betweenness_dict, 'betweenness')
nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')

sorted_betweenness = sorted(betweenness_dict.items(), key=itemgetter(1), reverse=True)

print("Top 20 nodes by betweenness centrality:")
for b in sorted_betweenness[:20]:
    print(b)
    
### You can see that nodes that have high degree also have high betweenness centrality
# What if you want to know learning path have high betweenness centrality and low degree?
# I.e which high betweenness nodes are unexpected?
## If we have high betweenness centrality and low degree, these could mean that the nodes are important nodes connecting
## distant parts of the graph.

#First get the top 20 nodes by betweenness as a list
top_betweenness = sorted_betweenness[:20]

#Then find and print their degree
for tb in top_betweenness: # Loop through top_betweenness
    degree = degree_dict[tb[0]] # Use degree_dict to access a node's degree, see footnote 2
    print("Name:", tb[0], "| Betweenness Centrality:", tb[1], "| Degree:", degree)

Top 20 nodes by betweenness centrality:
('CPI+5YCeEmhqdk6znJeii6jJUNl1QWGEvwCUJ6uLflg=', 0.018921296601167693)
('TwyqyV1uJYlDAX8wX/PtTCVZEBo/APIVfTzzleGkNCQ=', 0.01602455489017069)
('hv7kHCAIdj7thZUmlqz553leG5bFNYgzXmLfB5m4Xvw=', 0.015960378984617368)
('PqQyQvOjvvozBCtlR/0pL8uxhe0K63IPiKYCi1tDG1o=', 0.015435173827463225)
('T7V7D8OgJF2BeuK/agj56BY9ibTmoqRbGdtwrt8a1SQ=', 0.01294176919478974)
('svTmVFY7kPYUWAdDc4l5Zaim90TY7RGkFLs/eaU4FVc=', 0.010966613798047338)
('jImuGvSym6odgRoYw7vgkq4HUbmJ1tDW9fq6bo/yN5s=', 0.010337075994320133)
('tOR47i7wGbuHkGxkhX9tHItMHHuJtAttsvHDOjDRVOw=', 0.009336667887392635)
('OVGQBBcCHSOyJ89uwY+z15n2iuZYrv8Tajf37jiGzqc=', 0.00831595622511032)
('w3a6Wk12CqIrUktZei2Z0W3Rf8xdgEdjIW9QX0MWIf8=', 0.008254212352098514)
('lvobiTtwUsd1ylmrciW7NzJnmmcqFC1J9PmoZ6R8xK0=', 0.008020104105177922)
('dp0m1kS6Bygoksx9S/gkBqTcWPoglbf1HT3Ma3U3N8U=', 0.007495889794790361)
('+cLbiKkV7+VhNPr7OGG3B3kPpZ8er0mvFvqPbM/gm+E=', 0.007260536187362074)
('O/KjGB9rr8dvio3EBiuDvoL1cg/22EZaZV1Fxm

## Look at the number Communities in our network

In [None]:
# COMMUNITIES
## Community detection with modularity
## Modularity is a measure of relative density in your network. 
## A community has a high density relative to other nodes within its module, but low density with those outside
## Modularity gives you an overall score of how fractious your network is, and that is a score
# that can be used to partition the network and return the inidividual communities

# Very dense networks are often more difficult to split into sensible partitions.
# Luckily, networks are not all that dense. Our network is only 0.223
# Although there are some built in approahces like 'minimum cut', modularity is not included in networkX
# We just use an addiitonal python module, YAY
communities = community.greedy_modularity_communities(G)
## the method greedy_modularity_communities() tries to determine the number of communities appropriate for the graphs
## Group all nodes into the subsets based on these communities.

modularity_dict = {} # Create a blank dictionary
for i,c in enumerate(communities): # Loop through the list of communities, keeping track of the number for the community
    for name in c: # Loop through each person in a community
        modularity_dict[name] = i # Create an entry in the dictionary for the person, where the value is which group they belong to.

# Now you can add modularity information like we did the other metrics
nx.set_node_attributes(G, modularity_dict, 'modularity')

## Then in each of these communities, you do your centrality measures like EIGENVECTOR etc etc 

# First get a list of just the nodes in that class
class0 = [n for n in G.nodes() if G.nodes[n]['modularity'] == 0]

# Then create a dictionary of the eigenvector centralities of those nodes
class0_eigenvector = {n:G.nodes[n]['eigenvector'] for n in class0}

# Then sort that dictionary and print the first 5 results
class0_sorted_by_eigenvector = sorted(class0_eigenvector.items(), key=itemgetter(1), reverse=True)

print("Modularity Class 0 Sorted by Eigenvector Centrality:")
for node in class0_sorted_by_eigenvector[:5]:
    print("Name:", node[0], "| Eigenvector Centrality:", node[1])
    
    
### Using eigenvector centrality as a ranking can give you a sense of the important people within this modularity class
##  In smaller networks, it may be a common task to find a list of all the modularity classes and their members

for i,c in enumerate(communities): # Loop through the list of communities
    if len(c) > 2: # Filter out modularity classes with 2 or fewer nodes
        print('Class '+str(i)+':', list(c)) # Print out the classes and their members

Modularity Class 0 Sorted by Eigenvector Centrality:
Name: CPI+5YCeEmhqdk6znJeii6jJUNl1QWGEvwCUJ6uLflg= | Eigenvector Centrality: 0.06025249099354128
Name: hv7kHCAIdj7thZUmlqz553leG5bFNYgzXmLfB5m4Xvw= | Eigenvector Centrality: 0.05985130504037195
Name: PqQyQvOjvvozBCtlR/0pL8uxhe0K63IPiKYCi1tDG1o= | Eigenvector Centrality: 0.05977755197096649
Name: TwyqyV1uJYlDAX8wX/PtTCVZEBo/APIVfTzzleGkNCQ= | Eigenvector Centrality: 0.059691360376734406
Name: T7V7D8OgJF2BeuK/agj56BY9ibTmoqRbGdtwrt8a1SQ= | Eigenvector Centrality: 0.05945916053776811
Class 0: ['SPmFV4RvUZLm41Gfgt8kBm8u6VzSl1tNcT5BsAQvMls=', 'mzSnT1CgsdPVx0yFebyrR837l1a+t4MBMugTwpmRQ7g=', 'pdYh3TvKBK1LgOTy1Vc5GCkzssnNDhK3xSBLdsGpfLA=', 'bY13RsIM2Xlj/qRBujFNedeCeDFFNKV1YUp+1kTLfAA=', 'awqH6F/TgV0QhF46PtnHP7eSsTSHSXMKIO4KqJaMn04=', 'hLFiBikqhFsQw0px/iUFWmJ/TvnIlZpv95bRtR7baZM=', 'oujB8OrUofV7wuIygS0fGGodg6zwhEMNdNDLD2bzHhY=', 'fD8zM1ZR2ONSCf1qFOxRewRZWPyivWHdZKlbbOaK+tY=', 'AY0mOAFvi9VmstXOvox56zDaP9fa9bTwKm2mjraTSpM=', 'abZeR/hnIssN0A8N+K

In [None]:
print(len(communities))

1


# Represent our exercises as a Directed Graph


-- Explore methods to prune a tree?

In [None]:
G = nx.DiGraph()
for index, row in user_to_content.iterrows():
    user = row['uuid']
    from_list = row['from2']
    to_list = row['to2']
    # always check that the number 
    assert len(from_list) == len(to_list)
    for src, tgt in zip(from_list, to_list):
        if G.has_edge(src, tgt):
            G[src][tgt]['weight'] += 1
        else:
            G.add_edge(src, tgt, weight=1)

### Measure Density of our learning paths

In [None]:
# DENSITY
# Density is the ratio of the actual edges in the network to all possible edges in the network
# Interpretation: 0 to 1, the bigger the more dense your network. 1 is a perfectly connect network
# This mean that 
density = nx.density(G)
print("Network density:", density)

Network density: 0.22516975440393863


### Look at the number of Connected Components in our learning paths


In [None]:
# COMPONENTS, AND COMPONENTS AND THIER DIAMETERS
# LOOK AT THE NUMBER OF COMPONENTS
# If we have more than one components, then this means that some course contents are very different from the 
# Eg interpretation: There is a learning path frokm length 8 between the two furthest apart courses in the network.

# If your Graph has more than one component, this will return False:
# print(nx.is_strongly_connected(G))
print(nx.is_strongly_connected(G))


# Next, use nx.connected_components to get the list of components,
# then use the max() command to find the largest one:
components = nx.strongly_connected_components(G)
largest_component = max(components, key=len)

# Create a "subgraph" of just the largest component
# Then calculate the diameter of the subgraph, just like you did with density.
# This ensures that nx.diameter will not throw an error
subgraph = G.subgraph(largest_component)
diameter = nx.diameter(subgraph)
print("Network diameter of largest component:", diameter)

True
Network diameter of largest component: 3


### Look at the Triadic Closure of our learning paths

In [None]:
# TRANSITIVITY
# Clustering coefficient is one way to measure Triadic closure as it measures the clustering tendency.
# Transitivity is the ratio of all triangles over all possible triangles
triadic_closure = nx.transitivity(G)
print("Triadic closure:", triadic_closure)

Triadic closure: 0.5400136258370033


## Measure the degree of our exercises


In [None]:
## DEGREE
# Course with the highest degree are those most connected to other courses. These nodes are call Hubs.
degree_dict = dict(G.degree(G.nodes()))
nx.set_node_attributes(G, degree_dict, 'degree') # store all the degrees in the node
### Find the top few nodes with the highest degree
#  Eg print(G.nodes['William Penn'])
sorted_degree = sorted(degree_dict.items(), key=itemgetter(1), reverse=True)
print("Top 20 nodes by degree:")
for d in sorted_degree[:20]:
    print(d)

Top 20 nodes by degree:
('CPI+5YCeEmhqdk6znJeii6jJUNl1QWGEvwCUJ6uLflg=', 2402)
('hv7kHCAIdj7thZUmlqz553leG5bFNYgzXmLfB5m4Xvw=', 2311)
('TwyqyV1uJYlDAX8wX/PtTCVZEBo/APIVfTzzleGkNCQ=', 2267)
('PqQyQvOjvvozBCtlR/0pL8uxhe0K63IPiKYCi1tDG1o=', 2265)
('T7V7D8OgJF2BeuK/agj56BY9ibTmoqRbGdtwrt8a1SQ=', 2160)
('svTmVFY7kPYUWAdDc4l5Zaim90TY7RGkFLs/eaU4FVc=', 2078)
('jImuGvSym6odgRoYw7vgkq4HUbmJ1tDW9fq6bo/yN5s=', 2041)
('OVGQBBcCHSOyJ89uwY+z15n2iuZYrv8Tajf37jiGzqc=', 1944)
('w3a6Wk12CqIrUktZei2Z0W3Rf8xdgEdjIW9QX0MWIf8=', 1926)
('dp0m1kS6Bygoksx9S/gkBqTcWPoglbf1HT3Ma3U3N8U=', 1914)
('lvobiTtwUsd1ylmrciW7NzJnmmcqFC1J9PmoZ6R8xK0=', 1909)
('O/KjGB9rr8dvio3EBiuDvoL1cg/22EZaZV1FxmkURKc=', 1803)
('dVKfHzCsi1dOBhd343bsUNF4dowsiANnvRBkWqToitg=', 1765)
('vhz7ZImgcFLciakjMFdmn0S/He85wqQkhLiYXRPpu18=', 1726)
('zX5HAUpRYC4F7YPNs5tUeBqVi++rEmPUfsAtHpoZmEQ=', 1702)
('tOR47i7wGbuHkGxkhX9tHItMHHuJtAttsvHDOjDRVOw=', 1665)
('+cLbiKkV7+VhNPr7OGG3B3kPpZ8er0mvFvqPbM/gm+E=', 1630)
('JcrFXI7wzq/14kZRPCdix3feq0/yQh1gHU06bGQ

## Measure the Betweenness and eigenvector centrality of our exercises

In [None]:
## EIGENVECTOR CENTRALITY
# Eigenvector centrality useful for understanding which nodes can get information to other nodes quickly.
# Example: if you know a lot of well-connected people, you can spread a message very efficiently.
# So these are courses that are related to many of the important courses, and should be placed into the learning path?


## BETWEENNESS CENTRALITY
# Betweenness centrality looks at all the shortest paths that pass through a particular node. (Thus takes very long to calculate)
# Tells us which nodes are important not because they have lots of connections themselves, but because they stand between groups, 
# Giving the network connectivity and cohesion

betweenness_dict = nx.betweenness_centrality(G) # Run betweenness centrality
eigenvector_dict = nx.eigenvector_centrality(G) # Run eigenvector centrality

# Assign each to an attribute in your network
nx.set_node_attributes(G, betweenness_dict, 'betweenness')
nx.set_node_attributes(G, eigenvector_dict, 'eigenvector')

sorted_betweenness = sorted(betweenness_dict.items(), key=itemgetter(1), reverse=True)

print("Top 20 nodes by betweenness centrality:")
for b in sorted_betweenness[:20]:
    print(b)
    
### You can see that nodes that have high degree also have high betweenness centrality
# What if you want to know learning path have high betweenness centrality and low degree?
# I.e which high betweenness nodes are unexpected?
## If we have high betweenness centrality and low degree, these could mean that the nodes are important nodes connecting
## distant parts of the graph.

#First get the top 20 nodes by betweenness as a list
top_betweenness = sorted_betweenness[:20]

#Then find and print their degree
for tb in top_betweenness: # Loop through top_betweenness
    degree = degree_dict[tb[0]] # Use degree_dict to access a node's degree, see footnote 2
    print("Name:", tb[0], "| Betweenness Centrality:", tb[1], "| Degree:", degree)

Top 20 nodes by betweenness centrality:
('CPI+5YCeEmhqdk6znJeii6jJUNl1QWGEvwCUJ6uLflg=', 0.0268247805772182)
('hv7kHCAIdj7thZUmlqz553leG5bFNYgzXmLfB5m4Xvw=', 0.022022336541282342)
('TwyqyV1uJYlDAX8wX/PtTCVZEBo/APIVfTzzleGkNCQ=', 0.02141351611047956)
('PqQyQvOjvvozBCtlR/0pL8uxhe0K63IPiKYCi1tDG1o=', 0.020902246663373807)
('T7V7D8OgJF2BeuK/agj56BY9ibTmoqRbGdtwrt8a1SQ=', 0.015993187614160874)
('svTmVFY7kPYUWAdDc4l5Zaim90TY7RGkFLs/eaU4FVc=', 0.013812138724035491)
('jImuGvSym6odgRoYw7vgkq4HUbmJ1tDW9fq6bo/yN5s=', 0.012946982463347398)
('tOR47i7wGbuHkGxkhX9tHItMHHuJtAttsvHDOjDRVOw=', 0.009891927701134156)
('lvobiTtwUsd1ylmrciW7NzJnmmcqFC1J9PmoZ6R8xK0=', 0.009861133455573231)
('OVGQBBcCHSOyJ89uwY+z15n2iuZYrv8Tajf37jiGzqc=', 0.00970807680008711)
('w3a6Wk12CqIrUktZei2Z0W3Rf8xdgEdjIW9QX0MWIf8=', 0.009591450309513687)
('dp0m1kS6Bygoksx9S/gkBqTcWPoglbf1HT3Ma3U3N8U=', 0.008976319775588612)
('+cLbiKkV7+VhNPr7OGG3B3kPpZ8er0mvFvqPbM/gm+E=', 0.008110899565784882)
('O/KjGB9rr8dvio3EBiuDvoL1cg/22EZaZV1Fxmk

## Look at the number of Communities in our learning paths
- Note that there is currently a bug in directed graph for this

In [None]:
# COMMUNITIES
## Community detection with modularity
## Modularity is a measure of relative density in your network. 
## A community has a high density relative to other nodes within its module, but low density with those outside
## Modularity gives you an overall score of how fractious your network is, and that is a score
# that can be used to partition the network and return the inidividual communities

# Very dense networks are often more difficult to split into sensible partitions.
# Luckily, networks are not all that dense. Our network is only 0.223
# Although there are some built in approahces like 'minimum cut', modularity is not included in networkX
# We just use an addiitonal python module, YAY
communities = community.greedy_modularity_communities(G)
## the method greedy_modularity_communities() tries to determine the number of communities appropriate for the graphs
## Group all nodes into the subsets based on these communities.

modularity_dict = {} # Create a blank dictionary
for i,c in enumerate(communities): # Loop through the list of communities, keeping track of the number for the community
    for name in c: # Loop through each person in a community
        modularity_dict[name] = i # Create an entry in the dictionary for the person, where the value is which group they belong to.

# Now you can add modularity information like we did the other metrics
nx.set_node_attributes(G, modularity_dict, 'modularity')

## Then in each of these communities, you do your centrality measures like EIGENVECTOR etc etc 

# First get a list of just the nodes in that class
class0 = [n for n in G.nodes() if G.nodes[n]['modularity'] == 0]

# Then create a dictionary of the eigenvector centralities of those nodes
class0_eigenvector = {n:G.nodes[n]['eigenvector'] for n in class0}

# Then sort that dictionary and print the first 5 results
class0_sorted_by_eigenvector = sorted(class0_eigenvector.items(), key=itemgetter(1), reverse=True)

print("Modularity Class 0 Sorted by Eigenvector Centrality:")
for node in class0_sorted_by_eigenvector[:5]:
    print("Name:", node[0], "| Eigenvector Centrality:", node[1])
    
    
### Using eigenvector centrality as a ranking can give you a sense of the important people within this modularity class
##  In smaller networks, it may be a common task to find a list of all the modularity classes and their members

for i,c in enumerate(communities): # Loop through the list of communities
    if len(c) > 2: # Filter out modularity classes with 2 or fewer nodes
        print('Class '+str(i)+':', list(c)) # Print out the classes and their members

KeyError: ignored