In [None]:
%pip install networkx pyvis

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from pyvis.network import Network
import seaborn as sns

In [None]:
# Suppress warnings
# NOTE(review): the 'ignore' action is installed without a category or module
# filter, so it applies to every warning raised after this cell runs.

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the GML file into a NetworkX graph (not a data frame); label='id' asks
# read_gml to name the nodes by their 'id' attribute.
G = nx.read_gml('data/karate.gml', label='id')

In [None]:
# Print the graph's string summary; this is expected to report the number of
# vertices and edges (the exact text depends on the installed networkx version).
print(G)

In [None]:
# Quick default drawing of the graph, with each node's label shown

nx.draw(G, with_labels=True)

In [None]:
#import Pyvis
from pyvis.network import Network


In [None]:
# Draw the graph with node size and colour both encoding node degree
plt.figure(figsize=(15, 10))

# Spring layout starts from random positions; a fixed seed makes the figure
# identical every time the notebook is re-run from a fresh kernel.
pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)

# Look every degree up once (same order as G.nodes()) and reuse it for both
# the marker size and the colour scale.
node_degrees = [degree for _, degree in G.degree()]
node_sizes = [300 * degree for degree in node_degrees]
node_colors = list(node_degrees)

nx.draw_networkx_nodes(G, pos,
                       node_size=node_sizes,
                       node_color=node_colors,
                       cmap='viridis',
                       alpha=0.8)

# Draw edges faintly so the nodes stay readable
nx.draw_networkx_edges(G, pos, alpha=0.3, width=1)

# Draw labels
nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold')

plt.title('Karate Club Network', fontsize=16, fontweight='bold')
plt.axis('off')
plt.tight_layout()
plt.show()

print(f"Network has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

In [None]:
# Print all the nodes (the graph's node view)
print(G.nodes)

# Print all the edges (the graph's edge view)
print(G.edges)

In [None]:
# Collect the neighbors of node 1 into a list, then show that list
neighbors = list(G.neighbors(1))
print(neighbors)

In [None]:
# Degree centrality of a node = its number of neighbors divided by the number
# of neighbors it would have in a complete graph; printed as a node -> value mapping.
print(nx.degree_centrality(G))

In [None]:
#### Distribution of degrees in the graph
degree_sequence = [d for n, d in G.degree()]
plt.figure(figsize=(10, 6))
# Degrees are integers, so use discrete bins: one unit-wide bar centred on each
# value. Explicit edges ending at the maximum degree would make the final bin
# cover both max-1 and max, merging the two highest degrees into a single bar.
sns.histplot(degree_sequence, discrete=True, kde=False, color='blue')
plt.title('Degree Distribution of Karate Club Network', fontsize=16, fontweight='bold')
plt.xlabel('Degree', fontsize=14)
plt.ylabel('Frequency', fontsize=14)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
#import library
from collections import Counter

# Tally how many nodes share each degree value, then split the tally into
# parallel tuples of degrees and their frequencies
degree_freq = Counter(degree_sequence)
deg, freq = zip(*degree_freq.items())

# Bar chart with one bar, and one x tick, per observed degree
plt.figure(figsize=(10, 6))
plt.bar(deg, freq, width=0.80, color='b')
plt.xticks(list(deg))
plt.xlabel('Degree', fontsize=14)
plt.ylabel('Frequency', fontsize=14)
plt.title('Degree Frequency Distribution', fontsize=16, fontweight='bold')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()



In [None]:
#get the distribution of degrees
from collections import Counter

# Map each degree value to the number of nodes that have it
degree_count = Counter(degree for _, degree in G.degree())
print(degree_count)

In [None]:
# Tabulate the degree distribution: one row per distinct degree value
degree_hist = pd.DataFrame(list(degree_count.items()),
                           columns=['Degree', 'Number of Nodes'])

# Bar chart of degree vs. number of nodes, with a tick on every observed degree
plt.figure(figsize=(10, 6))
plt.bar(degree_hist['Degree'], degree_hist['Number of Nodes'], width=0.80, color='g')
plt.xticks(degree_hist['Degree'])
plt.xlabel('Degree', fontsize=14)
plt.ylabel('Number of Nodes', fontsize=14)
plt.title('Degree Histogram', fontsize=16, fontweight='bold')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
#define a function that gives nodes with m neighbors
def node_mnbrs(G, m):
    """Return the set of nodes of ``G`` that have exactly ``m`` neighbors.

    Parameters
    ----------
    G : graph-like object
        Must provide ``nodes()`` and ``neighbors(n)``.
    m : int
        Required number of neighbors.

    Returns
    -------
    set
        Every node ``n`` for which ``G.neighbors(n)`` yields exactly ``m`` items;
        empty when no node qualifies.
    """
    return {n for n in G.nodes() if sum(1 for _ in G.neighbors(n)) == m}

In [None]:
# Print the nodes that have exactly one neighbor
print(node_mnbrs(G, 1))


In [None]:
# Density = number of edges present / number of edges possible between the same nodes
print(nx.density(G))

In [None]:
# Example: one shortest path from node 1 to node 20, printed as a list of nodes

print(nx.shortest_path(G, source=1, target=20))

# Diameter of the graph = length of the shortest path between the two nodes
# that are furthest apart
print(nx.diameter(G))

In [None]:
# Node connectivity = minimum number of nodes that must be removed to disconnect the graph
print(nx.node_connectivity(G))

In [None]:
# Betweenness centrality = for each node, the fraction of all shortest paths
# that pass through that node
between = nx.betweenness_centrality(G)
print(between)

In [None]:
##### Determination of cliques
# nx.find_cliques yields only the *maximal* cliques (cliques that cannot be
# enlarged by adding another node), not every clique, so the printed labels
# say "maximal" to match what is actually counted.
cliques = list(nx.find_cliques(G))
print(f"Number of maximal cliques in the graph: {len(cliques)}")
print("Maximal cliques:")
for clique in cliques:
    print(clique)

In [None]:
#find the cliques of a given size
def maximal_cliques(G, s):
    """Return the cliques reported by ``nx.find_cliques(G)`` that have exactly ``s`` nodes.

    Parameters
    ----------
    G : graph
        Passed unchanged to ``nx.find_cliques``.
    s : int
        Required clique size.

    Returns
    -------
    list
        The matching cliques, in the order ``nx.find_cliques`` produced them;
        empty when none has size ``s``.
    """
    return [clique for clique in nx.find_cliques(G) if len(clique) == s]

In [None]:
# Print the cliques of size 5 found by maximal_cliques
print(maximal_cliques(G, 5))

In [None]:
# Collect the neighbors of node 1 as a list and print them
nodes = list(G.neighbors(1))
print(nodes)

In [None]:
# Create a subgraph of node 1 and its neighbors.
# The node list is rebuilt from the graph rather than appended to in place, so
# re-running this cell always yields the same list (neighbors followed by 1).
nodes = list(G.neighbors(1)) + [1]
G_one = G.subgraph(nodes)

# Draw it; the layout seed is fixed so the picture is reproducible across runs
plt.figure(figsize=(8, 6))
pos = nx.spring_layout(G_one, seed=42)
nx.draw(G_one, pos, with_labels=True, node_color='lightblue', edge_color='gray', node_size=500, font_size=12)
plt.title('Subgraph of Node 1 and its Neighbors', fontsize=16, fontweight='bold')
plt.show()

In [None]:
### Game of Thrones

# Load the CSV into a DataFrame; later cells read its 'Source' and 'Target' columns
GOT = pd.read_csv('data/got_book_1.csv')

In [None]:
# Inspect the table: its dimensions, then the first five rows.
# Only the last bare expression of a cell is rendered, so both are shown
# explicitly (display is the built-in provided by the Jupyter/IPython kernel).
print(GOT.shape)
display(GOT.head(5))

# Print the number of unique characters: distinct values across both columns
print(len(set(GOT['Source']) | set(GOT['Target'])))

# Print the number of connections (the number of rows in the table)
print(GOT.shape[0])

In [None]:
# Report the character with the highest degree centrality.
# NOTE(review): this rebinds G, which the earlier cells use for the graph read
# from data/karate.gml; re-running those cells afterwards will act on this
# graph instead. Consider a dedicated name.
import networkx as nx
G = nx.from_pandas_edgelist(GOT, source='Source', target='Target')
centrality = nx.degree_centrality(G)
most_important = max(centrality.items(), key=lambda item: item[1])[0]
print(f"The most important character in Game of Thrones is: {most_important}")