In [1]:
import networkx as nx
import pandas as pd
import powerlaw
import matplotlib.pyplot as plt
import time
import seaborn as sns
import itertools

In [2]:
def top(d: dict, n: int = 5) -> None:
    """Print the ``n`` entries of ``d`` with the largest values.

    Each entry is printed on its own line as ``key: value``, from the highest
    value down. Entries with equal values keep their insertion order because
    the sort is stable.

    Args:
        d: Mapping from a key (e.g. a node) to a sortable score.
        n: Maximum number of entries to print. Defaults to 5.

    Returns:
        None. The ranking is only printed, never returned.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError('n must be non-negative')
    ranked = sorted(d.items(), key=lambda item: item[1], reverse=True)
    for key, value in ranked[:n]:
        print(f'{key}: {value}')

In [3]:
# Read the edge list from CSV and build a NetworkX graph out of it.
dataset_name = 'TV Shows'
df = pd.read_csv('tvshow_edges.csv')

# Every row of the frame becomes one edge tuple.
edges = list(map(tuple, df.to_numpy()))

# Start from an empty graph and insert all edges in one call.
G_nx = nx.Graph()
G_nx.add_edges_from(edges)

In [None]:
# Two-panel overview figure: the network drawing (left) and the degree
# histogram (right). Explicit axes are used instead of pyplot's implicit
# "current axes" so each call states which panel it targets.
fig, (ax_network, ax_degree) = plt.subplots(1, 2, figsize=(12, 6))

# Panel 1: Network Visualization
# spring_layout starts from random positions; a fixed seed makes the figure
# identical on every run.
pos = nx.spring_layout(G_nx, seed=42)
nx.draw(G_nx, pos=pos, node_size=10, ax=ax_network)
ax_network.set_title('Network Visualization of ' + dataset_name)

# Panel 2: Degree Distribution
degrees = [d for n, d in G_nx.degree()]
ax_degree.hist(degrees, bins=30, alpha=0.7)
ax_degree.set_title('Degree Distribution of ' + dataset_name)
ax_degree.set_xlabel('Degree')
ax_degree.set_ylabel('Frequency')

# Adjust layout to prevent overlap
fig.tight_layout()

# Show the combined plot
plt.show()

In [None]:
# Calculate network properties
# All statistics below are derived from degree_distr, so this cell no longer
# depends on the `degrees` list created in the plotting cell.
degree_distr = sorted(dict(G_nx.degree()).values(), reverse=True)

# Node degrees are integers, so request powerlaw's discrete estimator; the
# default treats the data as continuous.
fit = powerlaw.Fit(degree_distr, discrete=True)

print('Number of nodes:', G_nx.number_of_nodes())
print('Number of edges:', G_nx.number_of_edges())
print('Min degree:', min(degree_distr))
print('Max degree:', max(degree_distr))
print('Average degree:', sum(degree_distr) / len(degree_distr))
print('Estimated gamma:', fit.alpha)

## Centrality

In [None]:
# Score every node of G_nx by betweenness centrality and list the top entries.
betweenness_centrality = nx.betweenness_centrality(G_nx)

print('Betweenness Centrality:')
top(d=betweenness_centrality)

In [None]:
# Score every node of G_nx by closeness centrality and list the top entries.
closeness_centrality = nx.closeness_centrality(G_nx)

print('Closeness Centrality:')
top(d=closeness_centrality)

In [None]:
# Score every node of G_nx by degree centrality and list the top entries.
degree_centrality = nx.degree_centrality(G_nx)

print('Degree Centrality:')
top(d=degree_centrality)

In [None]:
# Score every node of G_nx by eigenvector centrality and list the top entries.
eigenvector_centrality = nx.eigenvector_centrality(G_nx)

print('Eigenvector Centrality:')
top(d=eigenvector_centrality)

In [None]:
# Score every node of G_nx with PageRank and list the top entries.
page_rank_centrality = nx.pagerank(G_nx)

print('Page Rank Centrality:')
top(d=page_rank_centrality)

In [None]:
# nx.hits returns a pair of score dictionaries; only the first element is kept
# (the hub scores, per the NetworkX documentation).
hits_scores = nx.hits(G_nx)
hits_centrality = hits_scores[0]

print('HITS Centrality:')
top(d=hits_centrality)

In [None]:
# Score every node of G_nx by current-flow betweenness and list the top entries.
current_flow_betweenness_centrality = nx.current_flow_betweenness_centrality(G_nx)

print('Current Flow Betweenness:')
top(d=current_flow_betweenness_centrality)

In [None]:
# Score every node of G_nx by load centrality and list the top entries.
load_centrality = nx.load_centrality(G_nx)

print('Load Centrality:')
top(d=load_centrality)

In [None]:
# Score every node of G_nx by subgraph centrality and list the top entries.
subgraph_centrality = nx.subgraph_centrality(G_nx)

print('Subgraph Centrality:')
top(d=subgraph_centrality)

In [None]:
# Score every node of G_nx by harmonic centrality and list the top entries.
harmonic_centrality = nx.harmonic_centrality(G_nx)

print('Harmonic Centrality:')
top(d=harmonic_centrality)

In [None]:
# Collect the per-node score dictionaries under the labels used on the heatmap.
centrality_measures = {
    'Betweenness': betweenness_centrality,
    'Closeness': closeness_centrality,
    'Degree': degree_centrality,
    'Eigenvector': eigenvector_centrality,
    'Page Rank': page_rank_centrality,
    'HITS': hits_centrality,
    'Current Flow': current_flow_betweenness_centrality,
    'Load': load_centrality,
    'Subgraph': subgraph_centrality,
    'Harmonic': harmonic_centrality
}

# One column per measure, one row per node; then pairwise column correlations.
centrality_df = pd.DataFrame(centrality_measures)
centrality_corr = centrality_df.corr()

# Draw the correlation matrix as an annotated heatmap on an explicit axes.
fig, ax = plt.subplots(figsize=(8, 8))
ax.set_title('Correlation Heatmap of Centrality Measures')
sns.heatmap(centrality_corr, annot=True, cmap='coolwarm', fmt='.2f', square=True, ax=ax)
plt.setp(ax.get_xticklabels(), rotation=45)
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()
plt.show()

## Community Detection

In [None]:
# Result containers, each keyed by the community-detection algorithm's label.
# Four separate dict objects are created, one per container.
communities, modularity_values = dict(), dict()
coverage_values, performance_values = dict(), dict()

# Numeric parameters read by later cells (k is passed to asyn_fluidc).
k, max_size = 4, 4

In [None]:
# Kernighan-Lin bisection: store the partition and its quality scores.
algorithm_name = 'Bipartitions'

# The algorithm is randomised; a fixed seed makes the partition (and every
# score derived from it) reproducible across runs.
detected = nx.community.kernighan_lin_bisection(G_nx, seed=42)
communities[algorithm_name] = detected
print('No of communities: ', len(detected))

modularity_values[algorithm_name] = nx.community.modularity(G_nx, detected)

# partition_quality yields (coverage, performance), in that order.
partition_quality = nx.community.partition_quality(G_nx, detected)
coverage_values[algorithm_name], performance_values[algorithm_name] = partition_quality

In [None]:
# Greedy modularity communities: store the partition and its quality scores.
algorithm_name = 'Modularity-based communities'

detected = nx.community.greedy_modularity_communities(G_nx)
communities[algorithm_name] = detected
print('No of communities: ', len(detected))

modularity_values[algorithm_name] = nx.community.modularity(G_nx, detected)

# Index 0 of the result is stored as coverage, index 1 as performance.
partition_quality = nx.community.partition_quality(G_nx, detected)
coverage_values[algorithm_name], performance_values[algorithm_name] = partition_quality

In [None]:
# Label propagation communities: store the partition and its quality scores.
algorithm_name = 'Label propagation'

detected = nx.community.label_propagation_communities(G_nx)
communities[algorithm_name] = detected
print('No of communities: ', len(detected))

modularity_values[algorithm_name] = nx.community.modularity(G_nx, detected)

# Index 0 of the result is stored as coverage, index 1 as performance.
partition_quality = nx.community.partition_quality(G_nx, detected)
coverage_values[algorithm_name], performance_values[algorithm_name] = partition_quality

In [None]:
# Louvain communities: store the partition and its quality scores.
algorithm_name = 'Louvain Community Detection'

# Louvain visits nodes in random order; a fixed seed makes the partition (and
# every score derived from it) reproducible across runs.
detected = nx.community.louvain_communities(G_nx, seed=42)
communities[algorithm_name] = detected
print('No of communities: ', len(detected))

modularity_values[algorithm_name] = nx.community.modularity(G_nx, detected)

# partition_quality yields (coverage, performance), in that order.
partition_quality = nx.community.partition_quality(G_nx, detected)
coverage_values[algorithm_name], performance_values[algorithm_name] = partition_quality

In [None]:
# Fluid communities with k groups: store the partition and its quality scores.
algorithm_name = 'Fluid Communities'

# asyn_fluidc is randomised; a fixed seed makes the partition reproducible.
# It returns an iterator, so materialise it once for the repeated uses below.
# NOTE(review): per the NetworkX docs this algorithm requires a connected
# graph - confirm G_nx is connected.
detected = list(nx.community.asyn_fluidc(G_nx, k, seed=42))
communities[algorithm_name] = detected
print('No of communities: ', len(detected))

modularity_values[algorithm_name] = nx.community.modularity(G_nx, detected)

# partition_quality yields (coverage, performance), in that order.
partition_quality = nx.community.partition_quality(G_nx, detected)
coverage_values[algorithm_name], performance_values[algorithm_name] = partition_quality

## Output

In [None]:
# Text summary of the whole analysis: global graph statistics, the centrality
# correlation table, and the per-algorithm community quality scores.
print("Output:\n")

# Each statistic is computed right before it is printed, in this order.
for label, metric in (
    ('Graph density:', nx.density),
    ('Clustering coefficient:', nx.average_clustering),
    ('Assortativity coefficient:', nx.degree_assortativity_coefficient),
):
    print(label, metric(G_nx))

print("\nCorrelation between centrality measures:")
print(centrality_corr)

print("\n")
# One heading line followed by the raw score dictionary for each quality measure.
for heading, scores in (
    ("Modularity of Community Detection Algorithms", modularity_values),
    ("Coverage of Community Detection Algorithms", coverage_values),
    ("Performance of Community Detection Algorithms", performance_values),
):
    print(heading)
    print(scores)

In [None]:
# Draw the graph once per community-detection algorithm, colouring each node
# by the community it was assigned to.
if not communities:
    raise ValueError('No community partitions to plot; run the detection cells first.')

# One shared, seeded layout: every panel uses the same node positions and the
# figure is identical on every run.
pos = nx.spring_layout(G_nx, seed=42)
num_algorithms = len(communities)

# squeeze=False always returns a 2-D array of axes, so a single algorithm
# needs no special case.
fig, axes = plt.subplots(1, num_algorithms, figsize=(num_algorithms * 5, 5), squeeze=False)

# The loop variable is named `partition` on purpose: calling it `communities`
# would overwrite the global results dict and break any re-run of this cell
# or of the detection cells.
for ax, (algorithm, partition) in zip(axes[0], communities.items()):
    ax.set_title(algorithm)

    # Map each node to the 1-based index of the first community containing it.
    membership = {}
    for community_idx, community in enumerate(partition, start=1):
        for node in community:
            membership.setdefault(node, community_idx)

    # Nodes missing from every community get colour 0, so the colour list
    # always has exactly one entry per node, as nx.draw requires.
    node_colors = [membership.get(node, 0) for node in G_nx.nodes]

    nx.draw(G_nx, pos, ax=ax, with_labels=False, node_size=10, node_color=node_colors, cmap=plt.cm.Set1)

plt.tight_layout()
plt.show()

In [None]:
# Side-by-side bar charts comparing the algorithms on the three quality scores.
fig, axes = plt.subplots(1, 3, figsize=(20, 6))

# (scores per algorithm, panel title, y-axis label) for each panel, left to right.
panels = [
    (modularity_values, 'Modularity of Community Detection Algorithms', 'Modularity'),
    (coverage_values, 'Coverage of Community Detection Algorithms', 'Coverage'),
    (performance_values, 'Performance of Community Detection Algorithms', 'Performance'),
]

for ax, (scores, title, ylabel) in zip(axes, panels):
    sns.barplot(x=list(scores.keys()), y=list(scores.values()), ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Algorithms')
    ax.set_ylabel(ylabel)
    # Tilt the algorithm names so the long labels do not overlap.
    ax.tick_params(axis='x', rotation=30)

plt.tight_layout()
plt.show()