In [27]:
import pandas as pd
import networkx as nx

In [28]:
# Load the node table and the edge table from their csv files.
df = pd.read_csv('alex_data.csv')
df_edges = pd.read_csv('alex_leonidas_2.csv')

# Keep only the channels whose subscriber count is strictly above 10000.
df = df.loc[df['subscribercount'].gt(10000)]

# Keep only the edges whose Source is one of the retained channels.
# NOTE(review): only the Source endpoint is checked; Target is not filtered,
# so an edge can still point at an Id that is absent from `df` -- confirm
# that this is intended.
df_edges = df_edges.loc[df_edges['Source'].isin(df['Id'])]

In [29]:
# Build the graph from the filtered node and edge tables.
G = nx.Graph()

# One node per row of `df`, keyed by Id, carrying its label and subscriber
# count as node attributes.
G.add_nodes_from(
    (node_id, {'label': label, 'subscribercount': subscribers})
    for node_id, label, subscribers in zip(df['Id'], df['Label'], df['subscribercount'])
)

# One edge per row of `df_edges`, with the Weight column stored under the
# 'weight' edge attribute. Endpoints that were not added above are created
# implicitly, without label/subscribercount attributes.
G.add_weighted_edges_from(
    zip(df_edges['Source'], df_edges['Target'], df_edges['Weight'])
)


In [30]:

# --- Per-node metrics (each is a dict keyed by node Id) ---
# Get the degree centrality
degree_centrality = nx.degree_centrality(G)
# Get the betweenness centrality
betweenness_centrality = nx.betweenness_centrality(G)
# Get the closeness centrality
closeness_centrality = nx.closeness_centrality(G)
# Get the eigenvector centrality
eigenvector_centrality = nx.eigenvector_centrality(G)
# Get the pagerank
pagerank = nx.pagerank(G)
# Get the clustering coefficient
clustering_coefficient = nx.clustering(G)

# --- Whole-graph metrics (scalars) ---
# Get the average clustering coefficient
average_clustering_coefficient = nx.average_clustering(G)

# The average shortest path length and the diameter are only defined on a
# connected graph; networkx raises NetworkXError otherwise. Every row of `df`
# is added as a node even when it has no edge, so isolated nodes are possible.
# When G is not connected, fall back to its largest connected component so the
# cell still runs; when G is connected the results are computed on G itself.
if nx.is_connected(G):
    G_connected = G
else:
    G_connected = G.subgraph(max(nx.connected_components(G), key=len))

# Get the average shortest path length (largest component if G is disconnected)
average_shortest_path_length = nx.average_shortest_path_length(G_connected)
# Get the diameter (largest component if G is disconnected)
diameter = nx.diameter(G_connected)
# Get the density
density = nx.density(G)

# Create a dataframe with all the per-node metrics: one row per graph node,
# with the node Id moved from the index into an 'Id' column.
df_metrics = pd.DataFrame({'degree_centrality': degree_centrality,
                           'betweenness_centrality': betweenness_centrality,
                           'closeness_centrality': closeness_centrality,
                           'eigenvector_centrality': eigenvector_centrality,
                           'pagerank': pagerank,
                           'clustering_coefficient': clustering_coefficient}).reset_index().rename(columns={'index': 'Id'})

# Merge on Id with the nodes dataframe and keep only the label, the subscriber
# count and the metrics. The merge is an inner join (pandas default), so graph
# nodes that have no row in `df` do not appear in the result.
df_metrics = df.merge(df_metrics, on='Id')[['Id', 'Label', 'subscribercount',
                                            'degree_centrality', 'betweenness_centrality',
                                            'closeness_centrality', 'eigenvector_centrality',
                                            'pagerank', 'clustering_coefficient']]
df_metrics

Unnamed: 0,Id,Label,subscribercount,degree_centrality,betweenness_centrality,closeness_centrality,eigenvector_centrality,pagerank,clustering_coefficient
0,UCmdlnVFzmf7Zhqm_QE-UlJw,Alex Leonidas,362000,0.028090,0.066644,0.507123,0.087436,0.004337,0.160000
1,UCObA5o3mcc1felIMAv6cukw,Geoffrey Verity Schofield,133000,0.111236,0.149401,0.478238,0.141411,0.020393,0.029685
2,UCG-3rEW4IrDNa7-9iGByc2A,Natural Hypertrophy,113000,0.062921,0.076477,0.429329,0.081033,0.012195,0.035065
3,UCbYoTv5DbJmzKwxx5dmnu6g,Bald Omni-Man,63900,0.006742,0.001452,0.375527,0.026645,0.001149,0.600000
4,UCU6xkf4sbccuSUP4dkV_lSw,PG.Coaching,15100,0.144944,0.185526,0.438424,0.108303,0.032819,0.010174
...,...,...,...,...,...,...,...,...,...
693,UCyjTUNqX_vasRG4AAaM34jA,Animation Domination High Def,2170000,0.006742,0.000153,0.329630,0.032480,0.000995,0.466667
694,UCyqHPsR1eKEPSunlp6E-LSQ,Katie Moore,47200,0.003371,0.000003,0.329020,0.025068,0.000619,0.666667
695,UCzWQYUVCpZqtN93H8RR44Qw,Seeker,5070000,0.004494,0.000002,0.327567,0.027487,0.000724,0.833333
696,UCznHh6gk3ewFhaTZtdQFWWw,Greg and Mitch,787000,0.002247,0.000000,0.327929,0.023044,0.000458,1.000000
