In [1]:
import networkx as nx

In [2]:
G = nx.read_graphml('new_dataset.graphml')

In [3]:
import networkx as nx

# Assuming 'your_graph' is your original YouTube social graph
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

# Calculate the probability for the Erdős-Rényi model
p = num_edges / (num_nodes * (num_nodes - 1))

# Create a random directed graph with the same number of nodes and the calculated probability
random_graph = nx.erdos_renyi_graph(num_nodes, p, directed=True)

# Now you can calculate the triadic closure for the random graph and compare


In [4]:
G1 = G.to_undirected()

transitivity = nx.transitivity(G1)
print("Transitivity of the Graph:", transitivity)


Transitivity of the Graph: 0.2507650932022628


In [5]:
random_graph1 = random_graph.to_undirected()
transitivity_random = nx.transitivity(random_graph1)
print("Transitivity of the Random Graph:", transitivity_random)

Transitivity of the Random Graph: 0.04018509498295178


In [6]:
triangles = nx.triangles(G1)
total_triangles = sum(triangles.values()) // 3
total_triangles

2704

In [7]:
triangles_random = nx.triangles(random_graph1)
total_triangles_random = sum(triangles_random.values()) // 3
total_triangles_random

165

In [8]:
bridges = list(nx.bridges(G1))

In [9]:
bridges

[('UCGdSS9hhh6zY24W-dwEnzDQ', 'UCkNBA4RIDjYUtuqMN9qxDPg'),
 ('UCMNe8ZwAbRuPnR0K4ler19w', 'UCrKS-46FKmKF2w1O6zOl3Yg'),
 ('UCexOYFSLShO9UYjWu9N-oGA', 'UCm9JTzawvFc_rKHP-hvoYxQ'),
 ('UCiBYJsSMYaCEMiJieYn3BGA', 'UC1cDpj_OXMMlpEmWpDbaGsg'),
 ('UC0hpVrTGDScpXR16lWbgg6w', 'UC8J0vy5x7scOm6itUxWH8gg'),
 ('UC1NKdUirpmn_cafZh75wmQw', 'UCMCGp4tDPxubob7x6pllk5Q'),
 ('UC1NKdUirpmn_cafZh75wmQw', 'UCmyF_0FfApPNSza05AYXyLg'),
 ('UC1NKdUirpmn_cafZh75wmQw', 'UCoa0xrRT0Jp6Z4G4P0sYLXQ'),
 ('UC8ftM8eQ2YKx0VxX9DwrzlQ', 'UCvfq3LXXfW72wx2cNt_ZeFg'),
 ('UCAT4v8QGnVXJVDugoMRCgiA', 'UC9aAeejaVOTL0nykywAXbUw'),
 ('UCE-4nyxCasi9HipGdtmFQoQ', 'UCXJ3R84_BWrfF6VQGBzV0dg'),
 ('UCMgX0CY6DB39o8ZpDiFo62g', 'UCC3JS1qW5CC-VMwsaGuVFwQ'),
 ('UCPItuYMjJJzffkogqNNg8HQ', 'UCWAb0-QD2WhYtst4j1OtfrQ'),
 ('UChJKW3cMYGtHK8VG28HN28A', 'UCXJ3R84_BWrfF6VQGBzV0dg'),
 ('UChnoWA76ja8PRdRdHzwDG7g', 'UCo4KYVNqfty1SXCQCKee5TQ'),
 ('UCkNBA4RIDjYUtuqMN9qxDPg', 'UC6jcKtWZPAgRDWwY99alTpQ'),
 ('UCkNBA4RIDjYUtuqMN9qxDPg', 'UCCYVVAFfmU62I6gIiDBjc4Q'

In [10]:
bridges = list(nx.bridges(G1))
critical_nodes_with_labels = []
node_frequency = {}  # Dictionary to keep track of node frequency

for u, v in bridges:
    label_u = G1.nodes[u].get('label', u)  # Get label of node u, default to u if label not present
    label_v = G1.nodes[v].get('label', v)  # Get label of node v, default to v if label not present
    critical_nodes_with_labels.append((label_u, label_v))

    # Update the frequency count for each node
    node_frequency[label_u] = node_frequency.get(label_u, 0) + 1
    node_frequency[label_v] = node_frequency.get(label_v, 0) + 1

# Now, critical_nodes_with_labels contains the labels of the critical nodes for each bridge
# and node_frequency contains the frequency of each node's appearance as a critical node

In [11]:
sorted_nodes_by_frequency = sorted(node_frequency.items(), key=lambda item: item[1], reverse=True)

print("Sorted Nodes by Frequency:", sorted_nodes_by_frequency)

Sorted Nodes by Frequency: [('Skiourakic', 9), ('SonOfZeus', 4), ('12os Pithikos', 3), ('Nicolas Jovani', 3), ('DAMIAN', 2), ('Mastor', 2), ('Annoula', 2), ('Daddy Playz', 2), ('GL Stars', 2), ('VENE', 2), ('LPDudes', 2), ('konilo shorts', 1), ('Ορεινή Μέλισσα', 1), ('Ορεινή Μέλισσα SHORTS', 1), ('marioTUBE', 1), ('mariotube', 1), ('Paras Kaltsas', 1), ('Chris Kogias', 1), ('Barcode Entertainment', 1), ('Tsede The Real', 1), ('Sugar Boy King', 1), ('MR Ampa', 1), ('Capital Music', 1), ('Light Official', 1), ('Konstantinos Pantelidis Official', 1), ('Cobalt Music', 1), ('TRANNOS', 1), ('Natasha Kay', 1), ('Elena Kremlidou', 1), ('FIVE Trading', 1), ('Gloomy Gentlemen', 1), ('Balkan Records', 1), ('Geo Hunter', 1), ('PigTard', 1), ('Gunner bossGR', 1), ('Lau Jr', 1), ('MINATION', 1), ('Tzitzi', 1), ('zmpa', 1), ('HardCoreGamerGR', 1), ('b-PsychoStories', 1), ('SkuLLy', 1), ('OffBeat Records', 1), ('Emily Official', 1), ('Εθνική Ομάδα Ποδοσφαίρου', 1), ('Mpoulakis Projects', 1), ('City Ce

In [12]:
critical_nodes_with_labels

[('konilo shorts', 'Skiourakic'),
 ('Ορεινή Μέλισσα', 'Ορεινή Μέλισσα SHORTS'),
 ('marioTUBE', 'mariotube'),
 ('Paras Kaltsas', 'Chris Kogias'),
 ('Barcode Entertainment', '12os Pithikos'),
 ('Nicolas Jovani', 'Tsede The Real'),
 ('Nicolas Jovani', 'Sugar Boy King'),
 ('Nicolas Jovani', 'MR Ampa'),
 ('Capital Music', 'Light Official'),
 ('Konstantinos Pantelidis Official', 'Cobalt Music'),
 ('TRANNOS', 'DAMIAN'),
 ('Natasha Kay', 'Elena Kremlidou'),
 ('FIVE Trading', 'Gloomy Gentlemen'),
 ('Balkan Records', 'DAMIAN'),
 ('Geo Hunter', 'PigTard'),
 ('Skiourakic', 'Gunner bossGR'),
 ('Skiourakic', 'Lau Jr'),
 ('Skiourakic', 'MINATION'),
 ('Skiourakic', 'Tzitzi'),
 ('Skiourakic', 'zmpa'),
 ('Skiourakic', 'HardCoreGamerGR'),
 ('Skiourakic', 'b-PsychoStories'),
 ('Skiourakic', 'SkuLLy'),
 ('OffBeat Records', 'Mastor'),
 ('Emily Official', 'Annoula'),
 ('Εθνική Ομάδα Ποδοσφαίρου', 'Mpoulakis Projects'),
 ('City Center', 'Lazarouk0s'),
 ('Annoula', 'Marseaux - Topic'),
 ('BilYolo', 'Το Κανάλι 

In [13]:
print(len(bridges))

50


In [14]:
random_graph_bridges = list(nx.bridges(random_graph1))
print(len(random_graph_bridges))

0


In [15]:
import networkx as nx

# Assuming 'G' is your pre-defined graph with 'label' attributes for each node
local_bridges = []
node_frequency = {}  # Dictionary to keep track of node frequency

for edge in G.edges():
    u, v = edge
    neighbors_u = set(G.neighbors(u))
    neighbors_v = set(G.neighbors(v))

    # Check if there are no common neighbors, indicating a local bridge
    if not neighbors_u & neighbors_v:
        label_u = G.nodes[u].get('label', u)
        label_v = G.nodes[v].get('label', v)
        local_bridges.append((label_u, label_v))

        # Update the frequency count for each node
        node_frequency[label_u] = node_frequency.get(label_u, 0) + 1
        node_frequency[label_v] = node_frequency.get(label_v, 0) + 1

# Sort the nodes by their frequency, highest to lowest
sorted_nodes_by_frequency = sorted(node_frequency.items(), key=lambda item: item[1], reverse=True)





In [16]:
print(len(local_bridges))

701


In [17]:
print("Local Bridges:", local_bridges)

Local Bridges: [('Ορεινή Μέλισσα', 'Ορεινή Μέλισσα SHORTS'), ('marioTUBE', 'mariotube'), ('Ορεινή Μέλισσα SHORTS', 'Ορεινή Μέλισσα'), ('Puck Games', 'Puck Redflix'), ('Nicolas Jovani', 'Alexandros Kopsialis'), ('Nicolas Jovani', 'BilYolo'), ('Nicolas Jovani', 'HeitLord'), ('Nicolas Jovani', 'FrediGtv'), ('Nicolas Jovani', 'Puck Redflix'), ('Nicolas Jovani', 'Kakos Xamos'), ('Nicolas Jovani', 'Mikeius Official'), ('Nicolas Jovani', 'Konilo'), ('Nicolas Jovani', 'SENIORA'), ('Nicolas Jovani', 'Tzivag'), ('Nicolas Jovani', 'AmiYiamiTube'), ('Nicolas Jovani', 'Giorgos Kavvalos'), ('Nicolas Jovani', 'Booyah TV'), ('Nicolas Jovani', 'Tsede The Real'), ('Nicolas Jovani', 'Skrap'), ('Nicolas Jovani', 'SNIK OFFICIAL'), ('Nicolas Jovani', 'John Li'), ('Nicolas Jovani', 'Internet4u'), ('Nicolas Jovani', 'Vibrator Productions'), ('Nicolas Jovani', 'WeirdWhisper'), ('Nicolas Jovani', 'Dimitris DK  Kyrsanidis'), ('Nicolas Jovani', 'Panagiotis Milas'), ('Nicolas Jovani', 'marioTUBE'), ('Nicolas Jovan

In [18]:
print("Sorted Nodes by Frequency:", sorted_nodes_by_frequency)

Sorted Nodes by Frequency: [('Skiourakic', 43), ('W1ndz', 38), ('Alekkun', 38), ('Nicolas Jovani', 34), ('SonOfZeus', 34), ('LPDudes', 29), ('Unboxholics', 28), ('Karpouzis', 27), ('Kristy', 23), ('Hacky', 22), ('Rocket Dust', 22), ('Annoula', 22), ('Konilo', 21), ('Booyah TV', 21), ('Natasha Kay', 20), ('Mastor', 20), ('RebellGamesGR', 20), ('PanosDent', 19), ('DeadlyMuffin', 19), ('Daddy Playz', 19), ('Kakos Xamos', 18), ('ItsOnlySkillz', 17), ('LegitGamingGR', 17), ('Mikeius Official', 15), ('Chris Kogias', 15), ('Axel', 15), ('TechItSerious', 14), ('TukuGr', 14), ('R1ou TV', 14), ('GRamers', 14), ('Animentary', 14), ('Manos', 13), ('Skrap', 12), ('Vibrator Productions', 12), ('NerZhul', 12), ('2J', 12), ('ComedyLab GR', 11), ('ΜΠΑΜ FC', 11), ('KafroGamer', 11), ('Unboxing Specialist', 11), ('D0nalduck7', 11), ('Alexandros Kopsialis', 10), ('Megacyber', 10), ('VENE', 10), ('Baze', 10), ('Giorgos Kavvalos', 9), ('Gunzar', 9), ('MateoProd', 9), ('Puck Redflix', 8), ('BilYolo', 8), ('S

In [19]:
import networkx as nx

# Assuming 'G' is your pre-defined graph with 'label' attributes for each node
local_bridges = []
node_frequency = {}  # Dictionary to keep track of node frequency

for edge in random_graph.edges():
    u, v = edge
    neighbors_u = set(random_graph.neighbors(u))
    neighbors_v = set(random_graph.neighbors(v))

    # Check if there are no common neighbors, indicating a local bridge
    if not neighbors_u & neighbors_v:
        label_u = random_graph.nodes[u].get('label', u)
        label_v = random_graph.nodes[v].get('label', v)
        local_bridges.append((label_u, label_v))

        # Update the frequency count for each node
        node_frequency[label_u] = node_frequency.get(label_u, 0) + 1
        node_frequency[label_v] = node_frequency.get(label_v, 0) + 1

# Sort the nodes by their frequency, highest to lowest
sorted_nodes_by_frequency = sorted(node_frequency.items(), key=lambda item: item[1], reverse=True)





In [20]:
print(len(local_bridges))

1130


In [21]:
print("Local Bridges:", local_bridges)

Local Bridges: [(0, 71), (0, 166), (0, 172), (0, 232), (1, 11), (1, 181), (1, 232), (2, 83), (2, 135), (2, 235), (3, 8), (3, 90), (3, 183), (4, 9), (4, 37), (4, 120), (4, 158), (5, 7), (5, 14), (5, 15), (5, 17), (5, 113), (5, 144), (6, 116), (6, 149), (6, 151), (6, 191), (6, 227), (7, 12), (7, 44), (7, 72), (7, 77), (7, 85), (7, 98), (7, 106), (7, 110), (7, 121), (7, 180), (7, 185), (8, 1), (8, 125), (8, 210), (8, 231), (9, 104), (10, 11), (10, 23), (10, 77), (10, 126), (10, 227), (11, 25), (11, 45), (11, 74), (11, 96), (11, 116), (11, 242), (12, 86), (12, 119), (12, 149), (13, 4), (14, 20), (14, 92), (14, 107), (14, 160), (15, 9), (15, 83), (15, 239), (16, 173), (16, 223), (17, 11), (17, 154), (17, 164), (17, 208), (17, 229), (17, 231), (17, 237), (18, 133), (18, 239), (19, 74), (19, 82), (19, 162), (19, 196), (19, 221), (19, 242), (20, 106), (20, 124), (20, 138), (20, 176), (20, 193), (20, 218), (21, 9), (21, 144), (21, 243), (22, 39), (22, 84), (22, 225), (22, 240), (23, 107), (23, 

In [9]:
print("Sorted Nodes by Frequency:", sorted_nodes_by_frequency)

NameError: name 'sorted_nodes_by_frequency' is not defined

In [23]:
same_attribute_count = 0
for u, v in G.edges():
    if G.nodes[u]['rap_content'] == G.nodes[v]['rap_content']:
        same_attribute_count += 1
homophily_index = same_attribute_count / G.number_of_edges()


In [24]:
homophily_index

0.4059561128526646

In [25]:
import numpy as np

categories = ['rapper', 'entertainment', 'vlog', 'tv', 'training', 'tech reviews', 'sinomosiologos', 'podcast', 'music label', 'learning', 'motorbike', 'Greek laiko', 'gaming', 'football', 'cars', 'cooking']
for node in random_graph.nodes():
    random_graph.nodes[node]['category'] = np.random.choice(categories)


In [26]:
same_category_count = 0
for u, v in random_graph.edges():
    if random_graph.nodes[u]['category'] == random_graph.nodes[v]['category']:
        same_category_count += 1
homophily_index_random_graph = same_category_count / random_graph.number_of_edges()
homophily_index_random_graph

0.0776