In [4]:
from googleapiclient.discovery import build
import csv
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import networkx as nx
from itertools import combinations
import math

In [None]:
# Read the API key from a local file kept next to the class exercises. The context
# manager closes the file handle, and strip() removes the trailing newline that would
# otherwise become part of the key.
with open("../esercizi_classe/api_key.txt") as key_file:
    DEVELOPER_KEY = key_file.read().strip()
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# Client object used by the request helpers defined in this notebook.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

Ricerca e salvataggio di tutti i video per canale nel periodo temporale

In [2]:
# Display names of the YouTube channels to analyse. They are used both as search
# queries (to resolve the channel ID) and as part of the CSV file names.
channels = [
    "Romeo Agresti",
    "Il BiancoNero",
    "Colpo Gobbo",
    "Luca Toselli",
    "lAngolodiKinoshi",
]

In [None]:
def getIDfromName(name):
    """Resolve a channel display name to its YouTube channel ID.

    Runs a channel search for ``name`` and returns the ID of the first result.
    The first hit is taken as-is, so an ambiguous name may resolve to a
    different channel than the intended one.

    Args:
        name: Text used as the search query.

    Returns:
        The ``channelId`` of the first search result.

    Raises:
        IndexError: If the search returns no channel (same exception type as the
            bare ``[0]`` lookup would raise, but with an explanatory message).
    """
    request = youtube.search().list(
        part="snippet",
        q=name,
        type="channel",
        maxResults=5
    )
    response = request.execute()
    items = response['items']
    if not items:
        raise IndexError(f"No YouTube channel found for search query {name!r}")
    return items[0]['id']['channelId']

def getChannelPlaylist(channel_id):
    """Return the ID of the "uploads" playlist of the channel ``channel_id``.

    Requests the channel resource and reads
    ``contentDetails.relatedPlaylists.uploads`` from the first returned item.
    """
    response = youtube.channels().list(
        part="snippet,contentDetails",
        id=channel_id
    ).execute()
    first_channel = response['items'][0]
    related_playlists = first_channel['contentDetails']['relatedPlaylists']
    return related_playlists['uploads']

def get_videos_from_channel(playlist_id,channel_name,
                            begin_date=datetime(2024,7,7),
                            end_date=datetime(2025,3,24)):
    """Save the IDs and dates of the playlist items published inside a time window.

    Pages through every item of ``playlist_id`` (50 per request), keeps those whose
    ``snippet.publishedAt`` lies in ``[begin_date, end_date]`` and writes them to
    ``video_ids_<channel_name>.csv`` as ``video_id,YYYY-MM-DD`` rows (no header).

    Args:
        playlist_id: ID of the playlist to scan.
        channel_name: Name used to build the output file name.
        begin_date: Inclusive lower bound of the window. Defaults to 2024-07-07.
        end_date: Inclusive upper bound of the window. Defaults to 2025-03-24; this
            is midnight, so items published later that day are excluded.

    Returns:
        None. The result is the CSV file; a confirmation line is printed.
    """
    # NOTE(review): for playlist items, snippet.publishedAt is presumably the time the
    # item was added to the playlist; verify it matches the video publication date.
    video_ids_and_dates = []
    next_page_token = None
    while True:
        request = youtube.playlistItems().list(
            part="snippet",
            playlistId=playlist_id,
            maxResults=50,
            pageToken=next_page_token
        )
        response = request.execute()
        for item in response['items']:
            video_date = datetime.strptime(item['snippet']['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
            if begin_date <= video_date <= end_date:
                video_ids_and_dates.append((item['snippet']['resourceId']['videoId'], video_date))
        # A missing nextPageToken means the last page has been read.
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
    with open(f"video_ids_{channel_name}.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        for video_id, video_date in video_ids_and_dates:
            writer.writerow([video_id, video_date.strftime("%Y-%m-%d")])
    print(f"Video IDs for {channel_name} saved to video_ids_{channel_name}.csv")

def get_csv_files(channels):
    """Write one ``video_ids_<channel>.csv`` file for each channel name.

    Each name is resolved to a channel ID, then to its uploads playlist, and the
    playlist is scanned by ``get_videos_from_channel``.
    """
    for name in channels:
        uploads_playlist = getChannelPlaylist(getIDfromName(name))
        get_videos_from_channel(uploads_playlist, name)
    

In [None]:
# Resolve every channel and write its video_ids_<channel>.csv file (issues API requests).
get_csv_files(channels)

<p>Reperimento dei commenti:</p>
<p>Per ogni commento vengono salvati: id, video commentato, autore, contenuto, data, numero di like e id del commento a cui risponde (se presente).</p>

<img src="../esercizi_classe/Grafo_Canali.png" width="500">

In [5]:
class Comment:
    """Plain record describing one YouTube comment (top-level comment or reply).

    All values are stored exactly as passed in. ``reply_to_id`` defaults to
    ``None``; it carries the parent comment ID when the comment is a reply.
    """

    def __init__(self, id, video_id, content, author,date, likes, reply_to_id=None):
        # Identity and position in the thread.
        self.id, self.video_id, self.reply_to_id = id, video_id, reply_to_id
        # Who wrote what.
        self.author, self.content = author, content
        # When it was written and how many likes it has.
        self.date, self.likes = date, likes

In [None]:
def get_comments_one_vid(video_id):
    """Collect the comments of one video across all result pages.

    Requests comment threads 100 at a time and converts each page with
    ``get_comments_from_response``. Returns the resulting list of ``Comment``
    objects in the order the pages were received.
    """
    request_args = {
        "part": "snippet,replies",
        "videoId": video_id,
        "textFormat": "plainText",
        "maxResults": 100,
    }
    collected = []
    page_token = None
    while True:
        # The first request carries no pageToken; later ones continue from the last page.
        if page_token:
            request_args["pageToken"] = page_token
        response = youtube.commentThreads().list(**request_args).execute()
        collected.extend(get_comments_from_response(response["items"]))
        page_token = response.get("nextPageToken", None)
        if not page_token:
            return collected

def get_comments_from_response(items):
    """Convert comment-thread items into a flat list of ``Comment`` objects.

    For each thread the top-level comment comes first, followed by the replies
    found under ``item["replies"]["comments"]``; replies carry the top-level
    comment ID as ``reply_to_id``.
    """
    # NOTE(review): the "replies" field may contain only part of a thread's replies;
    # confirm against the API documentation whether a separate request is needed.

    def build(resource, resource_id, video_id, parent_id=None):
        # Both top-level comments and replies keep their data under "snippet".
        details = resource["snippet"]
        published = datetime.strptime(details["publishedAt"], "%Y-%m-%dT%H:%M:%SZ")
        return Comment(
            resource_id,
            video_id,
            details["textDisplay"],
            details["authorDisplayName"],
            published,
            details["likeCount"],
            parent_id,
        )

    parsed = []
    for thread in items:
        top_level = thread["snippet"]["topLevelComment"]
        top_level_id = top_level["id"]
        thread_video = thread["snippet"]["videoId"]
        parsed.append(build(top_level, top_level_id, thread_video))
        if "replies" not in thread:
            continue
        for answer in thread["replies"]["comments"]:
            parsed.append(build(answer, answer["id"], thread_video, top_level_id))
    return parsed

def save_comments_csv(comments, channel_name):
    """Write ``comments`` to ``comments_<channel_name>.csv`` (UTF-8, with header row).

    Each comment becomes one row; the date is written as ``YYYY-MM-DD``.
    A confirmation line is printed once the file has been written.
    """
    output_path = f"comments_{channel_name}.csv"
    header = ["Comment ID", "Video ID", "Content", "Author", "Date", "Likes", "Reply To ID"]
    # Lazily built rows, consumed by writerows() while the file is open.
    rows = (
        [c.id, c.video_id, c.content, c.author, c.date.strftime("%Y-%m-%d"), c.likes, c.reply_to_id]
        for c in comments
    )
    with open(output_path, "w", newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(rows)
    print(f"Comments for {channel_name} saved to {output_path}")

def get_comments_from_csv_file(channel):
    """Download and save the comments of every video listed for ``channel``.

    Video IDs are read from the first column of ``video_ids_<channel>.csv``; the
    comments of all videos are gathered in file order and passed to
    ``save_comments_csv``.
    """
    with open(f"video_ids_{channel}.csv", "r") as csvfile:
        video_ids = [record[0] for record in csv.reader(csvfile)]
    all_comments = [
        comment
        for video_id in video_ids
        for comment in get_comments_one_vid(video_id)
    ]
    save_comments_csv(all_comments, channel)

In [None]:
# Download and save the comments of every channel, reading the video ID files written earlier.
for channel in channels:
    get_comments_from_csv_file(channel)

In [6]:
# Strutture ausiliarie:
# video_id -> canale
# video_id -> commentatori
# canale -> video_id
# canale -> commentatori
from collections import defaultdict

def video_id_to_channel(channels):
    """Map every video ID to the name of the channel whose file lists it.

    Reads the first column of ``video_ids_<channel>.csv`` for each channel.
    Returns a ``defaultdict(str)``, so an unknown video ID maps to ``""``.
    """
    owner_of = defaultdict(str)
    for name in channels:
        with open(f"video_ids_{name}.csv", "r") as csvfile:
            owner_of.update((record[0], name) for record in csv.reader(csvfile))
    return owner_of

def channel_to_video_id(channels):
    """Map each channel name to the list of its video IDs.

    Reads the first column of ``video_ids_<channel>.csv`` for every channel; IDs
    keep the order they have in the file.

    Returns:
        A ``defaultdict(list)``. Looking up a channel with no rows yields an empty
        list, the same type as the stored values (the former ``defaultdict(str)``
        yielded ``""``).
    """
    videos_by_channel = defaultdict(list)
    for channel in channels:
        with open(f"video_ids_{channel}.csv", "r") as csvfile:
            for row in csv.reader(csvfile):
                videos_by_channel[channel].append(row[0])
    return videos_by_channel

def video_id_to_commenters(channels):
    """Map each video ID to the set of authors who commented on it.

    Reads the ``Video ID`` and ``Author`` columns of ``comments_<channel>.csv`` for
    every channel.

    Returns:
        A ``defaultdict(set)``. A video without comments maps to an empty set, the
        same type as the stored values (the former ``defaultdict(str)`` yielded ``""``).
    """
    commenters_by_video = defaultdict(set)
    for channel in channels:
        with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile):
                commenters_by_video[row["Video ID"]].add(row["Author"])
    return commenters_by_video

def channel_to_commenters(channels):
    """Map each channel name to the set of authors who commented on its videos.

    Reads the ``Author`` column of ``comments_<channel>.csv`` for every channel.

    Returns:
        A ``defaultdict(set)``. A channel without comments maps to an empty set, the
        same type as the stored values (the former ``defaultdict(str)`` yielded ``""``).
    """
    commenters_by_channel = defaultdict(set)
    for channel in channels:
        with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile):
                commenters_by_channel[channel].add(row["Author"])
    return commenters_by_channel

def user_to_video_id(channels):
    """Map each comment author to the set of video IDs they commented on.

    Reads the ``Author`` and ``Video ID`` columns of ``comments_<channel>.csv`` for
    every channel.

    Returns:
        A ``defaultdict(set)``. An unknown author maps to an empty set, the same
        type as the stored values (the former ``defaultdict(str)`` yielded ``""``).
    """
    videos_by_user = defaultdict(set)
    for channel in channels:
        with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile):
                videos_by_user[row["Author"]].add(row["Video ID"])
    return videos_by_user

def channel_to_comments(channels):
    """Map each channel name to the list of its comments as ``Comment`` objects.

    Reads ``comments_<channel>.csv`` for every channel. Every field is passed to
    ``Comment`` as the string read from the file, so ``date`` and ``likes`` are
    strings and a missing reply target is ``""``.

    Returns:
        A ``defaultdict(list)``. A channel without comments maps to an empty list
        (the former ``defaultdict(str)`` yielded ``""``).
    """
    comments_by_channel = defaultdict(list)
    for channel in channels:
        with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile):
                # One object per row; the original built a second, discarded copy.
                comments_by_channel[channel].append(
                    Comment(row["Comment ID"], row["Video ID"], row["Content"], row["Author"], row["Date"], row["Likes"], row["Reply To ID"])
                )
    return comments_by_channel

def video_to_date(channels):
    """Map every video ID to the date string stored next to it.

    Reads the first two columns of ``video_ids_<channel>.csv`` for each channel.
    Returns a ``defaultdict(str)``, so an unknown video ID maps to ``""``.
    """
    date_of = defaultdict(str)
    for name in channels:
        with open(f"video_ids_{name}.csv", "r") as csvfile:
            date_of.update((record[0], record[1]) for record in csv.reader(csvfile))
    return date_of

def commenter_to_channels(channels):
    """Map each comment author to the set of channels they commented on.

    Reads the ``Author`` column of ``comments_<channel>.csv`` for every channel.

    Returns:
        A ``defaultdict(set)``. An unknown author maps to an empty set, the same
        type as the stored values (the former ``defaultdict(str)`` yielded ``""``).
    """
    channels_by_commenter = defaultdict(set)
    for channel in channels:
        with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
            for row in csv.DictReader(csvfile):
                channels_by_commenter[row["Author"]].add(channel)
    return channels_by_commenter


# Lookup tables built from the video_ids_<channel>.csv files.
Channel_of = video_id_to_channel(channels)
Videos_of = channel_to_video_id(channels)
Date_of_video = video_to_date(channels)

# Lookup tables built from the comments_<channel>.csv files.
Commenters_of_video = video_id_to_commenters(channels)
Commenters_of_channel = channel_to_commenters(channels)
Comments_of_channel = channel_to_comments(channels)
Videos_commented_by = user_to_video_id(channels)
Channels_commented_by = commenter_to_channels(channels)

In [None]:
import json
# Sanity check of the lookup tables: list the video IDs stored for one channel.
print(json.dumps(Videos_of["Romeo Agresti"], indent=4))
# Commenters are stored in a set, which json.dumps cannot serialise, so print it directly.
print(Commenters_of_video["FaILsGTzUA8"])

[
    "spe1z4ebmsw",
    "rznXkLkzLC4",
    "HfymSU0FvpU",
    "63w9vrAHB7g",
    "hbP1fSUfYhg",
    "FaILsGTzUA8",
    "I_OiC7MLoN8",
    "vuWnmnM4xrQ",
    "gVhADjwA5Kc",
    "5KXECIcHj4Q",
    "hLLa2lHo-6w",
    "PGdx1nufqEI",
    "ZioIZk31T44",
    "ZKXsnf3aGss",
    "C-LPz1x3uAs",
    "LiH7VzYhKRo",
    "pCV-lN81aso",
    "1W1dJC3L1vE",
    "0SKFfmXRJg0",
    "M4i9in42_4c",
    "O663FTx55tc",
    "6mklcTlb3-k",
    "MQFh0pLr3_Q",
    "Pas4cM-Tkho",
    "vFwxZq7Y6PI",
    "flIPpjyorSg",
    "9-dqlIMbzag",
    "slmDNCXFXD8",
    "1ZBQDbFLN2Y",
    "xlDiHibbsJs",
    "nH8MUJUxOOs",
    "64vwWb--K5c",
    "wQtPUj3V8ps",
    "k90nBfS1BOE",
    "a3cUYnuYAoc",
    "51eStzva8j0",
    "wQz-ZAnyRY0",
    "1RgHwkBjfPA",
    "BVlH1XnZd90",
    "c7TmV-eisfY",
    "AMEhy9rFgVs",
    "bCdvJV0inTE",
    "Qem-fh_OYjc",
    "gOtIWSecwnU",
    "lw5sKehgVLY",
    "e6bEgm0R6h4",
    "3kdCtGZG9iE",
    "dkDamWeEMJM",
    "lB5iQH9mTAI",
    "96Q_r1-oSZU",
    "6g_892ZRdgU",
    "Res4jCKOmdo",
    "pah8b

TypeError: Object of type set is not JSON serializable

In [None]:
# Spot check: like count of the first stored comment of one channel (a string read from the CSV).
Comments_of_channel["Romeo Agresti"][0].likes

In [None]:
class NodeCentralities:
    """Record grouping a node with its degree and three centrality scores.

    All values are stored exactly as passed to the constructor.
    """

    def __init__(self, node, degree, eigenvector_centrality, betweenness_centrality,degree_centrality):
        # The node and its raw degree.
        self.node, self.degree = node, degree
        # Centrality scores for that node.
        self.degree_centrality = degree_centrality
        self.betweenness_centrality = betweenness_centrality
        self.eigenvector_centrality = eigenvector_centrality

In [9]:
# Keep at most the first 50 videos listed for each channel.
total_videos = []
for channel in channels:
    total_videos.extend(Videos_of[channel][:50])

50 video per canale. Cocommentatori Comuni

In [None]:
# Co-commenter graph: nodes are videos, and two videos are linked when they share at
# least two commenters. The edge weight is the number of shared commenters.
G = nx.Graph()

# Build each video's commenter set once instead of once per pair. `.get` avoids
# inserting empty entries into the defaultdict for videos that have no comments.
commenters_by_video = {
    video: set(Commenters_of_video.get(video, ())) for video in total_videos
}

for video1, video2 in combinations(total_videos, 2):
    common_commenters = commenters_by_video[video1] & commenters_by_video[video2]
    if len(common_commenters) > 1:
        G.add_edge(video1, video2, weight=len(common_commenters))
# Attach the channel and the date of each video as node attributes.
for n in G.nodes:
    G.nodes[n]['channel'] = Channel_of[n]
    G.nodes[n]['date'] = Date_of_video[n]

nx.write_gexf(G,"cocommentatori_50_video.gexf")
# Kept as the last expression so the notebook actually displays (nodes, edges).
len(G.nodes), len(G.edges)

In [None]:
# Mean edge weight over the whole graph, and over the edges joining two videos of the
# same channel. The original skipped same-channel edges when summing the overall total
# but still divided by the number of all edges, which understated "Media pesi".
media_pesi = 0
media_pesi_stesso_canale = 0
counter = 0  # number of same-channel edges

for sorgente, destinazione, data in G.edges(data=True):
    weight = data['weight']
    media_pesi += weight
    if G.nodes[sorgente]['channel'] == G.nodes[destinazione]['channel']:
        media_pesi_stesso_canale += weight
        counter += 1

# Guard the divisions: a graph may have no edges, or no same-channel edges at all.
n_edges = G.number_of_edges()
media_pesi = media_pesi / n_edges if n_edges else float("nan")
media_pesi_stesso_canale = media_pesi_stesso_canale / counter if counter else float("nan")
print("Media pesi:", media_pesi)
print("Media pesi stesso canale:", media_pesi_stesso_canale)

In [None]:
# weight distribution
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

def plot_weight_distribution(G):
    """Plot how many edges of ``G`` have each weight, on log-log axes.

    The x axis is the edge weight and the y axis is the number of edges with that
    weight, matching the axis labels. The original plotted each edge's weight
    against its position in the edge list, which is not a distribution.

    Args:
        G: Graph whose edges carry a ``weight`` attribute.
    """
    from collections import Counter

    weight_counts = Counter(data['weight'] for _, _, data in G.edges(data=True))
    distinct_weights = sorted(weight_counts)
    frequencies = [weight_counts[w] for w in distinct_weights]
    plt.figure(figsize=(10, 6))
    plt.plot(distinct_weights, frequencies, 'o', markersize=2, alpha=0.5)
    plt.title('Weight Distribution of Edges')
    plt.xlabel('Weight')
    plt.ylabel('Frequency')
    # log scale
    plt.xscale('log')
    plt.yscale('log')
    plt.show()
plot_weight_distribution(G)

In [None]:
# True when G has at least one local bridge. any() stops at the first one found, and
# with_span=False skips the span computation, which is not needed for this check.
print(any(nx.local_bridges(G, with_span=False)))

In [None]:
import seaborn as sns
import scipy
# Import the submodule explicitly: `import scipy` alone does not guarantee that
# `scipy.stats` is loaded.
import scipy.stats

# Four centrality measures for every node of the video graph.
degree = dict(nx.degree(G))
# Degree divided by the maximum possible degree (n - 1), rounded to four decimals.
degree_centrality = {n: round(c/(G.order()-1),4) for n,c in degree.items()}
closeness_centrality = nx.closeness_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=200)
all_centr = [degree_centrality, closeness_centrality, betweenness_centrality, eigenvector_centrality]
nodes = list(G.nodes())

# Pearson correlation between every pair of measures, evaluated over the same node
# order. Rows and columns follow the order of `all_centr`.
corrs = [
    [
        scipy.stats.pearsonr(
            [c1[n] for n in nodes],
            [c2[n] for n in nodes]
        )[0]
        for c1 in all_centr
    ]
    for c2 in all_centr
]

# Labels in the same order as `all_centr`.
centralities = ["degree", "closeness", "betweenness", "eigenvector"]
len(betweenness_centrality)

In [10]:
# Convert the selected videos to a set.
# NOTE(review): this rebinds the name used by the cell that builds G; a set does not
# keep insertion order, so re-running that cell afterwards may add edges in a different order.
total_videos = set(total_videos)

In [None]:
# degree distribution
def plot_degree_distribution(G):
    """Show a histogram of the node degrees of ``G``.

    The number of bins equals the number of distinct degree values in the graph.
    """
    node_degrees = [deg for _, deg in G.degree()]
    bin_count = len(set(node_degrees))
    plt.figure(figsize=(10, 6))
    plt.hist(node_degrees, bins=bin_count, color='blue', alpha=0.7, edgecolor='black')
    plt.title('Degree Distribution')
    plt.xlabel('Degree')
    plt.ylabel('Frequency')
    plt.show()


In [None]:
# Videos commented on by the author "@RomeoAgresti" (presumably the channel owner's handle).
Videos_commented_by["@RomeoAgresti"]

In [None]:
# Heatmap of the Pearson correlations between the four centrality measures.
# Ticks sit at the centre of each of the four cells.
tick_positions = [index + 0.5 for index in range(4)]
plt.figure(figsize = (10,5))
correlation_table = pd.DataFrame(corrs)
sns.heatmap(correlation_table, annot=True, cmap='coolwarm')
plt.xticks(tick_positions, centralities, rotation=0)
plt.yticks(tick_positions, centralities, rotation=0)
plt.title('Pearson correlation\n', weight='bold')
plt.show()

In [13]:
# User graph: nodes are commenters, and two users are linked when they commented on at
# least one common video among the selected ones. The edge weight is the number of such videos.
G_utenti = nx.Graph()
utenti = Videos_commented_by.keys()

# Restrict every user's videos to the selected ones once, instead of once per pair.
# Users with no selected video cannot get an edge, so they are left out of the pairing.
selected_videos = set(total_videos)
selected_by_user = {}
for user in utenti:
    restricted = selected_videos.intersection(Videos_commented_by[user])
    if restricted:
        selected_by_user[user] = restricted

for user1, user2 in combinations(selected_by_user, 2):
    common_videos = selected_by_user[user1] & selected_by_user[user2]
    if len(common_videos) > 0:
        G_utenti.add_edge(user1, user2, weight=len(common_videos))
for n in G_utenti.nodes:
    videos = Videos_commented_by[n]
    if len(videos) == 1:
        # Read the single element without removing it: set.pop() would empty the
        # user's entry in Videos_commented_by and make this cell non-repeatable.
        G_utenti.nodes[n]['video'] = next(iter(videos))

nx.write_gexf(G_utenti,"co-commentatori_utenti_piu_grande_con_info.gexf")

<img src="../esercizi_classe/Grafo_Canali.png" width="500">

In [6]:
# Reload a user graph from a GEXF file on disk.
# NOTE(review): this file name differs from the one written by the cell that builds
# G_utenti ("..._con_info.gexf") — confirm which export is intended here.
G_utenti = nx.read_gexf("co-commentatori_utenti_piu_grande.gexf")

In [None]:
# find modularity classes
from networkx.algorithms import community
def get_modularity_classes(G):
    """Return the communities of ``G`` found by networkx's greedy modularity method."""
    return community.greedy_modularity_communities(G)

# Number of communities found in the user graph.
classi = get_modularity_classes(G_utenti)
len(classi)

In [None]:
# Mean number of neighbours per node in the user graph.
degree_total = sum(deg for _, deg in G_utenti.degree())
average_degree = degree_total / G_utenti.number_of_nodes()
print("Average degree of the user graph:", average_degree)

In [None]:
# Rank the users of the graph by eigenvector centrality (at most 200 iterations).
eigenvector_centrality = nx.eigenvector_centrality(G_utenti,200)
sorted_eigenvector = list(eigenvector_centrality.items())
sorted_eigenvector.sort(key=lambda pair: pair[1], reverse=True)
print("Top 10 users by eigenvector centrality:")
top_ten = sorted_eigenvector[:10]
for entry in top_ten:
    print(f"User: {entry[0]}, Eigenvector Centrality: {entry[1]:.4f}")

In [None]:
# Size of the user graph as (nodes, edges).
len(G_utenti.nodes), len(G_utenti.edges) 
# Sizes noted for variants of the edge condition (presumably the threshold on the
# number of common videos used when building the graph):
# (37916, 23021975) len > 0
# (21805, 5129502) len > 1
# (5541, 544245) len > 1 and intersection with 50 videos per channel
# (11873, 4373890) len > 0 and intersection with 50 videos per channel

In [None]:
# graph with the same degree distribution
def generate_graph_with_same_degree_distribution(G, num_nodes, seed=None):
    """Build a random simple graph whose degree sequence follows that of ``G``.

    The configuration model yields a multigraph that may contain parallel edges and
    self-loops. Parallel edges are merged by converting to ``nx.Graph``, and
    self-loops are removed explicitly because that conversion keeps them. After this
    clean-up the degrees match those of ``G`` only approximately.

    Args:
        G: Graph whose degree sequence is reproduced.
        num_nodes: Unused; kept so that existing calls keep working. The number of
            nodes is the length of the degree sequence of ``G``.
        seed: Optional seed passed to ``nx.configuration_model`` for reproducible
            output. ``None`` keeps the previous unseeded behaviour.

    Returns:
        An ``nx.Graph`` without parallel edges or self-loops. Nodes are the integer
        indices created by the configuration model, not the node labels of ``G``.
    """
    degree_sequence = [d for n, d in G.degree()]
    G_random = nx.configuration_model(degree_sequence, seed=seed)
    G_random = nx.Graph(G_random)  # merges parallel edges
    G_random.remove_edges_from(nx.selfloop_edges(G_random))
    return G_random
# Random graph with the degree sequence of the user graph, ranked the same way.
generated_graph = generate_graph_with_same_degree_distribution(G_utenti, len(G_utenti.nodes()))
eigenvector_centrality_2 = nx.eigenvector_centrality(generated_graph,200)
sorted_eigenvector_2 = sorted(eigenvector_centrality_2.items(), key=lambda x: x[1], reverse=True)
print("Top 10 users by eigenvector centrality in generated graph:")
# The header used to be printed with nothing after it; print the ranking it announces.
for node, centrality in sorted_eigenvector_2[:10]:
    print(f"Node: {node}, Eigenvector Centrality: {centrality:.4f}")


In [None]:
# Compare the degree histogram of the user graph with that of the generated random graph.
plot_degree_distribution(G_utenti)
plot_degree_distribution(generated_graph)

In [None]:
# Collect every comment row mentioning one of the keywords (case-insensitive), and
# print the text of those that also contain "interista".
Motta_comments = []
keywords = ("motta", "tiago", "thiago")
for channel in channels:
    with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            lowered = row["Content"].lower()
            if not any(keyword in lowered for keyword in keywords):
                continue
            Motta_comments.append(row)
            if "interista" in lowered:
                print(row["Content"])

In [None]:
# Display every collected row (the output can be long).
Motta_comments