In [6]:
from googleapiclient.discovery import build
import csv
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import scipy
import networkx as nx
from itertools import combinations
import math
import numpy as np
from statsmodels.distributions.empirical_distribution import ECDF
import json

In [2]:
# Read the API key from a local file (kept out of the notebook) and build the
# YouTube Data API client that every request in this notebook goes through.
with open("../esercizi_classe/api_key.txt") as key_file:
    # Strip surrounding whitespace so a trailing newline in the file does not
    # become part of the key.
    DEVELOPER_KEY = key_file.read().strip()
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

Canali YouTube su cui verte la ricerca

In [3]:
# Channel names used as search queries when resolving channel IDs.
channels = [
    "Romeo Agresti",
    "Il BiancoNero",
    "Colpo Gobbo",
    "Luca Toselli",
    "lAngolodiKinoshi",
]

Reperimento dei video pubblicati fra il 7 luglio 2024 e il 24 marzo 2025

In [None]:
def getIDfromName(name):
    """Resolve a channel name to a YouTube channel ID via the search endpoint.

    The name is sent as a free-text query restricted to channels, and the ID
    of the first hit is returned.

    Args:
        name: Channel name to search for.

    Returns:
        The ``channelId`` string of the first search result.

    Raises:
        IndexError: If the search returns no channel for ``name``.
    """
    request = youtube.search().list(
        part="snippet",
        q=name,
        type="channel",
        maxResults=5
    )
    response = request.execute()
    items = response.get('items', [])
    if not items:
        # Report the offending name instead of a bare "list index out of
        # range", so a bad entry in a list of channels is easy to identify.
        raise IndexError(f"No YouTube channel found for search query {name!r}")
    return items[0]['id']['channelId']

def getChannelPlaylist(channel_id):
    """Return the ID of the "uploads" playlist of the given channel.

    Args:
        channel_id: YouTube channel ID to look up.

    Returns:
        The value stored under ``contentDetails.relatedPlaylists.uploads`` of
        the first item in the channels.list response.
    """
    channel_lookup = youtube.channels().list(part="snippet,contentDetails", id=channel_id)
    first_channel = channel_lookup.execute()['items'][0]
    related_playlists = first_channel['contentDetails']['relatedPlaylists']
    return related_playlists['uploads']

def get_videos_from_channel(playlist_id, channel_name,
                            begin_date=datetime(2024, 7, 7),
                            end_date=datetime(2025, 3, 24)):
    """Write the IDs and dates of a playlist's videos published in a date window.

    Every page of the playlist is fetched, and the items whose ``publishedAt``
    day falls between ``begin_date`` and ``end_date`` (both days included) are
    written to ``video_ids_<channel_name>.csv`` as ``video_id,YYYY-MM-DD``
    rows without a header.

    Args:
        playlist_id: ID of the playlist to scan.
        channel_name: Name used to build the output file name.
        begin_date: First day of the window. Only the calendar date is used.
        end_date: Last day of the window. Only the calendar date is used.

    Returns:
        None. The result is the CSV file written to the working directory.
    """
    # Compare calendar days, not datetimes: the bounds are midnight values, so
    # a datetime comparison would drop every video published during the last
    # day of the window.
    first_day, last_day = begin_date.date(), end_date.date()
    video_ids_and_dates = []
    next_page_token = None
    while True:
        request = youtube.playlistItems().list(
            part="snippet",
            playlistId=playlist_id,
            maxResults=50,
            pageToken=next_page_token
        )
        response = request.execute()
        for item in response.get('items', []):
            snippet = item['snippet']
            # NOTE(review): the API reference describes this field as the time
            # the item was added to the playlist - confirm it matches the
            # publication date closely enough for this analysis.
            video_date = datetime.strptime(snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
            if first_day <= video_date.date() <= last_day:
                video_ids_and_dates.append((snippet['resourceId']['videoId'], video_date))
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
    with open(f"video_ids_{channel_name}.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(
            (video_id, video_date.strftime("%Y-%m-%d"))
            for video_id, video_date in video_ids_and_dates
        )
    print(f"Video IDs for {channel_name} saved to video_ids_{channel_name}.csv")

def get_csv_files(channels):
    """Run the video export for each channel name given.

    Every name is resolved to a channel ID, then to that channel's uploads
    playlist, which is handed to ``get_videos_from_channel`` together with the
    name.

    Args:
        channels: Iterable of channel names.
    """
    for name in channels:
        uploads_playlist = getChannelPlaylist(getIDfromName(name))
        get_videos_from_channel(uploads_playlist, name)

In [None]:
# Run the video export for every configured channel (one API lookup chain and
# one video_ids_<channel>.csv file per entry in `channels`).
get_csv_files(channels)

Reperimento dei commenti dai video selezionati

In [4]:
class Comment:
    """Plain record for one YouTube comment, either top-level or a reply.

    Attributes mirror the constructor arguments one to one; ``reply_to_id``
    stays ``None`` unless a parent comment ID is supplied.
    """

    def __init__(self, id, video_id, content, author, date, likes, reply_to_id=None):
        # Where the comment lives.
        self.id, self.video_id = id, video_id
        # What was written, and by whom.
        self.content, self.author = content, author
        # When it was posted and how it was received.
        self.date, self.likes = date, likes
        # ID of the parent comment when this one is a reply.
        self.reply_to_id = reply_to_id

In [5]:
def get_comments_one_vid(video_id):
    """Collect the comments of one video across all result pages.

    Comment threads are requested 100 at a time; each page's ``items`` are
    passed to ``get_comments_from_response`` and the results are concatenated
    in page order.

    Args:
        video_id: ID of the video whose comment threads are fetched.

    Returns:
        List of whatever ``get_comments_from_response`` produced for each
        page, concatenated in page order.
    """
    collected = []
    params = {
        "part": "snippet,replies",
        "videoId": video_id,
        "textFormat": "plainText",
        "maxResults": 100,
    }
    while True:
        page = youtube.commentThreads().list(**params).execute()
        collected.extend(get_comments_from_response(page["items"]))
        token = page.get("nextPageToken", None)
        if not token:
            return collected
        # Only follow-up requests carry a page token; the first one has none.
        params["pageToken"] = token

def _to_comment(resource, video_id, parent_id=None):
    """Build a ``Comment`` from one comment resource (top-level or reply)."""
    details = resource["snippet"]
    published = datetime.strptime(details["publishedAt"], "%Y-%m-%dT%H:%M:%SZ")
    return Comment(
        resource["id"],
        video_id,
        details["textDisplay"],
        details["authorDisplayName"],
        published,
        details["likeCount"],
        parent_id,
    )


def get_comments_from_response(items):
    """Flatten comment-thread items into a list of ``Comment`` objects.

    Each thread yields its top-level comment followed by the replies embedded
    in the thread, if any. Replies carry the top-level comment's ID as
    ``reply_to_id`` and the thread's video ID.

    Args:
        items: The ``items`` list of a commentThreads.list response.

    Returns:
        List of ``Comment`` objects in thread order, replies right after
        their parent.
    """
    # NOTE(review): only the replies embedded in the thread are read here; the
    # API reference says this may be a subset of all replies - confirm whether
    # a complete reply list is required.
    parsed = []
    for thread in items:
        video_id = thread["snippet"]["videoId"]
        top_level = thread["snippet"]["topLevelComment"]
        parsed.append(_to_comment(top_level, video_id))
        embedded_replies = thread["replies"]["comments"] if "replies" in thread else []
        parsed.extend(
            _to_comment(reply, video_id, top_level["id"]) for reply in embedded_replies
        )
    return parsed

def save_comments_csv(comments, channel_name):
    """Write comments to ``comments_<channel_name>.csv`` (UTF-8, with header).

    Args:
        comments: Iterable of objects exposing ``id``, ``video_id``,
            ``content``, ``author``, ``date``, ``likes`` and ``reply_to_id``;
            ``date`` is formatted with ``strftime("%Y-%m-%d")``.
        channel_name: Name used to build the output file name.
    """
    header = ["Comment ID", "Video ID", "Content", "Author", "Date", "Likes", "Reply To ID"]
    with open(f"comments_{channel_name}.csv", "w", newline='', encoding='utf-8') as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        sheet.writerows(
            [c.id, c.video_id, c.content, c.author, c.date.strftime("%Y-%m-%d"), c.likes, c.reply_to_id]
            for c in comments
        )
    print(f"Comments for {channel_name} saved to comments_{channel_name}.csv")

def get_comments_from_csv_file(channel):
    """Fetch and save the comments of every video listed for a channel.

    Video IDs are read from the first column of ``video_ids_<channel>.csv``;
    the comments of all videos are gathered in file order and handed to
    ``save_comments_csv`` in a single call once every video has been fetched.

    Args:
        channel: Channel name used in both the input and output file names.
    """
    with open(f"video_ids_{channel}.csv", "r") as listing:
        ids = [record[0] for record in csv.reader(listing)]
    gathered = [comment for vid in ids for comment in get_comments_one_vid(vid)]
    save_comments_csv(gathered, channel)

In [None]:
# For each configured channel, fetch the comments of every video listed in its
# video_ids_<channel>.csv and write them to comments_<channel>.csv.
for channel in channels:
    get_comments_from_csv_file(channel)

In [8]:
# Load the match list. The same file is written back as UTF-8 with
# ensure_ascii=False later in the notebook, so read it explicitly as UTF-8
# (not the locale default) and close the handle as soon as parsing is done.
with open("../progetto/matches.json", encoding="utf-8") as matches_file:
    matches = json.load(matches_file)

In [None]:

from datetime import datetime

def convert_date_format(date_string):
    """Convert a date from DD.MM.YYYY to YYYY-MM-DD format.

    A date that is already in YYYY-MM-DD form is accepted and returned in
    that same form, so converting twice is harmless.

    Args:
        date_string: Date as ``DD.MM.YYYY`` or ``YYYY-MM-DD``.

    Returns:
        The date formatted as ``YYYY-MM-DD``.

    Raises:
        ValueError: If ``date_string`` matches neither format.
    """
    try:
        date_obj = datetime.strptime(date_string, "%d.%m.%Y")
    except ValueError:
        # Not DD.MM.YYYY: accept an already-converted value, and let strptime
        # raise ValueError for anything that is not a valid date either way.
        date_obj = datetime.strptime(date_string, "%Y-%m-%d")
    return date_obj.strftime("%Y-%m-%d")

# Rewrite every match's "data" field in place using convert_date_format.
for match in matches:
    match["data"] = convert_date_format(match["data"])

# NOTE: this overwrites the file the matches were loaded from with the
# converted dates (UTF-8, non-ASCII characters kept as-is, 2-space indent).
with open("../progetto/matches.json", "w", encoding='utf-8') as f:
    json.dump(matches, f, ensure_ascii=False, indent=2)

# Short report: total number of matches and a preview of the first five.
print("Date format converted successfully!")
print(f"Total matches processed: {len(matches)}")
print("\nFirst 5 matches with new date format:")
for i, match in enumerate(matches[:5]):
    print(f"{i+1}. {match['data']} - {match['partita']} ({match['risultato']})")