In [2]:
from googleapiclient.discovery import build
import csv
from datetime import datetime, timedelta
import networkx as nx
from itertools import combinations
import math

In [3]:
# Read the API key from a local file kept outside the notebook.
# The context manager closes the file handle, and strip() drops the trailing
# newline most editors append, which would otherwise become part of the key.
with open("../esercizi_classe/api_key.txt") as api_key_file:
    DEVELOPER_KEY = api_key_file.read().strip()
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
# Shared client used by every API helper defined below.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

Ricerca e salvataggio di tutti i video di ciascun canale pubblicati nel periodo considerato (dal 7 luglio 2024 al 24 marzo 2025)

In [4]:
# Channel names to analyse; they are used both as search queries and as the
# suffix of every CSV file produced for a channel.
channels = [
    "Romeo Agresti",
    "Il BiancoNero",
    "Colpo Gobbo",
    "Luca Toselli",
    "lAngolodiKinoshi",
]

In [None]:
def getIDfromName(name):
    """Return the channel ID of the first channel-search hit for ``name``.

    Args:
        name: Text used as the search query.

    Returns:
        The ``channelId`` of the first item in the search response.

    Raises:
        IndexError: If the search response contains no items.
    """
    search_hits = (
        youtube.search()
        .list(part="snippet", q=name, type="channel", maxResults=5)
        .execute()
    )
    # Only the first hit is used, even though up to five are requested.
    first_hit = search_hits['items'][0]
    return first_hit['id']['channelId']

def getChannelPlaylist(channel_id):
    """Return the ID of the "uploads" playlist of a channel.

    Args:
        channel_id: ID of the channel to look up.

    Returns:
        The value found under ``contentDetails.relatedPlaylists.uploads`` of the
        first item in the ``channels().list`` response.

    Raises:
        IndexError: If the response contains no items.
    """
    channel_lookup = youtube.channels().list(part="snippet,contentDetails", id=channel_id)
    channel_info = channel_lookup.execute()['items'][0]
    related_playlists = channel_info['contentDetails']['relatedPlaylists']
    return related_playlists['uploads']

def get_videos_from_channel(playlist_id, channel_name,
                            begin_date=datetime(2024, 7, 7),
                            end_date=datetime(2025, 3, 24)):
    """Save the IDs and dates of a playlist's videos that fall inside a date window.

    Walks every page of ``playlist_id`` and keeps the items whose
    ``snippet.publishedAt`` lies in ``[begin_date, end_date]`` (both inclusive),
    then writes one ``video_id,YYYY-MM-DD`` row per video, without a header,
    to ``video_ids_<channel_name>.csv``.

    Args:
        playlist_id: ID of the playlist to scan.
        channel_name: Name used to build the output file name.
        begin_date: Lower bound of the window (naive ``datetime``).
        end_date: Upper bound of the window (naive ``datetime``). The default is
            midnight, so items published later on 2025-03-24 are excluded.

    NOTE(review): the filter uses the playlist item's ``publishedAt``; confirm
    that this is the date the analysis wants.
    """
    video_ids_and_dates = []
    next_page_token = None
    while True:
        response = youtube.playlistItems().list(
            part="snippet",
            playlistId=playlist_id,
            maxResults=50,
            pageToken=next_page_token
        ).execute()
        # A page without an "items" key is treated as empty instead of crashing.
        for item in response.get('items', []):
            snippet = item['snippet']
            video_date = datetime.strptime(snippet['publishedAt'], "%Y-%m-%dT%H:%M:%SZ")
            if begin_date <= video_date <= end_date:
                video_ids_and_dates.append((snippet['resourceId']['videoId'], video_date))
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
    with open(f"video_ids_{channel_name}.csv", "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        for video_id, video_date in video_ids_and_dates:
            writer.writerow([video_id, video_date.strftime("%Y-%m-%d")])
    print(f"Video IDs for {channel_name} saved to video_ids_{channel_name}.csv")

def get_csv_files(channels):
    """Write the video-ID CSV of every channel in ``channels``.

    For each name the channel ID is resolved, then its uploads playlist, and
    finally the playlist's videos are saved by ``get_videos_from_channel``.

    Args:
        channels: Iterable of channel names.
    """
    for channel_name in channels:
        uploads_playlist = getChannelPlaylist(getIDfromName(channel_name))
        get_videos_from_channel(uploads_playlist, channel_name)
    

In [None]:
# Write one video_ids_<channel>.csv per channel in `channels` (runs live API requests).
get_csv_files(channels)

<p>Reperimento dei commenti:</p>
<p>- per ogni commento salvo: id, video commentato, autore, contenuto, data, likes, id del commento a cui risponde (se c'è)</p>

In [None]:
class Comment:
    """Plain record holding the fields stored for one YouTube comment.

    ``reply_to_id`` is the ID of the comment being answered and stays ``None``
    when no parent is given.
    """

    def __init__(self, id, video_id, content, author,date, likes, reply_to_id=None):
        # Identity of the comment and of the video it belongs to.
        self.id, self.video_id = id, video_id
        # Payload of the comment.
        self.content, self.author = content, author
        self.date, self.likes = date, likes
        # Parent comment, if this one is a reply.
        self.reply_to_id = reply_to_id

In [None]:
def get_comments_one_vid(video_id):
    """Collect the comments of one video, following every result page.

    Each page of ``commentThreads().list`` is converted by
    ``get_comments_from_response`` and the results are concatenated.

    Args:
        video_id: ID of the video whose comment threads are requested.

    Returns:
        List of the objects produced by ``get_comments_from_response``.

    NOTE(review): only the replies embedded in each thread are read; confirm
    against the API reference whether that list can be truncated. An API error
    (for example on a video with comments disabled) propagates to the caller.
    """
    collected = []
    query = {
        "part": "snippet,replies",
        "videoId": video_id,
        "textFormat": "plainText",
        "maxResults": 100,
    }
    page_token = None
    while True:
        # The first request carries no pageToken; later ones pass the token returned by the previous page.
        if page_token:
            query["pageToken"] = page_token
        page = youtube.commentThreads().list(**query).execute()
        collected.extend(get_comments_from_response(page["items"]))
        page_token = page.get("nextPageToken", None)
        if not page_token:
            return collected

def get_comments_from_response(items):
    """Turn comment-thread items into a flat list of ``Comment`` objects.

    For every thread the top-level comment comes first, followed by the replies
    embedded in the thread (if any). Each reply stores the top-level comment's
    ID as its ``reply_to_id``.

    Args:
        items: Iterable of thread dictionaries (the ``items`` of a response).

    Returns:
        List of ``Comment`` objects in thread order.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    def build(raw, video_id, parent_id=None):
        # Top-level comments and replies expose the same snippet fields.
        details = raw["snippet"]
        return Comment(
            raw["id"],
            video_id,
            details["textDisplay"],
            details["authorDisplayName"],
            datetime.strptime(details["publishedAt"], timestamp_format),
            details["likeCount"],
            parent_id,
        )

    parsed = []
    for thread in items:
        top_level = thread["snippet"]["topLevelComment"]
        video_id = thread["snippet"]["videoId"]
        parsed.append(build(top_level, video_id))
        embedded_replies = thread["replies"]["comments"] if "replies" in thread else []
        parsed.extend(build(reply, video_id, top_level["id"]) for reply in embedded_replies)
    return parsed

def save_comments_csv(comments, channel_name):
    """Write ``comments`` to ``comments_<channel_name>.csv`` (UTF-8, with header).

    Args:
        comments: Iterable of objects exposing ``id``, ``video_id``, ``content``,
            ``author``, ``date``, ``likes`` and ``reply_to_id``.
        channel_name: Name used to build the output file name.
    """
    header = ["Comment ID", "Video ID", "Content", "Author", "Date", "Likes", "Reply To ID"]
    with open(f"comments_{channel_name}.csv", "w", newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        # Only the calendar day of the timestamp is stored.
        writer.writerows(
            [c.id, c.video_id, c.content, c.author, c.date.strftime("%Y-%m-%d"), c.likes, c.reply_to_id]
            for c in comments
        )
    print(f"Comments for {channel_name} saved to comments_{channel_name}.csv")

def get_comments_from_csv_file(channel):
    """Download and save the comments of every video listed for ``channel``.

    Video IDs are taken from the first column of ``video_ids_<channel>.csv``;
    the comments of all videos are gathered in file order and passed to
    ``save_comments_csv`` in a single call at the end.

    Args:
        channel: Channel name used in the input and output file names.
    """
    with open(f"video_ids_{channel}.csv", "r") as csvfile:
        listed_video_ids = [record[0] for record in csv.reader(csvfile)]
    all_comments = []
    for listed_id in listed_video_ids:
        all_comments += get_comments_one_vid(listed_id)
    save_comments_csv(all_comments, channel)

In [None]:
# Download and save the comments of every channel; each call reads that
# channel's video_ids_<channel>.csv file.
for channel in channels:
    get_comments_from_csv_file(channel)

In [None]:
def create_user_set(channel_name):
    """Write the distinct comment authors of a channel to ``user_set_<channel_name>.csv``.

    Authors are read from the fourth column of ``comments_<channel_name>.csv``
    (its header row is skipped). The output has one author per row, no header,
    in the iteration order of the set.

    Args:
        channel_name: Channel name used in the input and output file names.
    """
    with open(f"comments_{channel_name}.csv", "r", encoding='utf-8') as csvfile:
        rows = csv.reader(csvfile)
        next(rows)  # discard the header line
        user_set = {fields[3] for fields in rows}
    with open(f"user_set_{channel_name}.csv", "w",newline="", encoding='utf-8') as f:
        csv.writer(f).writerows([author] for author in user_set)

In [None]:
# Build the user_set_<channel>.csv file of every channel from its comments file.
for channel in channels:
    create_user_set(channel)

In [None]:
def get_videos_commented_by_user(channel):
    """Write the (user, video) pairs of a channel to ``user_and_videos_<channel>.csv``.

    Reads ``comments_<channel>.csv``, groups the distinct video IDs by comment
    author, and writes one ``User,Video ID`` row per pair under a header row.

    Args:
        channel: Channel name used in the input and output file names.
    """
    user_and_videos = {}
    with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
        # DictReader consumes the header line itself, so every row it yields is
        # data. An extra next(reader) here would discard the first comment.
        for row in csv.DictReader(csvfile):
            user_and_videos.setdefault(row["Author"], set()).add(row["Video ID"])
    with open(f"user_and_videos_{channel}.csv", "w", newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["User", "Video ID"])
        for user, videos in user_and_videos.items():
            for video in videos:
                writer.writerow([user, video])
    print(f"User and videos for {channel} saved to user_and_videos_{channel}.csv")

In [None]:
# Build the user_and_videos_<channel>.csv file of every channel from its comments file.
for channel in channels:
    get_videos_commented_by_user(channel)

In [None]:
def read_user_and_videos_csv(channel):
    """Load ``user_and_videos_<channel>.csv`` into a user -> set-of-videos mapping.

    Args:
        channel: Channel name used in the file name.

    Returns:
        Dict mapping each value of the ``User`` column to the set of
        ``Video ID`` values found on that user's rows.
    """
    videos_by_user = {}
    with open(f"user_and_videos_{channel}.csv", "r", encoding='utf-8') as csvfile:
        for record in csv.DictReader(csvfile):
            author = record["User"]
            commented_video = record["Video ID"]
            videos_by_user.setdefault(author, set()).add(commented_video)
    return videos_by_user

In [None]:
# Co-commenting graph: two users are linked when, within one channel, they
# commented on more than one common video; the edge weight is that count.
# NOTE(review): add_edge replaces the weight when the same pair qualifies in
# several channels, so the stored weight is the one of the last such channel.
# Confirm whether the weights should be summed instead.
G2 = nx.Graph()
for channel in channels:
    user_and_videos = read_user_and_videos_csv(channel)
    for user1, user2 in combinations(user_and_videos.keys(), 2):
        common_videos = user_and_videos[user1] & user_and_videos[user2]
        shared_count = len(common_videos)
        if shared_count > 1:
            G2.add_edge(user1, user2, weight=shared_count)
nx.write_gexf(G2, "user_common_videos_graph_2.gexf")

In [None]:
# Reload the saved graph and rank its nodes by eigenvector centrality,
# allowing up to 200 power iterations.
grafo = nx.read_gexf("user_common_videos_graph_2.gexf")
eigenvector_centrality = nx.eigenvector_centrality(grafo, max_iter=200)
# (node, score) pairs, highest score first.
node_e_centr = sorted(eigenvector_centrality.items(), key=lambda pair: pair[1], reverse=True)

In [None]:
# Show the first 10% (rounded down) of the eigenvector-centrality ranking.
node_e_centr[:math.floor(0.1*len(node_e_centr))]

In [None]:
# The second positional argument of betweenness_centrality is `k`, the number
# of pivot nodes sampled for the approximation. It is not an iteration cap
# like the one passed to eigenvector_centrality. It is now named explicitly,
# clamped to the graph size so a graph with fewer than 200 nodes cannot fail
# when sampling, and seeded so that re-running the cell gives the same ranking.
betweenness_sample_size = min(200, grafo.number_of_nodes())
node_and_betweenness = nx.betweenness_centrality(grafo, k=betweenness_sample_size, seed=42)
# (node, score) pairs, highest score first.
node_bet_centr = list(node_and_betweenness.items())
node_bet_centr.sort(key=lambda x: x[1], reverse=True)

In [None]:
class NodeCentralities:
    """Record grouping the degree and the centrality scores of one graph node."""

    def __init__(self, node, degree, eigenvector_centrality, betweenness_centrality,degree_centrality):
        # The node itself and its degree.
        self.node, self.degree = node, degree
        # Centrality scores, stored as given.
        self.eigenvector_centrality, self.betweenness_centrality = (
            eigenvector_centrality,
            betweenness_centrality,
        )
        self.degree_centrality = degree_centrality

In [None]:
# Despite its name, node_and_degree maps each node to its degree *centrality*.
node_and_degree = nx.degree_centrality(grafo)
# (node, score) pairs, highest score first.
node_degree_centr = sorted(node_and_degree.items(), key=lambda pair: pair[1], reverse=True)

In [None]:
# Sanity check: degree of the first two nodes of the graph.
# NOTE(review): the original passed grafo.nodes(2), which treats 2 as the
# `data` argument and yields (node, attribute) tuples rather than node keys,
# so the degree lookup matched no node. Showing two nodes is a guess at the
# intent; use grafo.degree() to list every node.
grafo.degree(list(grafo.nodes)[:2])

In [None]:
def create_node_centralities_list(node_and_degree, node_e_centr, node_bet_centr):
    """Merge the per-node measures into a list of ``NodeCentralities`` records.

    NOTE(review): the original cell contained only the signature, which is a
    syntax error. This body is a proposed implementation; confirm that it
    matches the intended output.

    Args:
        node_and_degree: Mapping node -> degree centrality.
        node_e_centr: Iterable of ``(node, eigenvector centrality)`` pairs.
        node_bet_centr: Iterable of ``(node, betweenness centrality)`` pairs.

    Returns:
        One ``NodeCentralities`` per node of ``node_and_degree``, in its
        iteration order. A score missing for a node is stored as ``None``.
    """
    eigenvector_by_node = dict(node_e_centr)
    betweenness_by_node = dict(node_bet_centr)
    return [
        NodeCentralities(
            node,
            grafo.degree(node),  # raw degree is read from the module-level graph
            eigenvector_by_node.get(node),
            betweenness_by_node.get(node),
            degree_centrality,
        )
        for node, degree_centrality in node_and_degree.items()
    ]

In [None]:
# Get all videos and number of comments
def get_all_videos_and_comments():
    """Count the comments of every video of the "Romeo Agresti" channel.

    Iterates the module-level ``channels`` list but only processes the entry
    named "Romeo Agresti", reading ``comments_<channel>.csv``.

    Returns:
        Dict mapping each ``Video ID`` to the number of rows (comments and
        replies) recorded for it.
    """
    all_videos = {}
    for channel in channels:
        if channel != "Romeo Agresti":
            continue
        with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
            # DictReader consumes the header line itself; calling next(reader)
            # here would drop the first comment and undercount its video.
            for row in csv.DictReader(csvfile):
                video_id = row["Video ID"]
                all_videos[video_id] = all_videos.get(video_id, 0) + 1
    return all_videos
# Mapping video ID -> number of comment rows, as returned by the function above.
all_videos = get_all_videos_and_comments()

In [None]:
def get_videos_per_week():
    """Group the "Romeo Agresti" videos by week and count their comments.

    Iterates the module-level ``channels`` list but only processes the entry
    named "Romeo Agresti". Videos come from ``video_ids_<channel>.csv`` and
    comments from ``comments_<channel>.csv``.

    Returns:
        Tuple ``(videos_per_week, videos_and_comments)`` where
        ``videos_per_week`` maps the Monday (``datetime`` at midnight) of each
        week to the list of video IDs dated in that week, and
        ``videos_and_comments`` maps each video ID to its number of comment rows.
    """
    videos_per_week = {}
    videos_and_comments = {}
    for channel in channels:
        if channel != "Romeo Agresti":
            continue
        with open(f"video_ids_{channel}.csv", "r") as csvfile:
            for row in csv.reader(csvfile):
                video_id = row[0]
                video_date = datetime.strptime(row[1], "%Y-%m-%d")
                # weekday() is 0 on Monday, so this rewinds to the week's Monday.
                week_start = video_date - timedelta(days=video_date.weekday())
                videos_per_week.setdefault(week_start, []).append(video_id)
        with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
            # DictReader consumes the header line itself; calling next(reader)
            # here would drop the first comment and undercount its video.
            for row in csv.DictReader(csvfile):
                video_id = row["Video ID"]
                videos_and_comments[video_id] = videos_and_comments.get(video_id, 0) + 1

    return videos_per_week, videos_and_comments
# Keep both mappings returned by get_videos_per_week() at module level.
videos_per_week, videos_and_comments = get_videos_per_week()


In [None]:
def get_top_commented_videos_per_week(n):
    """Return, for each week, the ``n`` videos with the most comments.

    The data is recomputed through ``get_videos_per_week()``.

    Args:
        n: Maximum number of videos kept per week.

    Returns:
        Dict mapping each week start to a list of ``(video_id, comment_count)``
        tuples sorted by descending count. Videos with no recorded comments
        are ignored, and a week in which no video has comments is left out.
        Previously such a week raised ``KeyError`` at the sort step.
    """
    videos_per_week, videos_and_comments = get_videos_per_week()
    top_commented_videos = {}
    for week_start, videos in videos_per_week.items():
        ranked = [
            (video, videos_and_comments[video])
            for video in videos
            if video in videos_and_comments
        ]
        if not ranked:
            # Nothing to rank for this week.
            continue
        # The sort is stable, so ties keep the order of the video list.
        ranked.sort(key=lambda x: x[1], reverse=True)
        top_commented_videos[week_start] = ranked[:n] if len(ranked) > n else ranked
    return top_commented_videos
# Keep only the single most commented video of each week, then display the mapping.
top_commented_videos = get_top_commented_videos_per_week(1)
top_commented_videos

In [None]:
# Map every comment author to the set of videos they commented on, merging
# the comments files of all channels.
user_and_video = {}
for channel in channels:
    with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
        # DictReader consumes the header line itself; calling next(reader)
        # here would silently drop the first comment of every channel.
        for row in csv.DictReader(csvfile):
            user_and_video.setdefault(row["Author"], set()).add(row["Video ID"])
user_and_video

In [None]:
# Collect the distinct video IDs found in the weekly top-commented lists.
videos = set()
for videos_list in top_commented_videos.values():
    # Each entry is a (video_id, comment_count) tuple; only the ID is kept.
    videos.update(entry[0] for entry in videos_list)
print(f"Total number of videos: {len(videos)}")
videos, len(videos)

In [8]:
def collapse_comments_files_in_one_file(channels):
    """Merge every ``comments_<channel>.csv`` into ``all_comments.csv``.

    Each row keeps its original columns and gains a ``Channel`` column holding
    the name of the channel it came from.

    Args:
        channels: Iterable of channel names whose comments files are merged.
    """
    all_comments = []
    fieldnames = None
    for channel in channels:
        with open(f"comments_{channel}.csv", "r", encoding='utf-8') as csvfile:
            # DictReader consumes the header line itself; calling next(reader)
            # here would drop the first comment of every channel.
            reader = csv.DictReader(csvfile)
            if fieldnames is None and reader.fieldnames:
                # Take the column layout from the first file that has a header,
                # so the output can be written even when there are no rows.
                fieldnames = list(reader.fieldnames) + ["Channel"]
            for row in reader:
                row["Channel"] = channel
                all_comments.append(row)
    with open("all_comments.csv", "w", newline='', encoding='utf-8') as csvfile:
        # With no input header at all, only the "Channel" column is written.
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames or ["Channel"])
        writer.writeheader()
        writer.writerows(all_comments)
# Write all_comments.csv from the comments files of every channel in `channels`.
collapse_comments_files_in_one_file(channels)