### Importación de datos utilizando la YouTube Data API a través de una cuenta de Google Cloud Platform

Instalamos el cliente de Python para las APIs de Google (google-api-python-client). Traemos la API key desde la variable de entorno usando dotenv.
Referencia de la API: https://developers.google.com/youtube/v3/docs?hl=es-419

In [None]:
#%pip install google-api-python-client
#%pip install python-dotenv

Configuración de la API key como variable de entorno:
Windows --> 
setx YOUTUBE_API_KEY "your_actual_api_key"
Linux --> 
export YOUTUBE_API_KEY="your_actual_api_key"

In [2]:
import os
import csv
import pandas as pd
import re
import time
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError


Inicializando la api-key desde la variable de ambiente

In [3]:
# Read the YouTube Data API key from the YOUTUBE_API_KEY environment variable.
api_key = os.getenv('YOUTUBE_API_KEY')
# api_key = ""  # Alternative for quick local runs: paste the key here (never commit it).

# An unset variable (None) and an empty one ("") are both unusable, so fail
# fast here instead of letting the first API request fail later.
if not api_key:
    raise ValueError("Clave de API no encontrada. Asegurate de que esté configurada en el entorno o en el archivo .env.")
    


La función principal que ejecuta la llamada a la API para obtener la información de los videos y los comentarios.

In [19]:
# Helper that flattens one comment (top-level or reply) into an output row
def _build_comment_row(comment_id, snippet, total_reply_count, is_top_level, video_data):
    """Return one flat row combining a comment snippet with the video metadata.

    Args:
        comment_id: Identifier of the comment or reply.
        snippet: The ``snippet`` dict of the comment or reply.
        total_reply_count: Number of replies to record for this row.
        is_top_level: True for a thread's top-level comment, False for a reply.
        video_data: Video-level fields copied onto every row.

    Returns:
        A new dict holding the comment fields followed by the video fields.
    """
    # authorChannelId may be missing from the response (reportedly for authors
    # whose channel no longer exists — confirm against the API reference);
    # record None for user_id instead of raising KeyError.
    author_channel = snippet.get('authorChannelId') or {}
    row = {
        'comment_id': comment_id,
        'comment': snippet['textDisplay'],
        'user_id': author_channel.get('value'),
        'user_name': snippet['authorDisplayName'],
        'comment_time': snippet['publishedAt'],
        'comment_likes': snippet['likeCount'],
        'total_reply_count': total_reply_count,
        'is_top_level_comment': is_top_level,
    }
    row.update(video_data)
    return row


# Function to fetch comments for a YouTube video along with video information
def get_video_comments(video_id, relacion_evento, evento, tipo_evento, condiciones_cuenta, max_retries=5):
    """Fetch the comments and replies of one video, tagged with video metadata.

    Builds a YouTube Data API client with the module-level ``api_key``, reads
    the video's snippet/statistics/contentDetails, then pages through its
    comment threads (100 per page) and flattens every top-level comment and
    every reply included in the thread into one dict per comment.

    NOTE(review): the ``replies`` part of a comment thread may not contain
    every reply of the thread; retrieving all of them presumably requires a
    separate ``comments().list(parentId=...)`` call — confirm against the API
    reference.

    Args:
        video_id: ID of the video to query.
        relacion_evento: Classification value copied onto every row.
        evento: Classification value copied onto every row.
        tipo_evento: Classification value copied onto every row.
        condiciones_cuenta: Classification value copied onto every row.
        max_retries: Maximum consecutive retries of a comment page after an
            HTTP 500/503 response.

    Returns:
        A list of dicts, one per comment or reply. The list is empty when the
        API returns no item for ``video_id``; it may be partial when paging
        stops early because of an HTTP error.

    Raises:
        HttpError: If the initial video metadata request fails (that request
            is not retried).
    """
    # Build a YouTube Data API client
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Fetch video information including duration
    video_response = youtube.videos().list(
        part='snippet,statistics,contentDetails',
        id=video_id
    ).execute()

    # An ID that matches no video yields an empty item list; skip that video
    # instead of failing with IndexError and aborting the caller's whole run.
    items = video_response.get('items') or []
    if not items:
        print(f"No se encontro informacion del video para el ID: {video_id}.")
        return []

    # Extract relevant video data
    video_item = items[0]
    video_info = video_item['snippet']
    video_stats = video_item['statistics']
    video_content_details = video_item['contentDetails']
    video_data = {
        'video_title': video_info['title'],
        'channel_title': video_info['channelTitle'],
        'video_published_at': video_info['publishedAt'],
        'video_views': video_stats.get('viewCount', 0),
        'video_likes': video_stats.get('likeCount', 0),
        'video_duration': video_content_details['duration'],
        'relacion_evento': relacion_evento,
        'evento': evento,
        'tipo_evento': tipo_evento,
        'condiciones_cuenta': condiciones_cuenta
    }

    # Fetch comments with retry logic
    comments_data = []
    next_page_token = None
    retries = 0

    while True:
        # Only the request itself can raise HttpError, so keep the try minimal.
        try:
            comment_response = youtube.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,  # Maximum results per page (can adjust as needed)
                pageToken=next_page_token
            ).execute()
        except HttpError as e:
            if e.resp.status not in (500, 503):
                # Any other HTTP error is not retried: report it and return
                # whatever was collected so far.
                print(f"Ocurrio un error: {e}")
                break
            retries += 1
            if retries > max_retries:
                print(f"Se alcanzo el maximo de reintentos para el ID: {video_id}.")
                break
            sleep_time = 2 ** retries  # Exponential backoff
            print(f"Error del servidor (estado  {e.resp.status}), reintentando en {sleep_time} segundos...")
            time.sleep(sleep_time)
            continue  # Retry the same page (next_page_token is unchanged)

        # The page succeeded: give the next page a fresh retry budget so earlier
        # transient failures do not shorten it or inflate its backoff.
        retries = 0

        # Extract comment data
        for item in comment_response.get('items', []):
            thread_snippet = item['snippet']
            top_level = thread_snippet['topLevelComment']
            comments_data.append(_build_comment_row(
                top_level['id'],
                top_level['snippet'],
                thread_snippet['totalReplyCount'],
                True,
                video_data,
            ))

            # Check for replies
            if 'replies' in item:
                for reply in item['replies']['comments']:
                    comments_data.append(_build_comment_row(
                        reply['id'],
                        reply['snippet'],
                        0,  # Replies don't have replies in this structure
                        False,
                        video_data,
                    ))

        next_page_token = comment_response.get('nextPageToken')
        if not next_page_token:
            break

    return comments_data

Utilizamos la lista de los videos previamente seleccionados para crear tags y asociaciones por canales, eventos y youtubers

In [20]:
# Input CSV file path (relative to the notebook's working directory)
ruta_archivo = os.path.join('..', "data", "external", "list_links_videos.csv")
input_csv = ruta_archivo

# Read the curated video list into a list of dicts, one per CSV row.
# newline='' is how the csv module expects its input file to be opened, so that
# quoted fields containing line breaks are parsed correctly.
with open(input_csv, mode='r', encoding='utf-8', newline='') as file:
    # Map the CSV column headers to the keyword names used for each video.
    videos_info = [
        {
            "video_id": row["ID"],
            "relacion_evento": row["Relacion con tema"],
            "evento": row["Evento cercano"],
            "tipo_evento": row["Tipo de evento"],
            "condiciones_cuenta": row["Condicion Cuenta"],
        }
        for row in csv.DictReader(file)
    ]

# Now `videos_info` contains the list of dictionaries in the desired format

Creamos un DataFrame vacío para guardar los datos de importación y las clasificaciones

In [21]:
# Accumulator for the imported comments: starts with no rows and no columns
all_comments_df = pd.DataFrame(data=None)

Iteramos usando la función previamente definida y la lista de los videos

In [None]:
# Collect the rows of every video first and build the DataFrame once at the
# end: calling pd.concat inside the loop re-copies all accumulated rows on
# every iteration.
collected_rows = []
try:
    for video_info in videos_info:
        # Fetch comments data for the current video
        collected_rows.extend(
            get_video_comments(
                video_info["video_id"],
                video_info["relacion_evento"],
                video_info["evento"],
                video_info["tipo_evento"],
                video_info["condiciones_cuenta"],
            )
        )
finally:
    # Runs even if a fetch raises or the cell is interrupted, so the rows
    # gathered so far still end up in all_comments_df.
    all_comments_df = pd.concat(
        [all_comments_df, pd.DataFrame(collected_rows)],
        ignore_index=True,
    )

Guardamos el dataframe en un archivo CSV para su posterior exploración.

In [24]:
# Relative destination for all the notebooks
directorio_destino = os.path.join('..', "data", "raw")
archivo_csv = os.path.join(directorio_destino, 'youtube_comments_with_flags_and_replies.csv')

# Create the destination directory if it is missing; to_csv does not create
# parent directories and would otherwise fail on a fresh checkout.
os.makedirs(directorio_destino, exist_ok=True)

# Saving the combined DataFrame to a CSV file with UTF-8 encoding
all_comments_df.to_csv(archivo_csv, index=False, encoding='utf-8')
