### Install necessary libraries

In [1]:
# !pip install vaderSentiment
# !pip install langdetect
# !pip install transformers

### Import libraries

In [2]:
import os
import re
import time

import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

### Functions to scrape YouTube video topics and comments

In [None]:
# NOTE(review): `df` is not created anywhere in this part of the notebook; it must already
# be present in the kernel (the previously collected comments) before this cell runs.
# Its 'Video ID' values are gathered into a set so that the membership checks done while
# searching are cheap.
existing_ids = {*df['Video ID']}

def get_topic_videos(keywords, api_key, max_videos_per_keyword, existing_ids):
    """Search YouTube for each keyword and record the first topic category of every new video.

    Args:
        keywords: Iterable of search query strings.
        api_key: Developer key passed to the YouTube Data API v3 client.
        max_videos_per_keyword: Sent as ``maxResults`` with every search request.
        existing_ids: Container of video IDs that must be skipped.

    Returns:
        dict mapping each video ID to ``{"keyword": ..., "topic": ...}`` where
        ``topic`` is the first entry of the video's ``topicCategories`` or ``None``
        when the video reports none. A video matched by several keywords keeps the
        last keyword that matched it.

    Errors raised by the API client are not caught here and propagate to the caller.
    """
    # videos.list takes a comma-separated ID list; 50 IDs per request is used as the
    # batch ceiling so a single request never grows unbounded.
    ids_per_request = 50

    youtube = build('youtube', 'v3', developerKey=api_key)

    videos = {}
    for keyword in keywords:
        search_response = youtube.search().list(
            q=keyword,
            part="id,snippet",
            maxResults=max_videos_per_keyword
        ).execute()

        # Search results may be channels or playlists as well; keep only videos that
        # are not already known, de-duplicated while preserving result order.
        new_ids = list(dict.fromkeys(
            item["id"]["videoId"]
            for item in search_response.get("items", [])
            if item["id"]["kind"] == "youtube#video"
            and item["id"]["videoId"] not in existing_ids
        ))

        # One topicDetails request per batch of IDs instead of one per video.
        topics = {}
        for start in range(0, len(new_ids), ids_per_request):
            batch = new_ids[start:start + ids_per_request]
            video_response = youtube.videos().list(
                id=",".join(batch),
                part="topicDetails"
            ).execute()
            for item in video_response.get("items", []):
                categories = item.get("topicDetails", {}).get("topicCategories") or []
                topics[item["id"]] = categories[0] if categories else None

        # Videos missing from the response (or without topic data) get topic None.
        for video_id in new_ids:
            videos[video_id] = {"keyword": keyword, "topic": topics.get(video_id)}

    return videos

def get_comments(youtube, video_id, max_comments_per_video):
    """Return the text of up to ``max_comments_per_video`` top-level comments of a video.

    Args:
        youtube: YouTube Data API client exposing ``commentThreads().list(...)``.
        video_id: ID of the video whose comment threads are read.
        max_comments_per_video: Upper bound on the number of comments returned.

    Returns:
        list of ``textDisplay`` strings, one per top-level comment, in API order.

    Errors raised by the API client (for example when a request is rejected) are not
    caught here; the caller decides how to handle them.
    """
    # A single commentThreads page is capped, so larger limits are served by
    # following nextPageToken until enough comments have been collected.
    page_limit = 100

    comments = []
    page_token = None
    while len(comments) < max_comments_per_video:
        params = {
            "part": "snippet",
            "videoId": video_id,
            "textFormat": "plainText",
            "maxResults": min(page_limit, max_comments_per_video - len(comments)),
        }
        if page_token is not None:
            params["pageToken"] = page_token

        results = youtube.commentThreads().list(**params).execute()

        for item in results.get("items", []):
            comment = item["snippet"]["topLevelComment"]
            comments.append(comment["snippet"]["textDisplay"])

        page_token = results.get("nextPageToken")
        if page_token is None:
            break  # no further pages available

    return comments[:max_comments_per_video]

### Topic API parameters

In [None]:
# Sent as maxResults with every keyword search.
max_videos_per_keyword = 30
# Upper bound on comments requested per video.
max_comments_per_video = 70

# Credentials must not live in the notebook: the key is read from the environment.
# YOUTUBE_API_KEY_TOPICS allows this stage to use its own key; otherwise the shared
# YOUTUBE_API_KEY is required, and a missing key fails right here with KeyError.
# The key that used to be hard-coded in this cell should be treated as leaked and rotated.
api_key = os.environ.get('YOUTUBE_API_KEY_TOPICS') or os.environ['YOUTUBE_API_KEY']

# Search queries used to discover videos; each entry triggers one search request.
# The group comments below are only a reading aid — the list is consumed flat.
keywords = [
    # Education
    "Online courses",
    "Educational content",
    "Study with me",
    "Language learning",
    "Science experiments",
    "Math tutorials",
    "Programming tutorials",
    # Entertainment
    "Music videos",
    "Stand-up comedy",
    "Short films",
    "Documentaries",
    "Movie trailers",
    "Concerts",
    "Animations",
    # Health and wellness
    "Yoga classes",
    "Meditation",
    "Home workouts",
    "Diet tips",
    "Mental health",
    "Wellness tips",
    "Healthy recipes",
    # Technology and products
    "Product reviews",
    "Software tutorials",
    "Gadget unboxing",
    "Tech news",
    "Coding tutorials",
    "Blockchain and cryptocurrencies",
    "Artificial intelligence",
    # Travel and food
    "Travel vlogs",
    "Food recipes",
    "City tours",
    "Cultural experiences",
    "Adventure travel",
    "Street food",
    "Restaurant reviews",
    # Beauty and fashion
    "Makeup tutorials",
    "Fashion hauls",
    "Skincare routines",
    "Hair styling",
    "Fashion trends",
    "DIY fashion",
    # Gaming
    "Gameplay walkthroughs",
    "Esports tournaments",
    "Game reviews",
    "Gaming news",
    "Live gaming",
    "Game mods",
    "Game strategies",
    # DIY, home and miscellaneous
    "DIY projects",
    "Craft tutorials",
    "Home decor ideas",
    "Gardening tips",
    "Pet care tips",
    "Automotive repair",
    "Financial advice",
    "Book reviews",
    "Current events",
    "Interviews",
]

#### Get video topics and comments

In [None]:
# Discover new videos for every keyword, then flatten their comments into one table
# with a row per comment.
video_data = get_topic_videos(keywords, api_key, max_videos_per_keyword, existing_ids)

# The API client does not depend on the video, so it is built once instead of per iteration.
youtube = build('youtube', 'v3', developerKey=api_key)

data = {'Video ID': [], 'Comment': [], 'Keyword': [], 'Topic': []}
for video_id, video_info in video_data.items():
    print(f'Fetching comments for video: {video_id}')
    try:
        comments = get_comments(youtube, video_id, max_comments_per_video)
    except HttpError as e:
        # Best effort: report the failing video and carry on with the remaining ones.
        print(f'An HTTP error {e.resp.status} occurred: {e.content}')
        continue

    for comment in comments:
        data['Video ID'].append(video_id)
        data['Comment'].append(comment)
        data['Keyword'].append(video_info["keyword"])
        data['Topic'].append(video_info["topic"])

new_df = pd.DataFrame(data)

### Functions to extract metadata for YouTube videos

In [None]:
def get_video_metadata(youtube, video_id):
    """Look up the snippet of one video and return it as a flat record.

    Args:
        youtube: YouTube Data API client exposing ``videos().list(...)``.
        video_id: ID of the video to look up.

    Returns:
        dict with the keys 'Video ID', 'Title', 'Description', 'PublishedAt',
        'ChannelId' and 'ChannelTitle', or ``None`` (after printing a notice) when
        the response contains no items for the video.
    """
    response = youtube.videos().list(part='snippet', id=video_id).execute()
    items = response.get('items', [])

    if not items:
        print(f"No metadata available for video ID: {video_id}")
        return None

    snippet = items[0]['snippet']

    # Output column -> snippet field, in the order the columns should appear.
    columns = (
        ('Title', 'title'),
        ('Description', 'description'),
        ('PublishedAt', 'publishedAt'),
        ('ChannelId', 'channelId'),
        ('ChannelTitle', 'channelTitle'),
    )
    record = {'Video ID': video_id}
    for column, field in columns:
        record[column] = snippet[field]
    return record

def create_metadata_dataframe(video_ids, api_key):
    """Fetch metadata for every given video and assemble the records into a DataFrame.

    Args:
        video_ids: Iterable of video IDs to look up, one request each.
        api_key: Developer key passed to the YouTube Data API v3 client.

    Returns:
        pandas.DataFrame with one row per video for which metadata was returned;
        videos without metadata are left out.
    """
    client = build('youtube', 'v3', developerKey=api_key)

    rows = []
    for vid in video_ids:
        print(f'Fetching metadata for video: {vid}')
        row = get_video_metadata(client, vid)
        if row is None:
            # Nothing came back for this video, so it contributes no row.
            continue
        rows.append(row)

    return pd.DataFrame(rows)

### Metadata API parameters

In [None]:
# Credentials must not live in the notebook: the key is read from the environment.
# YOUTUBE_API_KEY_METADATA allows this stage to use its own key; otherwise the shared
# YOUTUBE_API_KEY is required, and a missing key fails right here with KeyError.
# The key that used to be hard-coded in this cell should be treated as leaked and rotated.
api_key = os.environ.get('YOUTUBE_API_KEY_METADATA') or os.environ['YOUTUBE_API_KEY']

#### Get metadata for YouTube videos

In [None]:
# Look up metadata once per distinct video referenced by full_df and save it to disk.
# NOTE(review): full_df is not created in this part of the notebook — it must already
# exist in the kernel before this cell runs.
unique_video_ids_full_df = full_df['Video ID'].unique()
full_df_metadata = create_metadata_dataframe(
    video_ids=unique_video_ids_full_df,
    api_key=api_key,
)

# index=False keeps the row index out of the CSV.
full_df_metadata.to_csv(path_or_buf='full_df_metadata.csv', index=False)

### Function to extract performance

In [None]:
def get_video_performance(youtube, video_id):
    """Look up the statistics of one video and return them as a flat record.

    Args:
        youtube: YouTube Data API client exposing ``videos().list(...)``.
        video_id: ID of the video to look up.

    Returns:
        dict with 'Video ID' plus 'ViewCount', 'LikeCount', 'DislikeCount',
        'FavoriteCount' and 'CommentCount'; a counter absent from the response is
        stored as ``None``. Returns ``None`` (after printing a notice) when the
        response contains no items for the video.
    """
    response = youtube.videos().list(part='statistics', id=video_id).execute()
    items = response.get('items', [])

    if not items:
        print(f"No performance data available for video ID: {video_id}")
        return None

    stats = items[0]['statistics']

    # Output column -> statistics field; .get() tolerates counters the API omits.
    columns = (
        ('ViewCount', 'viewCount'),
        ('LikeCount', 'likeCount'),
        ('DislikeCount', 'dislikeCount'),
        ('FavoriteCount', 'favoriteCount'),
        ('CommentCount', 'commentCount'),
    )
    record = {'Video ID': video_id}
    for column, field in columns:
        record[column] = stats.get(field)
    return record

def create_performance_dataframe(video_ids, api_key):
    """Fetch statistics for every given video and assemble the records into a DataFrame.

    Args:
        video_ids: Iterable of video IDs to look up, one request each.
        api_key: Developer key passed to the YouTube Data API v3 client.

    Returns:
        pandas.DataFrame with one row per video for which statistics were returned;
        videos without data are left out.
    """
    client = build('youtube', 'v3', developerKey=api_key)

    rows = []
    for vid in video_ids:
        print(f'Fetching performance data for video: {vid}')
        row = get_video_performance(client, vid)
        if row is None:
            # Nothing came back for this video, so it contributes no row.
            continue
        rows.append(row)

    return pd.DataFrame(rows)

### Performance API parameters

In [None]:
# Credentials must not live in the notebook: the key is read from the environment.
# YOUTUBE_API_KEY_PERFORMANCE allows this stage to use its own key; otherwise the shared
# YOUTUBE_API_KEY is required, and a missing key fails right here with KeyError.
# The key that used to be hard-coded in this cell should be treated as leaked and rotated.
api_key = os.environ.get('YOUTUBE_API_KEY_PERFORMANCE') or os.environ['YOUTUBE_API_KEY']

#### Get video performance

In [None]:
# Look up statistics once per distinct video referenced by full_df and save them to disk.
# NOTE(review): full_df is not created in this part of the notebook — it must already
# exist in the kernel before this cell runs.
unique_video_ids_full_df = full_df['Video ID'].unique()
full_df_performance = create_performance_dataframe(
    video_ids=unique_video_ids_full_df,
    api_key=api_key,
)

# NOTE(review): the file name spells "permormance"; it is kept unchanged in case other
# code reads this exact path. index=False keeps the row index out of the CSV.
full_df_performance.to_csv(path_or_buf='df_video_permormance.csv', index=False)