In [None]:
## Kevin

In [1]:
pip install google-api-python-client




In [2]:
pip install isodate




In [32]:
# Dependencies
import pandas as pd
import requests
import json
from matplotlib import pyplot as plt
from googleapiclient.discovery import build
import isodate
import numpy as np

# Import the API key
from config import api_key

# Initialize YouTube Data API v3
youtube = build('youtube', 'v3', developerKey=api_key)

# Define the region codes for US(United States), GB(United Kingdom), and CA(Canada)
regions = {
    "US": "United States",
    "GB": "United Kingdom",
    "CA": "Canada"
}

def get_video_category(category_id):

    request = youtube.videoCategories().list(
        part="snippet",
        id=category_id
    )
    response = request.execute()
    category_name = response["items"][0]["snippet"]["title"]
    return category_name

def format_duration(duration):
    
    duration_obj = isodate.parse_duration(duration)
    minutes = duration_obj.total_seconds() // 60
    seconds = duration_obj.total_seconds() % 60
    return f"{int(minutes)}m {int(seconds)}s"
    
def get_trending_videos(region_code):

    # Call the API to get the top trending videos
    request = youtube.videos().list(
        part="snippet,statistics,contentDetails",
        chart="mostPopular",
        regionCode=region_code,
        maxResults=100
    )
    response = request.execute()

    # Extract video information
    videos = []
    for item in response["items"]:
        video_id = item["id"]
        video_title = item["snippet"]["title"]
        video_category_id = item["snippet"]["categoryId"]
        video_category_name = get_video_category(video_category_id)
        video_views = item["statistics"]["viewCount"]
        #video_likes = item["statistics"]["likeCount"]
        video_comments = item["statistics"]["commentCount"]
        video_duration = format_duration(item["contentDetails"]["duration"])
        videos.append({
            "region code": region_code,
            "id": video_id,
            "title": video_title,
            "category": video_category_name,
            "views": video_views,
            #"likes": video_likes,
            "comments": video_comments,
            "duration": video_duration
        })
    
    return videos

# Get trending videos for each region
all_videos = []
for region_code, region_name in regions.items():
    print(f"\nTop Trending Videos in {region_name}:")
    trending_videos = get_trending_videos(region_code)
    all_videos.extend(trending_videos)
    for video in trending_videos:
        print(f"Title: {video['title']}")
        print(f"Video ID: {video['id']}")
        print(f"Category: {video['category']}")
        print(f"Views: {video['views']}")
        #print(f"Likes: {video['likes']}")
        print(f"Comments: {video['comments']}")
        print(f"Duration: {video['duration']}")
        print("-" * 30)


Top Trending Videos in United States:
Title: Conan O'Brien Needs a Doctor While Eating Spicy Wings | Hot Ones
Video ID: FALlhXl6CmA
Category: Entertainment
Views: 2685268
Comments: 24800
Duration: 27m 21s
------------------------------
Title: Bridgerton Season 3 | Official Trailer | Netflix
Video ID: U4JYAx5rNRA
Category: Entertainment
Views: 2927883
Comments: 5513
Duration: 2m 48s
------------------------------
Title: Liverpool vs. Atalanta: Extended Highlights | UEL Quarter-Finals 1st Leg | CBS Sports Golazo
Video ID: zTBflv2qBKY
Category: Sports
Views: 711517
Comments: 1333
Duration: 14m 46s
------------------------------
Title: UFC 300: Pre-Fight Press Conference
Video ID: ezlJzCNrXZ0
Category: Sports
Views: 944395
Comments: 3058
Duration: 54m 26s
------------------------------
Title: Dua Lipa - Illusion (Official Music Video)
Video ID: a9cyG_yfh1k
Category: Music
Views: 2161697
Comments: 11139
Duration: 3m 7s
------------------------------
Title: Coming Soon: New Pokémon GO Updat

In [14]:
# From the information above, Shameer exported the results into an excel file
videos_df = pd.DataFrame(all_videos)
videos_df.to_csv("trending_videos.csv", index=False)

In [15]:
## Sophia
#Getting the statistical measures for the trending YouTube Videos.
trending_videos = "trending_videos.csv"
results = pd.read_csv(trending_videos)
df = pd.DataFrame(results)
results.nunique()

region code     3
id             76
title          76
category       11
views          78
comments       80
duration       72
dtype: int64

In [38]:
# Defining columns and dictionary to store statistics for each region and column
numerical_columns = ['views', 'comments']

all_statistics = {'US': {}, 'GB': {}, 'CA': {}}

# Creating a for loop to go through each reigon
for region_code, region_name in regions.items():
    # Creating a dictionary to store statistics for the current region
    region_statistics = {}
    
    # Get trending videos for the current region
    videos_df = get_trending_videos(region_code)
    
    # Iterate over each numerical column
    for column in numerical_columns:
        numeric_values = pd.to_numeric(df[column], errors='coerce')
        numeric_values = numeric_values.dropna()
        
        # Calculate statistics
        mean = np.mean(numeric_values)
        median = np.median(numeric_values)
        variance = np.var(numeric_values)
        std_dev = np.std(numeric_values)
        std_error = std_dev / np.sqrt(len(numeric_values))

        # Creating dictionary with our statistics
        region_statistics[column] = {
            'Mean': mean,
            'Median': median,
            'Variance': variance,
            'Std. Deviation': std_dev,
            'Std. Error': std_error
        }
    
    all_statistics[region_code] = region_statistics

# Print statistics for each region and column
for region_code, region_stats in all_statistics.items():
    print(f"\nStatistics for {regions[region_code]} (Region Code: {region_code}):")
    for column, stats in region_stats.items():
        print(f"\n{column}:")
        for stat, value in stats.items():
            print(f"{stat}: {value}")
        print('-'*30)


Statistics for United States (Region Code: US):

views:
Mean: 1514397.7866666666
Median: 492397.0
Variance: 9481763672174.195
Std. Deviation: 3079247.2573949294
Std. Error: 251419.48574940374
------------------------------

comments:
Mean: 5321.786666666667
Median: 2091.0
Variance: 181820571.0478222
Std. Deviation: 13484.085843980014
Std. Error: 1100.9709988545633
------------------------------

Statistics for United Kingdom (Region Code: GB):

views:
Mean: 1514397.7866666666
Median: 492397.0
Variance: 9481763672174.195
Std. Deviation: 3079247.2573949294
Std. Error: 251419.48574940374
------------------------------

comments:
Mean: 5321.786666666667
Median: 2091.0
Variance: 181820571.0478222
Std. Deviation: 13484.085843980014
Std. Error: 1100.9709988545633
------------------------------

Statistics for Canada (Region Code: CA):

views:
Mean: 1514397.7866666666
Median: 492397.0
Variance: 9481763672174.195
Std. Deviation: 3079247.2573949294
Std. Error: 251419.48574940374
---------------

In [None]:
## Juan

In [None]:
## Amanda

In [None]:
## Shameer