In [None]:
## Kevin

In [4]:
pip install google-api-python-client

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install isodate

Note: you may need to restart the kernel to use updated packages.


In [31]:
# Dependencies
import pandas as pd
import requests
import json
from matplotlib import pyplot as plt
from googleapiclient.discovery import build
import isodate

# Import the API key
from config import api_key

# Initialize YouTube Data API v3
youtube = build('youtube', 'v3', developerKey=api_key)

# Define the region codes for US(United States), GB(United Kingdom), and CA(Canada)
regions = {
    "US": "United States",
    "GB": "United Kingdom",
    "CA": "Canada"
}

def get_video_category(category_id):

    request = youtube.videoCategories().list(
        part="snippet",
        id=category_id
    )
    response = request.execute()
    category_name = response["items"][0]["snippet"]["title"]
    return category_name

def format_duration(duration):
    
    duration_obj = isodate.parse_duration(duration)
    minutes = duration_obj.total_seconds() // 60
    seconds = duration_obj.total_seconds() % 60
    return f"{int(minutes)}m {int(seconds)}s"
    
def get_trending_videos(region_code):

    # Call the API to get the top trending videos
    request = youtube.videos().list(
        part="snippet,statistics,contentDetails",
        chart="mostPopular",
        regionCode=region_code,
        maxResults=100
    )
    response = request.execute()

    # Extract video information
    videos = []
    for item in response["items"]:
        video_id = item["id"]
        video_title = item["snippet"]["title"]
        video_category_id = item["snippet"]["categoryId"]
        video_category_name = get_video_category(video_category_id)
        video_views = item["statistics"]["viewCount"]
        #video_likes = item["statistics"]["likeCount"]
        video_comments = item["statistics"]["commentCount"]
        video_duration = format_duration(item["contentDetails"]["duration"])
        videos.append({
            "region code": region_code,
            "id": video_id,
            "title": video_title,
            "category": video_category_name,
            "views": video_views,
            #"likes": video_likes,
            "comments": video_comments,
            "duration": video_duration
        })

    return videos

# Get trending videos for each region
for region_code, region_name in regions.items():
    print(f"\nTop Trending Videos in {region_name}:")
    trending_videos = get_trending_videos(region_code)
    for video in trending_videos:
        print(f"Title: {video['title']}")
        print(f"Video ID: {video['id']}")
        print(f"Category: {video['category']}")
        print(f"Views: {video['views']}")
        #print(f"Likes: {video['likes']}")
        print(f"Comments: {video['comments']}")
        print(f"Duration: {video['duration']}")
        print("-" * 30)


Top Trending Videos in United States:
Title: Conan O'Brien Needs a Doctor While Eating Spicy Wings | Hot Ones
Video ID: FALlhXl6CmA
Category: Entertainment
Views: 2427482
Comments: 23461
Duration: 27m 21s
------------------------------
Title: Bridgerton Season 3 | Official Trailer | Netflix
Video ID: U4JYAx5rNRA
Category: Entertainment
Views: 2735690
Comments: 5379
Duration: 2m 48s
------------------------------
Title: Coming Soon: New Pokémon GO Updates
Video ID: 7D0wFf4GqLI
Category: Gaming
Views: 396896
Comments: 1631
Duration: 0m 15s
------------------------------
Title: Dua Lipa - Illusion (Official Music Video)
Video ID: a9cyG_yfh1k
Category: Music
Views: 1688286
Comments: 10403
Duration: 3m 7s
------------------------------
Title: Joker: Folie à Deux | Official Teaser Trailer
Video ID: xy8aJw1vYHo
Category: Film & Animation
Views: 20014946
Comments: 31242
Duration: 2m 25s
------------------------------
Title: PSG vs. Barcelona: Extended Highlights | UCL Quarter-Finals 1st Leg |

In [32]:
# From the information above, Shameer exported the results into an excel file
results_trending_videos = pd.DataFrame(trending_videos)
results_trending_videos.to_csv("trending_videos.csv", index=False)

In [42]:
## Sophia
#Getting the statistical measures for the trending YouTube Videos.
trending_videos = "trending_videos.csv"
results = pd.read_csv(trending_videos)
df = pd.DataFrame(results)
results.nunique()

region code     1
id             50
title          50
category       10
views          50
comments       50
duration       49
dtype: int64

In [48]:
import numpy as np
numerical_columns = ['views', 'comments']
statistics = {}

for column in numerical_columns:
    numeric_values = pd.to_numeric(df[column], errors='coerce')
    numeric_values = numeric_values.dropna()
    
    mean = np.mean(df[column])
    median = np.median(df[column])
    variance = np.var(df[column])
    std_dev = np.std(df[column])
    std_error = std_dev / np.sqrt(len(df[column]))

    statistics[column] = {
        'Mean': mean,
        'Median': median,
        'Variance': variance,
        'Std. Deviation': std_dev,
        'Std. Error': std_error
    }

for column, values in statistics.items():
    print('-'*30)
    print(f"Statistics for {column} in {region_code}:")
    for stat, value in values.items():
        print(f"{stat}: {value}")

------------------------------
Statistics for views in CA:
Mean: 1330602.48
Median: 449657.0
Variance: 8379728818803.124
Std. Deviation: 2894776.1258520707
Std. Error: 409383.1657213844
------------------------------
Statistics for comments in CA:
Mean: 3865.72
Median: 1656.0
Variance: 36336844.88159999
Std. Deviation: 6028.005049898348
Std. Error: 852.488649561975


In [None]:
## Juan

In [None]:
## Amanda

In [None]:
## Shameer