## YouTube API Video Data Scraping and Analysis

In [None]:
import googleapiclient.discovery
import pandas as pd

# Initialize YouTube API
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = dev  # API key; `dev` must already be defined in the session

youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=DEVELOPER_KEY)

# Search for videos with the keyword 'challenge'
search_request = youtube.search().list(
    part="snippet",
    q="challenge",
    type="video",
    maxResults=50  # Request up to 50 search results
)

search_response = search_request.execute()

# Collect video IDs (tolerate a response that carries no 'items' key)
video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]

# Fetch video details (title + statistics) for all IDs in one call
videos_request = youtube.videos().list(
    part="snippet,statistics",
    id=",".join(video_ids)
)

videos_response = videos_request.execute()

# Extract required data
videos_data = []
for item in videos_response.get('items', []):
    stats = item.get('statistics', {})
    title = item['snippet']['title']
    # The API delivers counts as strings; cast them so the columns are
    # numeric and agree with the integer fallback used for missing counts.
    view_count = int(stats.get('viewCount', 0))
    like_count = int(stats.get('likeCount', 0))

    videos_data.append([
        title,
        view_count,
        like_count,
    ])

# Create DataFrame
df = pd.DataFrame(videos_data, columns=['title', 'view_count', 'like_count'])

print(df)





                                                title view_count like_count
0   Big vs Medium vs Small Food Challenge | Funny ...    4317733      15876
1   Choose One – YES or NO Challenge (40 Hardest C...    4090060       9746
2   Green Food VS Brown Food Challenge ASMR🪴💩#mukb...   50097367    1123617
3   Most Iconic Eliminations In Challenge History ...    2726564      18698
4                 $1 VS $100,000 WATERPARK CHALLENGE!   29024007     233666
5             Choose One Button - YES or No Challenge    3346463      61599
6   Me vs Grandma Cooking Challenge with Monkey by...     193399       1219
7                         HOT vs COLD POOL Challenge!    4490136      27501
8   PINK VS BLACK FOOD CHALLENGE! Eating Only 1 Co...   22882862      80187
9           Try Not To Laugh Challenge....(VERY HARD)    3214998      99213
10                       Pick a Flag!? 🥵 (Spicy Food)  124248175    3197486
11  Don't Hurt Yourself Challenge Dance Compilatio...    5048831      17948
12  Marshmal

In [None]:
# Spot-check ten randomly chosen rows of the collected data.
df.sample(n=10)

Unnamed: 0,title,view_count,like_count
28,Défi de Cache-Cache Extrême #4 par Multi DO Ch...,2338166,8777
9,Try Not To Laugh Challenge....(VERY HARD),3214998,99213
39,The FABARM Shotgun Challenge!,14747,667
10,Pick a Flag!? 🥵 (Spicy Food),124248175,3197486
20,"Big, Medium and Small Plate Challenge #2 by Mu...",67159227,328535
38,"Challenge (Full Video) Ninja, Sidhu Moose Wala...",26463957,323155
25,Pink Colour food challenge for 24 Hours 😱,19056925,344342
31,Overnight surviving challenge on low to high b...,1298903,41575
17,REAL Vs CAKE Challenge! **DELICIOUS**,18374247,231108
0,Big vs Medium vs Small Food Challenge | Funny ...,4317733,15876


## Fetch Top 50 Results for "Reaction" and "Vlog" Videos



In [None]:
import googleapiclient.discovery
import pandas as pd

# YouTube Data API client configuration.
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = dev  # API key; `dev` has to be defined before this cell runs

# Build the client that fetch_videos_data() uses for its requests.
youtube = googleapiclient.discovery.build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=DEVELOPER_KEY,
)

def fetch_videos_data(keyword):
    """Return title and engagement counts for the top search hits of a keyword.

    Issues one ``search().list`` request (video results only, up to 50)
    followed by one ``videos().list`` request for the returned IDs, both
    through the module-level ``youtube`` client.

    Args:
        keyword: Search query string, passed to the API as ``q``.

    Returns:
        A list of ``[title, view_count, like_count]`` rows. Both counts are
        ``int`` and fall back to 0 when the API omits them. The list is empty
        when the search returns no videos.
    """
    search_response = youtube.search().list(
        part="snippet",
        q=keyword,
        type="video",
        maxResults=50,  # Request up to 50 search results
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    if not video_ids:
        # Nothing to look up; avoid calling videos().list with an empty id filter.
        return []

    videos_response = youtube.videos().list(
        part="snippet,statistics",
        id=",".join(video_ids),
    ).execute()

    videos_data = []
    for item in videos_response.get('items', []):
        stats = item.get('statistics', {})
        videos_data.append([
            item['snippet']['title'],
            # The API delivers counts as strings; cast so every row holds ints,
            # matching the integer fallback used for missing counts.
            int(stats.get('viewCount', 0)),
            int(stats.get('likeCount', 0)),
        ])

    return videos_data

# Run the same lookup for each of the three search terms, in this order.
challenge_videos, reaction_videos, vlog_videos = (
    fetch_videos_data(term) for term in ("challenge", "reaction", "vlog")
)

# Stack the per-keyword rows into one list.
all_videos = [*challenge_videos, *reaction_videos, *vlog_videos]

# One DataFrame row per video.
df = pd.DataFrame(data=all_videos, columns=['title', 'view_count', 'like_count'])

# Display the DataFrame
print(df)


                                                 title view_count like_count
0    Big vs Medium vs Small Food Challenge | Funny ...    4317733      15876
1    Choose One – YES or NO Challenge (40 Hardest C...    4090060       9746
2    Green Food VS Brown Food Challenge ASMR🪴💩#mukb...   50097367    1123620
3    Most Iconic Eliminations In Challenge History ...    2726564      18698
4                  $1 VS $100,000 WATERPARK CHALLENGE!   29024007     233666
..                                                 ...        ...        ...
145  お泊まりした日のモーニングルーティン🎀✨彼より早く起きて準備するのだ！#社会人#社会人の休日...     186487       4679
146  Youtube Payment Aa gyi🥳🌸|Miss.Hosiyar|#shorts ...    7977731     408035
147  DOES ANYONE ELSE FEEL THIS WAY? 😔 #vlog #vlogg...     342355      19434
148  Flipkart Delivery Job🥵 || VLOG 12/75 || #miniv...      43366       3445
149                  My Birthday Surprise 😍 New Gharpe    5114298     300334

[150 rows x 3 columns]


In [None]:
# Spot-check ten randomly chosen rows across all three keywords.
df.sample(n=10)

Unnamed: 0,title,view_count,like_count
113,COLLEGE MORNING IN MY LIFE | thursday morning ...,3369623,317283
78,Band Teacher Reacts to Henning May,91803592,4322681
1,Choose One – YES or NO Challenge (40 Hardest C...,4090060,9746
16,My Daughter's 24 HOUR TRAMPOLINE PARK Challenge,60811298,483399
31,Overnight surviving challenge on low to high b...,1298903,41576
94,#Doctor reacts: Foot transplant surgery,48919919,1435608
98,SASUKE & ITACHI REUNITE! | Naruto Shippuden Re...,64773,3228
28,Défi de Cache-Cache Extrême #4 par Multi DO Ch...,2338166,8777
130,Trip pe kaha gye ? 😍🌊 #shorts #trending #miniv...,339454,32567
115,(ENG)[学校VLOG] 夜10時まで学校で勉強する韓国の女子高校生のルーティン | クラ...,9296402,240343


## Add Column for Keyword

In [None]:
import googleapiclient.discovery
import pandas as pd

# YouTube Data API client configuration.
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = dev  # API key; `dev` has to be defined before this cell runs

# Build the client that fetch_videos_data() uses for its requests.
youtube = googleapiclient.discovery.build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=DEVELOPER_KEY,
)

def fetch_videos_data(keyword):
    """Return title, engagement counts and keyword for the top search hits.

    Issues one ``search().list`` request (video results only, up to 50)
    followed by one ``videos().list`` request for the returned IDs, both
    through the module-level ``youtube`` client.

    Args:
        keyword: Search query string, passed to the API as ``q`` and copied
            into every returned row.

    Returns:
        A list of ``[title, view_count, like_count, keyword]`` rows. Both
        counts are ``int`` and fall back to 0 when the API omits them. The
        list is empty when the search returns no videos.
    """
    search_response = youtube.search().list(
        part="snippet",
        q=keyword,
        type="video",
        maxResults=50,  # Request up to 50 search results
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    if not video_ids:
        # Nothing to look up; avoid calling videos().list with an empty id filter.
        return []

    videos_response = youtube.videos().list(
        part="snippet,statistics",
        id=",".join(video_ids),
    ).execute()

    videos_data = []
    for item in videos_response.get('items', []):
        stats = item.get('statistics', {})
        videos_data.append([
            item['snippet']['title'],
            # The API delivers counts as strings; cast so every row holds ints,
            # matching the integer fallback used for missing counts.
            int(stats.get('viewCount', 0)),
            int(stats.get('likeCount', 0)),
            keyword,  # Tag each row with the query that produced it
        ])

    return videos_data

# Run the same lookup for each of the three search terms, in this order.
challenge_videos, reaction_videos, vlog_videos = (
    fetch_videos_data(term) for term in ("challenge", "reaction", "vlog")
)

# Stack the per-keyword rows into one list.
all_videos = [*challenge_videos, *reaction_videos, *vlog_videos]

# One DataFrame row per video; the last column records the originating keyword.
df = pd.DataFrame(
    data=all_videos,
    columns=['title', 'view_count', 'like_count', 'keyword'],
)

# Display the DataFrame
print(df)

                                                 title view_count like_count  \
0    Big vs Medium vs Small Food Challenge | Funny ...    4317733      15876   
1    Choose One – YES or NO Challenge (40 Hardest C...    4090060       9746   
2    Most Iconic Eliminations In Challenge History ...    2726564      18698   
3    Green Food VS Brown Food Challenge ASMR🪴💩#mukb...   50097367    1123620   
4                  $1 VS $100,000 WATERPARK CHALLENGE!   29024007     233666   
..                                                 ...        ...        ...   
145  お泊まりした日のモーニングルーティン🎀✨彼より早く起きて準備するのだ！#社会人#社会人の休日...     186487       4679   
146                                       【1日密着】高校生の一日    9275759     248216   
147  Youtube Payment Aa gyi🥳🌸|Miss.Hosiyar|#shorts ...    7977731     408035   
148  DOES ANYONE ELSE FEEL THIS WAY? 😔 #vlog #vlogg...     342355      19434   
149  Flipkart Delivery Job🥵 || VLOG 12/75 || #miniv...      43366       3445   

       keyword  
0    challenge  
1    

In [None]:
# Spot-check ten randomly chosen rows, now including the keyword column.
df.sample(n=10)

Unnamed: 0,title,view_count,like_count,keyword
69,Superman audience reaction #blackadam #shorts,1400444,83541,reaction
3,Green Food VS Brown Food Challenge ASMR🪴💩#mukb...,50097367,1123620,challenge
125,எங்க அப்பா யாசாவ😂பாருங்க அப்பா அம்மா ✌️ #kanya...,80012,4163,vlog
143,28/30 THAT *FEAR* BEFORE EXAMS🫣 #youtube #mini...,216975,22520,vlog
116,"UNI VLOG 💻 5AM morning, getting back into rout...",232432,6385,vlog
122,how my weekends look like 😅 *productive day vl...,1330456,98354,vlog
95,SASUKE & ITACHI REUNITE! | Naruto Shippuden Re...,64773,3228,reaction
48,FASTEST STREET VADAPAV EATING CHALLENGE😱Brothe...,245015,14473,challenge
141,Etne Saare Gifts Mile 😍 Birthday Pe,4429884,252066,vlog
114,COLLEGE MORNING IN MY LIFE | thursday morning ...,3369623,317283,vlog


## Add Column for Date Posted

In [None]:
import googleapiclient.discovery
import pandas as pd

# YouTube Data API client configuration.
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = dev  # API key; `dev` has to be defined before this cell runs

# Build the client that fetch_videos_data() uses for its requests.
youtube = googleapiclient.discovery.build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=DEVELOPER_KEY,
)

def fetch_videos_data(keyword):
    """Return title, counts, keyword and publish date for the top search hits.

    Issues one ``search().list`` request (video results only, up to 50)
    followed by one ``videos().list`` request for the returned IDs, both
    through the module-level ``youtube`` client.

    Args:
        keyword: Search query string, passed to the API as ``q`` and copied
            into every returned row.

    Returns:
        A list of ``[title, view_count, like_count, keyword, published_at]``
        rows. Both counts are ``int`` and fall back to 0 when the API omits
        them; ``published_at`` is the snippet's ``publishedAt`` value,
        unchanged. The list is empty when the search returns no videos.
    """
    search_response = youtube.search().list(
        part="snippet",
        q=keyword,
        type="video",
        maxResults=50,  # Request up to 50 search results
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    if not video_ids:
        # Nothing to look up; avoid calling videos().list with an empty id filter.
        return []

    videos_response = youtube.videos().list(
        part="snippet,statistics",
        id=",".join(video_ids),
    ).execute()

    videos_data = []
    for item in videos_response.get('items', []):
        snippet = item['snippet']
        stats = item.get('statistics', {})
        videos_data.append([
            snippet['title'],
            # The API delivers counts as strings; cast so every row holds ints,
            # matching the integer fallback used for missing counts.
            int(stats.get('viewCount', 0)),
            int(stats.get('likeCount', 0)),
            keyword,  # Tag each row with the query that produced it
            snippet['publishedAt'],  # Publication timestamp as returned by the API
        ])

    return videos_data

# Run the same lookup for each of the three search terms, in this order.
challenge_videos, reaction_videos, vlog_videos = (
    fetch_videos_data(term) for term in ("challenge", "reaction", "vlog")
)

# Stack the per-keyword rows into one list.
all_videos = [*challenge_videos, *reaction_videos, *vlog_videos]

# One DataFrame row per video, including keyword and publication date.
df = pd.DataFrame(
    data=all_videos,
    columns=['title', 'view_count', 'like_count', 'keyword', 'published_at'],
)

# Display the DataFrame
print(df)

                                                 title view_count like_count  \
0    Big vs Medium vs Small Food Challenge | Funny ...    4317733      15876   
1    Choose One – YES or NO Challenge (40 Hardest C...    4090060       9746   
2    Most Iconic Eliminations In Challenge History ...    2726564      18698   
3                  $1 VS $100,000 WATERPARK CHALLENGE!   29024007     233666   
4    Green Food VS Brown Food Challenge ASMR🪴💩#mukb...   50097367    1123621   
..                                                 ...        ...        ...   
145  Dono Bhai kese lag rhe hai?😍❤️ #vlog #minivlog...      89923      11052   
146  COME WITH ME TO GET MERMAID NAILS!! #vlog #nai...     207010      15696   
147  Okul Vlog 💗💗#itzy#keşfetaçılartık#vlog#tutsuna...     203609       3539   
148  Aaj m wapas ja rhi hu 🥹💔 #shorts #trending #viral    4257058     229479   
149  Aisa Kya Bana Tha vlog-38#minivlog#trending#ne...     325958          0   

       keyword          published_at  


In [None]:
# Spot-check ten randomly chosen rows, now including the publication date.
df.sample(n=10)

Unnamed: 0,title,view_count,like_count,keyword,published_at
51,A TRIP!| FIRST TIME HEARING The Byrds - Eight...,26472,3144,reaction,2024-09-08T22:54:29Z
106,grwm for the airport 🤍✨🎧☁️ #vlog #aesthetic #m...,14813720,770749,vlog,2023-10-23T17:56:04Z
89,ONE PIECE ANIME REACTION | Episode 36/37 Seas...,8779,723,reaction,2024-09-09T22:00:09Z
61,Majed rates to Hanumankind - Big Dawgs 🇮🇳🇺🇸🔥,27500045,2199248,reaction,2024-07-31T18:50:39Z
14,Marshmallow Emoji Food Challenge ASMR 🥵 #asmr ...,540651,15412,challenge,2024-08-29T15:08:38Z
119,I am *LAUNCHING* my brand!!🫶🏻♥️ @ekaami.by.mit...,1129116,110034,vlog,2024-09-03T05:30:18Z
111,spend friday with me! 💛 #aesthetic #vlog #prod...,1284383,70890,vlog,2023-05-20T01:25:46Z
41,"Eating Challenge ( 8 drumsticks, 5 mochi, 2 lb...",121386364,3738981,challenge,2022-10-21T11:05:56Z
124,first day of preschool 🍎 #vlog #toddlermom #mo...,8906,0,vlog,2024-09-04T14:30:22Z
139,Youtube Payment Aa gyi🥳🌸|Miss.Hosiyar|#shorts ...,7977731,408035,vlog,2024-04-28T13:25:00Z


## Export Final Data to Excel File

In [None]:
import googleapiclient.discovery
import pandas as pd

# YouTube Data API client configuration.
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = dev  # Replace with your actual API key (`dev` is not defined in this cell)

# Build the client that fetch_videos_data() uses for its requests.
youtube = googleapiclient.discovery.build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=DEVELOPER_KEY,
)

def fetch_videos_data(keyword):
    """Return title, counts, keyword and publish date for the top search hits.

    Issues one ``search().list`` request (video results only, up to 50)
    followed by one ``videos().list`` request for the returned IDs, both
    through the module-level ``youtube`` client.

    Args:
        keyword: Search query string, passed to the API as ``q`` and copied
            into every returned row.

    Returns:
        A list of ``[title, view_count, like_count, keyword, published_at]``
        rows. Both counts are ``int`` and fall back to 0 when the API omits
        them; ``published_at`` is the snippet's ``publishedAt`` value,
        unchanged. The list is empty when the search returns no videos.
    """
    search_response = youtube.search().list(
        part="snippet",
        q=keyword,
        type="video",
        maxResults=50,  # Request up to 50 search results
    ).execute()

    video_ids = [item['id']['videoId'] for item in search_response.get('items', [])]
    if not video_ids:
        # Nothing to look up; avoid calling videos().list with an empty id filter.
        return []

    videos_response = youtube.videos().list(
        part="snippet,statistics",
        id=",".join(video_ids),
    ).execute()

    videos_data = []
    for item in videos_response.get('items', []):
        snippet = item['snippet']
        stats = item.get('statistics', {})
        videos_data.append([
            snippet['title'],  # Video title
            # The API delivers counts as strings; cast them so they are stored
            # as integers, matching the integer fallback for missing counts.
            int(stats.get('viewCount', 0)),
            int(stats.get('likeCount', 0)),
            keyword,
            snippet['publishedAt'],  # Publication timestamp as returned by the API
        ])

    return videos_data

# Run the same lookup for each of the three search terms, in this order.
challenge_videos, reaction_videos, vlog_videos = (
    fetch_videos_data(term) for term in ("challenge", "reaction", "vlog")
)

# Stack the per-keyword rows into one list.
all_videos = [*challenge_videos, *reaction_videos, *vlog_videos]

# One DataFrame row per video, including keyword and publication date.
df = pd.DataFrame(
    data=all_videos,
    columns=['title', 'view_count', 'like_count', 'keyword', 'published_at'],
)

# Write the table to an Excel workbook without the row index.
df.to_excel("video_data.xlsx", index=False)

print("Data exported to video_data.xlsx")

Data exported to video_data.xlsx
