## 1. Setup

### 1.1 Importing Dependencies


In [34]:
from googleapiclient.discovery import build
import pandas as pd
import os
import webbrowser
import pytube
import matplotlib.pyplot as plt
import urllib.request

## 2. Creating Dataset


### 2.1 Creating directories

In [68]:
# Folder that holds every CSV produced by this notebook. exist_ok=True makes
# the cell safe to re-run when the folder is already there.
data_dir = 'data'
os.makedirs(data_dir, exist_ok=True)

### 2.2 Creating Word List

In [48]:
# Location of the single-column ('word') CSV that the scraping cells read back.
words_path = os.path.join('data', 'words.csv')

# Search terms to scrape; the list is empty here, so only the header row is
# written until words are added.
word_list = []

df = pd.DataFrame(data={'word': word_list})
df.to_csv(words_path, index=False)

### 2.3 YouTube Scraping

In [49]:
def scrape_youtube_videos(query):
    """Search YouTube for *query* and save every hit to ``data/<query>.csv``.

    Pages through the YouTube Data API ``search.list`` endpoint (video
    results only, 50 per page) until the response carries no
    ``nextPageToken``, then writes a CSV with the columns ``link`` (watch
    URL) and ``id`` (video id).

    Args:
        query: Search term; also used verbatim as the CSV file name.

    Raises:
        RuntimeError: If the ``YOUTUBE_API_KEY`` environment variable is
            unset or empty.
    """
    api_key = os.environ.get("YOUTUBE_API_KEY")
    if not api_key:
        # Stop before building the client: a missing key would otherwise be
        # passed on as developerKey=None and only surface once a request runs.
        raise RuntimeError(
            "YOUTUBE_API_KEY environment variable is not set; "
            "export it before calling scrape_youtube_videos()"
        )
    youtube = build('youtube', 'v3', developerKey=api_key)

    video_ids = []
    next_page_token = None

    while True:
        search_response = youtube.search().list(
            q=query,
            type='video',
            part='id',
            maxResults=50,
            pageToken=next_page_token
        ).execute()

        # A page without an 'items' key is treated as an empty page.
        video_ids.extend(
            item['id']['videoId'] for item in search_response.get('items', []))

        next_page_token = search_response.get('nextPageToken')
        if not next_page_token:
            break

    # Derive the links from the ids so both columns always stay aligned.
    video_links = [
        'https://www.youtube.com/watch?v=' + vid for vid in video_ids]

    df = pd.DataFrame({'link': video_links, 'id': video_ids})
    # Make the function usable even if the directory cell was not run.
    os.makedirs('data', exist_ok=True)
    p = os.path.join('data', query + '.csv')
    df.to_csv(p, index=False)

In [50]:
# Load the saved search terms back as a plain Python list.
wd = pd.read_csv(words_path)['word'].tolist()

In [41]:
# Run one search per word; each call writes its results to data/<word>.csv.
for word in wd:
    scrape_youtube_videos(query=word)

### 2.4 Creating Unique Dataset

In [52]:
# Fold every per-word result file into one combined CSV. Rows are appended
# without a header row or index column, so data/links.csv is headerless here.
links_csv = os.path.join('data', 'links.csv')
for word in wd:
    word_csv = os.path.join('data', word + '.csv')
    if not os.path.exists(word_csv):
        # No result file was saved for this word; nothing to merge.
        continue
    df = pd.read_csv(word_csv)
    df.to_csv(links_csv, mode='a', header=False, index=False)

In [55]:
# Normalise data/links.csv in place into a headed index,link,id table with
# one row per video id.
#
# The merge cell appends rows with header=False, so the file cannot be read
# with header inference (the first video would be consumed as column names).
# Rows may carry two fields (link, id) or three (index, link, id); naming
# three columns up front lets shorter rows load with a missing last field.
links_path = 'data/links.csv'
raw = pd.read_csv(links_path, header=None, names=['c0', 'c1', 'c2'], dtype=str)

# A filled third field means the row has a leading index field to skip.
has_index = raw['c2'].notna()
df = pd.DataFrame({
    'link': raw['c1'].where(has_index, raw['c0']),
    'id': raw['c2'].where(has_index, raw['c1']),
})

# Discard incomplete rows and a header row written by an earlier run of this
# cell, then keep the first occurrence of every video id.
df = df.dropna(subset=['id'])
df = df[df['id'] != 'id'].drop_duplicates(subset='id').reset_index(drop=True)

# Regenerate a sequential 'index' column so the file keeps its three-column
# layout for the cell that reloads it and drops that column.
df.insert(0, 'index', range(len(df)))
df.to_csv(links_path, index=False)

In [66]:
# Reload the combined link table and discard its bookkeeping 'index' column,
# leaving only the remaining columns for the following cells.
df = pd.read_csv('data/links.csv').drop(columns='index')
# df.head()

In [67]:
# Pick up to five random video ids to preview. The sample size is capped at
# the number of available ids because Series.sample raises when asked for
# more rows than exist (sampling without replacement).
video_id = df['id']
video_id = video_id.sample(n=min(5, len(video_id)))
# video_id

## 3. Visualizing Dataset

### 3.1 Playing Video in Browser


In [64]:
def play_youtube_video(video_id):
    """Open the watch page of each given YouTube video via ``webbrowser``.

    Args:
        video_id: An iterable of video ids (list, pandas Series, ...) or a
            single video id given as a string.
    """
    # A bare string is one id; iterating it would open one page per character.
    video_ids = [video_id] if isinstance(video_id, str) else video_id
    for vid in video_ids:
        url = f"https://www.youtube.com/watch?v={vid}"
        webbrowser.open(url)

In [65]:
# Open a browser page for every sampled video id.
play_youtube_video(video_id=video_id)

In [None]:


# Watch-page URL of the video to preview; VIDEO_ID looks like a placeholder
# to be replaced with a real id before running this cell.
video_url = "https://www.youtube.com/watch?v=VIDEO_ID"

# Local file the thumbnail is saved to and then read back from.
thumbnail_path = "thumbnail.jpg"

# Look the video up with pytube and take the address of its thumbnail.
yt = pytube.YouTube(video_url)
thumbnail_url = yt.thumbnail_url

# Fetch the image to disk.
urllib.request.urlretrieve(thumbnail_url, thumbnail_path)

# Load the saved image and render it without axis ticks or frame.
thumbnail_image = plt.imread(thumbnail_path)
plt.imshow(thumbnail_image)
plt.axis('off')
plt.show()
