In [None]:
# Scraping data from YouTube using Selenium

In [1]:
%%capture

!pip install selenium
!pip install webdriver-manager
!pip install pandas

In [None]:
# Get the Chrome Web Driver
# https://sites.google.com/a/chromium.org/chromedriver/download
# Get the latest stable release

# !cd Downloads
# !unzip chromedriver_linux64.zip
# !mv chromedriver X
# where X is a directory on your PATH (e.g. /usr/local/bin)

In [2]:
from urllib.parse import parse_qs, urlparse

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [3]:
# Scraping Wildlife videos
search_url = "https://www.youtube.com/results?search_query=wildlife"
zoom_out_script = "document.body.style.zoom='40%'"

# Start a Chrome session and load the search results page.
driver = webdriver.Chrome()
driver.get(search_url)
# Shrink the rendered page to 40% zoom through injected JavaScript.
driver.execute_script(zoom_out_script)

In [4]:
# Fetch all the video links on that page

# find_elements_by_xpath was removed in Selenium 4.3; find_elements(By.XPATH, ...)
# is accepted by both Selenium 3 and Selenium 4.
user_data = driver.find_elements(By.XPATH, '//*[@id="video-title"]')

# get_attribute('href') returns None for an element without that attribute.
# Keep only real links, because every collected link is later navigated to
# and parsed for its video id.
links = [href for href in (el.get_attribute('href') for el in user_data) if href]

print("Number of Encountered Links: " + str(len(links)))

Number of Encountered Links: 26


In [5]:
# Empty table with one column per scraped field; rows are appended later.
df = pd.DataFrame(
    columns=[
        'ID',
        'Title',
        'Description',
        'Release date',
        'Views',
    ],
)

In [6]:
# In demo mode only the first 8 links are scraped; otherwise the cap is set
# to a number far larger than any list of links, i.e. effectively no cap.
is_demo = True
limit = 8 if is_demo else int(1e18)

In [7]:
# Let's scrape

wait = WebDriverWait(driver, 10)
v_category = "wildlife"


def _video_id(url):
    """Return the ``v`` query parameter of a watch URL, or None if absent.

    Parsing the query string (instead of splitting on the watch-URL prefix)
    keeps extra parameters such as ``&t=...`` out of the id, and lets links
    that are not ``watch?v=`` URLs be skipped instead of raising.
    """
    if not url:
        return None
    values = parse_qs(urlparse(url).query).get('v')
    return values[0] if values else None


def _text_of(by, selector):
    """Wait (up to the timeout of ``wait``) for the element and return its text."""
    return wait.until(EC.presence_of_element_located((by, selector))).text


for x in links[0: limit]:
    v_id = _video_id(x)
    if v_id is None:
        # Nothing to record without an id; do not load the page at all.
        print("Skipping link without a video id: " + str(x))
        continue

    driver.get(x)
    driver.execute_script("document.body.style.zoom='40%'")

    v_title = _text_of(By.CSS_SELECTOR, "h1.title.style-scope.ytd-video-primary-info-renderer")
    v_desc = _text_of(By.XPATH, '//*[@id="description"]/yt-formatted-string')
    v_date = _text_of(By.XPATH, '//*[@id="info-strings"]/yt-formatted-string')
    v_views = _text_of(By.XPATH, '//*[@id="count"]/ytd-video-view-count-renderer/span[1]')

    # Append one row; column order matches the DataFrame's columns.
    df.loc[len(df)] = [v_id, v_title, v_desc, v_date, v_views]

In [8]:
# Display the DataFrame as this cell's output.
df

Unnamed: 0,ID,Title,Description,Release date,Views
0,GcRKREorGSc,Amazing Wildlife of Botswana - 8K Nature Docum...,Go on a virtual 1-hour safari tour without lea...,10 Jul 2020,"9,746,191 views"
1,GK1ll8e017k,4K Ostrich the Flightless Bird - African Wildl...,"Ostrich - the flightless bird, African wildlif...",9 Mar 2019,"1,157,092 views"
2,4zxAxbBuz8s,Wildlife Africa & Asia: Elephants | Free Docum...,Wildlife Africa & Asia - Episode 5: Elephants ...,4 Dec 2020,"1,612,942 views"
3,7jUW96CiEKA,Wild Life - Nature Documentary Full HD 1080p,Krugers Pafuri Wildlife in Africa | National G...,21 Sept 2020,"230,725 views"
4,Pvl-A3nsfhU,"The best battles of the animal world, Harsh Li...","The best battles of the animal world, Harsh Li...",Premiered on 6 Feb 2021,"4,866,096 views"
5,rIw0_Qy1tjw,Animal Documentary 2021 Nature Wildlife Animal...,Do not forget to subscribe and like the channel,4 Jun 2021,"25,051 views"
6,YTJg8q9Q940,Nature And Wildlife Video – Bird and animal is...,This awesome wildlife video is the compilation...,14 Jul 2017,"3,836,256 views"
7,V2bxUcD0PS0,Lion is haunted by the onslaught of Wildebeest...,Lion is haunted by the onslaught of Wildebeest...,Premiered on 5 Jan 2021,"4,666,014 views"


In [9]:
# Write the DataFrame to YouTube_data.csv in the current working directory.
# No options are passed, so to_csv runs with its defaults.
df.to_csv('./YouTube_data.csv')

In [10]:
%%capture
!pip install pytube

In [11]:
from pytube import YouTube

# Directory handed to stream.download() as the output location.
dump_path = './dump'
IDs = df['ID'].tolist()

# Index to pick from the resolution-ordered list of mp4 streams.
fetch_quality = {'highest': -1,
                 'lowest': 0}

count = 0

for ID in IDs:
    extract_link = 'https://www.youtube.com/watch?v=' + ID
    print('Fetching ' + extract_link)

    # Catch Exception rather than using a bare except, so KeyboardInterrupt and
    # SystemExit still stop the loop, and report what actually went wrong.
    try:
        yt = YouTube(extract_link)
    except Exception as exc:
        print("> Error: " + repr(exc))
        continue

    try:
        mp4_streams = yt.streams.filter(file_extension='mp4').order_by('resolution')
        # Raises IndexError when no mp4 stream is available; reported below.
        stream = mp4_streams[fetch_quality['lowest']]
        stream.download(dump_path)
    except Exception as exc:
        print("> Download error: " + repr(exc))
    else:
        # Count only downloads that completed without raising.
        count += 1

print('Number of videos fetched = ' + str(count))

Fetching https://www.youtube.com/watch?v=GcRKREorGSc
Fetching https://www.youtube.com/watch?v=GK1ll8e017k
Fetching https://www.youtube.com/watch?v=4zxAxbBuz8s
Fetching https://www.youtube.com/watch?v=7jUW96CiEKA
Fetching https://www.youtube.com/watch?v=Pvl-A3nsfhU
Fetching https://www.youtube.com/watch?v=rIw0_Qy1tjw
Fetching https://www.youtube.com/watch?v=YTJg8q9Q940
Fetching https://www.youtube.com/watch?v=V2bxUcD0PS0
Number of videos fetched = 8


In [12]:
# ^_^ Thank You