In [43]:
from pytube import YouTube
from moviepy.editor import *
from bs4 import BeautifulSoup
import os


import sys
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import time

In [33]:
def download_youtube_mp3(url):
    """
    Downloads the audio of a YouTube video and saves it as an MP3 file.

    The first audio-only stream is downloaded, re-encoded to MP3 next to the
    downloaded file (same base name, ".mp3" extension), and the intermediate
    download is deleted once the conversion has succeeded.

    Args:
    url: the YouTube video URL

    Returns:
    None

    Raises:
    ValueError: if the video exposes no audio-only stream
    """
    # Pick the first audio-only stream; .first() yields None when nothing matches
    yt = YouTube(url)
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for: {url}")
    output_path = audio_stream.download()

    # Convert the downloaded audio to an MP3 file
    mp3_file = os.path.splitext(output_path)[0] + ".mp3"
    audio_clip = AudioFileClip(output_path)
    try:
        audio_clip.write_audiofile(mp3_file)
    finally:
        # Always release the clip's reader, even if the conversion fails
        audio_clip.close()

    # Delete the original download; only reached when the conversion succeeded
    os.remove(output_path)

    print("Download complete. MP3 file saved at: ", mp3_file)

In [3]:
def build_youtube_query_url(theme):
    """
    Build a youtube search query based on theme keyword.
    All videos from URL will be filtered to be between 4-20 minutes long.

    Args:
    theme: keyword(s) describing the mood/theme; URL-encoded before use so
        spaces and reserved characters (e.g. "&", "#") stay inside the
        search_query parameter

    Returns:
    The YouTube search results URL as a string
    """
    # Local import keeps this cell runnable on its own
    from urllib.parse import quote_plus

    # quote_plus turns spaces into "+", matching the "+instrumental+song" suffix
    encoded_theme = quote_plus(str(theme))
    return f"https://www.youtube.com/results?search_query={encoded_theme}+instrumental+song&sp=EgIYAw%253D%253D"

In [55]:

def scrape_youtube_urls_from_query_page(youtube_query_url, scroll_pause_time=3, scroll_amount = 5000):
    """
    Open a YouTube search results page in Chrome and collect video links.

    Args:
    youtube_query_url: URL of the YouTube search results page to load
    scroll_pause_time: seconds to sleep after scrolling before links are read
    scroll_amount: vertical offset passed to window.scrollTo after page load

    Returns:
    A set of href strings of anchor elements whose href contains "watch?v="
    """
    # Launch the Chrome browser using Selenium WebDriver
    chrome_options = webdriver.ChromeOptions()
    # chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    # The driver path is not passed positionally: older Selenium versions
    # default to "chromedriver" on PATH, and newer ones reject the positional
    # path argument.
    driver = webdriver.Chrome(options=chrome_options)

    try:
        # Load the YouTube url
        driver.get(youtube_query_url)

        # Let element lookups wait up to 10 seconds for elements to appear
        driver.implicitly_wait(10)

        # Scroll down, then pause before reading the links
        driver.execute_script(f"window.scrollTo(0, {scroll_amount});")
        time.sleep(scroll_pause_time)

        # Find all the links on the page
        links = driver.find_elements(By.TAG_NAME, "a")

        # Keep only links to videos; the set removes exact duplicates
        youtube_links = set()
        for link in links:
            href = link.get_attribute("href")
            if href and "watch?v=" in href:
                youtube_links.add(href)
        return youtube_links
    finally:
        # Close the browser even if loading or scraping raised
        driver.quit()

In [56]:
# Build the search URL for the "peaceful" theme and print it for reference.
query_url = build_youtube_query_url("peaceful")
print(query_url)
# Last expression of the cell: the scraped set of video URLs is shown as output.
scrape_youtube_urls_from_query_page(query_url)

https://www.youtube.com/results?search_query=peaceful+instrumental+song&sp=EgIYAw%253D%253D


{'https://www.youtube.com/watch?v=11JxJh654qk',
 'https://www.youtube.com/watch?v=1P59sBpb3xg',
 'https://www.youtube.com/watch?v=1P59sBpb3xg&t=302s',
 'https://www.youtube.com/watch?v=1P59sBpb3xg&t=618s',
 'https://www.youtube.com/watch?v=3bwWp2GvmAc',
 'https://www.youtube.com/watch?v=4J6kWT4ggHg',
 'https://www.youtube.com/watch?v=5hjnNq-JG-8',
 'https://www.youtube.com/watch?v=7gz-gcHSPhw',
 'https://www.youtube.com/watch?v=D4BZshNWJMc',
 'https://www.youtube.com/watch?v=DFdORAPImvE',
 'https://www.youtube.com/watch?v=Ditj9MZmJ1k',
 'https://www.youtube.com/watch?v=EQuoy4Zr_sk',
 'https://www.youtube.com/watch?v=EiP4ug_d_Vc',
 'https://www.youtube.com/watch?v=HpvedStXfTo',
 'https://www.youtube.com/watch?v=IcRkwmZZD24',
 'https://www.youtube.com/watch?v=JTrBONyofRc',
 'https://www.youtube.com/watch?v=KBeQHitqRZA',
 'https://www.youtube.com/watch?v=LXwx6CCZvq8',
 'https://www.youtube.com/watch?v=LnFQZukLhXU',
 'https://www.youtube.com/watch?v=OrHNNZHhqyg',
 'https://www.youtube.com/

In [34]:
# Manual check: download one of the scraped videos and convert it to MP3.
download_youtube_mp3('https://www.youtube.com/watch?v=1P59sBpb3xg')

MoviePy - Writing audio in /Users/richardguo/Columbia/applied_cv/hans-zimmer-bot/notebooks/Sounds Of Isha ⋄ Soothing instrumental music.mp3


                                                                                

MoviePy - Done.
Download complete. MP3 file saved at:  /Users/richardguo/Columbia/applied_cv/hans-zimmer-bot/notebooks/Sounds Of Isha ⋄ Soothing instrumental music.mp3


