In [18]:
from selenium import webdriver
import time
from selenium.webdriver import FirefoxOptions

In [19]:
def fetch_page(url, wait_seconds=5):
    """Load ``url`` in a headless Firefox session and return the rendered HTML.

    Args:
        url: Address of the page to open.
        wait_seconds: How long to sleep after navigation so that
            client-side scripts can populate the DOM before the source is read.

    Returns:
        The page source (``browser.page_source``) as seen after the wait.

    The browser is always shut down, even if navigation raises.
    """
    options = FirefoxOptions()
    options.add_argument('--headless')
    # The options must be handed to the driver; otherwise '--headless' is
    # silently ignored and a visible browser window is started.
    browser = webdriver.Firefox(options=options)
    try:
        browser.get(url)
        # Fixed delay: gives dynamically loaded content time to appear.
        time.sleep(wait_seconds)
        return browser.page_source
    finally:
        browser.quit()

In [20]:
# print(fetch_page("https://rutube.ru"))

In [21]:
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [22]:
def find_a_hrefs(soup : BeautifulSoup, base_url : str, links : set) -> None:
    """Collect absolute URLs of ``<a>`` tags whose href starts with '/video/'.

    Args:
        soup: Parsed document to search.
        base_url: Base against which each matching href is resolved.
        links: Set that is updated in place with the resolved URLs.
    """
    anchors = soup.find_all('a', href=True)
    links.update(
        urljoin(base_url, anchor['href'])
        for anchor in anchors
        if anchor['href'].startswith('/video/')
    )

In [23]:
def find_videos(soup : BeautifulSoup, base_url : str, links : set) -> None:
    """Collect absolute URLs from the ``src`` attribute of ``<video>`` tags.

    Args:
        soup: Parsed document to search.
        base_url: Base against which each ``src`` value is resolved.
        links: Set that is updated in place with the resolved URLs.

    Tags with a missing or empty ``src`` are skipped.
    """
    sources = (tag.get('src') for tag in soup.find_all('video'))
    links.update(urljoin(base_url, source) for source in sources if source)

In [24]:
def find_video_links(page_source, base_url) -> set:
    """Parse ``page_source`` and return the set of video URLs found in it.

    The markup is parsed with the 'html.parser' backend, then each collector
    (anchor hrefs first, then ``<video>`` sources) adds its absolute URLs,
    resolved against ``base_url``, to one shared set.
    """
    parsed = BeautifulSoup(page_source, 'html.parser')
    collected = set()
    for collector in (find_a_hrefs, find_videos):
        collector(parsed, base_url, collected)
    return collected

# url = "https://rutube.ru"
# find_video_links(fetch_page(url),url)

{'https://rutube.ru/video/068c079224409a2160fb4dd86227f147/',
 'https://rutube.ru/video/0693b46b7c474bdaed48028bbc5a7a7b/',
 'https://rutube.ru/video/07893625ddd5efa12cff680f112aa888/',
 'https://rutube.ru/video/0a80791ce1196a4f3c8c0b02d0a05f19/',
 'https://rutube.ru/video/0b407a93cc9cd563e1f55b518d3a1166/',
 'https://rutube.ru/video/0d11dd5f87902f6eb116457214fd4512/',
 'https://rutube.ru/video/0dac528294919dc37f7baec50d284c28/',
 'https://rutube.ru/video/1093f29f9667ec03a98d36cca70da991/',
 'https://rutube.ru/video/10d4d9dac5f97b2b35081789da81c480/',
 'https://rutube.ru/video/113d2b6f054110ab716781a8a9a56df4/',
 'https://rutube.ru/video/11babf8f6ea28ba89f6484e2e158f0e0/',
 'https://rutube.ru/video/12fd18685396da76d5156d0903124178/',
 'https://rutube.ru/video/1312f78f7428613696f13f9cc63123c2/',
 'https://rutube.ru/video/13a6a04b7582fb16f755ff0fbb8c250c/',
 'https://rutube.ru/video/18f583b8c5eed021f1c05798579e0f92/',
 'https://rutube.ru/video/1d30146673ebbf0c72d73cbae37c21ea/',
 'https:

In [25]:
import yt_dlp
import os

def download_video(video_url : str, save_directory : str) -> None:
    """Download ``video_url`` with yt-dlp into ``save_directory``.

    Args:
        video_url: URL handed to yt-dlp for extraction and download.
        save_directory: Directory in which the output files are placed.

    Files are named ``video_<title>.<ext>``. The info JSON and the thumbnail
    are requested alongside the media, and merged output is requested as mp4.
    Exceptions raised by yt-dlp propagate to the caller.
    """
    opts = {
        # '%(title)s' / '%(ext)s' are yt-dlp template fields, filled in by yt-dlp.
        'outtmpl' : os.path.join(save_directory, 'video_%(title)s.%(ext)s'),
        'writeinfojson' : True,
        # Correct spelling of the option key; the misspelt key was ignored.
        'writethumbnail' : True,
        'merge_output_format' : 'mp4'
    }

    with yt_dlp.YoutubeDL(opts) as ydl:
        ydl.download([video_url])

    print(f"Video from {video_url} downloaded.")

def fetch_video(url):
    """Download every video linked from the page at ``url``.

    The page is rendered with ``fetch_page``, video links are extracted with
    ``find_video_links``, and each link is passed to ``download_video``. Files
    go into a directory named ``videos_<url with '://' and '/' replaced by '_'>``,
    which is created if missing.

    Best-effort: a failure on one video is reported and the remaining links
    are still attempted; a failure while processing the page itself is
    reported and the function returns normally. Returns ``None``.
    """
    try:
        page_source = fetch_page(url)
        # Turn the URL into a name that is safe to use as a single directory.
        directory_suffix = url.replace('://', '_').replace('/', '_')
        directory = f'videos_{directory_suffix}'

        os.makedirs(directory, exist_ok=True)

        links = find_video_links(page_source, url)

        if not links:
            print(f'Has no videos on page: {url}')
        else:
            for video_url in links:
                try:
                    download_video(video_url, directory)
                except Exception as ex:
                    # Report the video that actually failed, not the page URL.
                    print(f'Exception was thrown by download {video_url}. \n\t Exception: {ex}')

    except Exception as ex:
        print(f"Page {url} processing exception: {ex}")
         

In [26]:
# Fetch the Rutube home page and attempt to download every video link found on it.
fetch_video('https://rutube.ru')

[rutube:person] Extracting URL: https://rutube.ru/video/person/43728229/
[download] Downloading playlist: 43728229
[rutube:person] 43728229: Downloading page 1
[rutube:person] 43728229: Downloading page 2
[rutube:person] 43728229: Downloading page 3
[rutube:person] 43728229: Downloading page 4
[info] Writing playlist metadata as JSON to: videos_https_rutube.ru/video_NA.info.json
[rutube:person] Playlist 43728229: Downloading 68 items of 68
[download] Downloading item 1 of 68
[rutube] Extracting URL: https://rutube.ru/video/29db3ffc655ab1ba33182117303b9de0/
[rutube] 29db3ffc655ab1ba33182117303b9de0: Downloading video JSON
[rutube] 29db3ffc655ab1ba33182117303b9de0: Downloading options JSON
[rutube] 29db3ffc655ab1ba33182117303b9de0: Downloading m3u8 information
[rutube] 29db3ffc655ab1ba33182117303b9de0: Downloading m3u8 information
[info] 29db3ffc655ab1ba33182117303b9de0: Downloading 1 format(s): m3u8-2974-1
[info] Writing video metadata as JSON to: videos_https_rutube.ru/video_Научись ск

KeyboardInterrupt: 