In [39]:
from bs4 import BeautifulSoup
from datetime import datetime
import concurrent.futures
import requests
import threading

def get_soup_by_link(link, timeout=30):
    """Download *link* and return its body parsed as a BeautifulSoup tree.

    Args:
        link: URL fetched with an HTTP GET.
        timeout: Seconds handed to ``requests.get``.  Without a timeout a
            stalled server would block the calling (worker) thread forever.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``'html.parser'`` backend.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    response = requests.get(link, timeout=timeout)
    # NOTE: the HTTP status is not checked; an error page is parsed like any
    # other HTML document.
    return BeautifulSoup(response.text, 'html.parser')
 
def parse_rating(table_movie_rating):
    """Return the rating text from the rating table, or ``"Unknown"``.

    Reads ``tbody.tr.td.div.span.text`` from *table_movie_rating*.  If any
    link of that chain is missing (including *table_movie_rating* itself
    being ``None``) the function falls back to ``"Unknown"``.
    """
    try:
        return table_movie_rating.tbody.tr.td.div.span.text
    except AttributeError:
        # A missing element shows up as attribute access on None.
        return "Unknown"
    
def parse_genre(table_movie_info):
    """Return the genre text from the movie info table, or ``"Unknown"``.

    Looks up the ``<td class="genre">`` cell and reads ``p.a.text`` from it.
    A missing table, cell, paragraph or anchor yields ``"Unknown"``.
    """
    try:
        return table_movie_info.find('td', {'class': 'genre'}).p.a.text
    except AttributeError:
        # A missing element shows up as attribute access on None.
        return "Unknown"
    
def parse_year(table_movie_info):
    """Return the year text from the movie info table, or ``"Unknown"``.

    Looks up the ``<td class="year">`` cell and reads ``a.text`` from it.
    A missing table, cell or anchor yields ``"Unknown"``.
    """
    try:
        return table_movie_info.find('td', {'class': 'year'}).a.text
    except AttributeError:
        # A missing element shows up as attribute access on None.
        return "Unknown"

def parse(table_movie_info, class_name):
    """Return the text of the ``<td>`` with the given class, or ``"Unknown"``.

    Args:
        table_movie_info: Object whose ``find`` locates the cell (may be
            ``None`` when the table was not found).
        class_name: Value of the ``class`` attribute to search for.

    Returns:
        The cell's ``text``, or ``"Unknown"`` when the table or the cell is
        missing.
    """
    try:
        return table_movie_info.find('td', {'class': class_name}).text
    except AttributeError:
        # A missing element shows up as attribute access on None.
        return "Unknown"

def parse_min_age(td_with_data):
    """Return the minimum-age label text, or ``"Unknown"``.

    Finds the ``<div class="title__labels">`` element and returns the text of
    its second child (``contents[1]``).  A missing container, a missing div,
    or a div with fewer than two children yields ``"Unknown"``.
    """
    try:
        return td_with_data.find('div', {'class': 'title__labels'}).contents[1].text
    except (AttributeError, IndexError):
        # AttributeError: an element is missing (None) or the child has no
        # ``text``; IndexError: the div has fewer than two children.
        return "Unknown"

def parse_director(td_with_data):
    """Return the director text from the first direct ``<p>`` child.

    Takes the first ``<p>`` that is a direct child of *td_with_data* and
    returns its text without the first 10 characters.  Returns ``"Unknown"``
    when the container is missing or has no direct ``<p>`` child.
    """
    try:
        paragraphs = td_with_data.find_all('p', recursive=False)
        # Presumably the first 10 characters are a fixed label prefix on the
        # page -- TODO confirm against the live markup.
        return paragraphs[0].text[10:]
    except (AttributeError, IndexError):
        # AttributeError: container is None; IndexError: no <p> children.
        return "Unknown"

def parse_actors(td_with_data):
    """Return the actors text from the second direct ``<p>`` child.

    Takes the second ``<p>`` that is a direct child of *td_with_data* and
    returns its text without the first 9 characters.  Returns ``"Unknown"``
    when the container is missing or has fewer than two direct ``<p>``
    children.
    """
    try:
        paragraphs = td_with_data.find_all('p', recursive=False)
        # Presumably the first 9 characters are a fixed label prefix on the
        # page -- TODO confirm against the live markup.
        return paragraphs[1].text[9:]
    except (AttributeError, IndexError):
        # AttributeError: container is None; IndexError: fewer than two <p>.
        return "Unknown"

def parse_film(link):
    """Download one film page and store its parsed fields in ``movies``.

    The page is fetched with ``get_soup_by_link``; the film title (the
    ``<h1>`` string inside the ``post b-event-post`` cell) becomes the key in
    the module-level ``movies`` dict.  When the module-level ``show_details``
    flag is true, a line with the start/finish timestamps and the title is
    printed.

    Args:
        link: URL of the film page.

    Raises:
        AttributeError: If the page has no ``<td class="post b-event-post">``
            cell or that cell has no ``<h1>``.
    """
    start_time = datetime.now().strftime("%H:%M:%S.%f")

    soup = get_soup_by_link(link)
    td_with_data = soup.find('td', {'class': 'post b-event-post'})
    table_movie_info = td_with_data.find('table', {'class': 'movie_info'})
    table_movie_rating = td_with_data.find('table', {'class': 'movie_rating'})
    name = td_with_data.h1.string

    # Build the whole record locally and publish it with a single assignment,
    # so other threads never observe a half-filled entry in the shared dict.
    film = {
        'link': link,
        'genre': parse_genre(table_movie_info),
        'year': parse_year(table_movie_info),
        'country': parse(table_movie_info, 'author'),
        'duration': parse(table_movie_info, 'duration'),
        'end_date': parse(table_movie_info, 'date'),
        'director': parse_director(td_with_data),
        'actors': parse_actors(td_with_data),
        'rating': parse_rating(table_movie_rating),
        'min_age': parse_min_age(td_with_data),
    }
    movies[name] = film

    finish_time = datetime.now().strftime("%H:%M:%S.%f")
    if show_details:
        print(f"{start_time} -> {finish_time} - {name}")

def parse_all_films(links, threads_amount):
    """Parse every film page in *links* on a thread pool and report timing.

    Args:
        links: Sized collection of film page URLs (``len(links)`` is used in
            the summary line).
        threads_amount: ``max_workers`` for the ``ThreadPoolExecutor``.

    Prints a summary line with the elapsed wall-clock time.  Exceptions
    raised by ``parse_film`` do not abort the run; each failed link is
    reported on its own line after the summary instead of being lost.
    """
    start_time = datetime.now()
    failures = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=threads_amount) as executor:
        future_to_link = {executor.submit(parse_film, link): link for link in links}
        for future in concurrent.futures.as_completed(future_to_link):
            # Inspect each future: a discarded result would hide the error.
            error = future.exception()
            if error is not None:
                failures.append((future_to_link[future], error))
    duration = datetime.now() - start_time
    print(f"{len(links)} films were parsed by {threads_amount} threads in {duration}")
    for link, error in failures:
        print(f"  failed: {link} ({error!r})")
    
def get_links():
    """Yield film page URLs scraped from the film listing page.

    For every ``<li class="lists__li">`` on the listing, the first
    ``<a class="name">`` is inspected; its ``href`` is yielded when it starts
    with the listing URL and does not contain ``'?utm_source'``.
    """
    main_link = 'https://afisha.tut.by/film/'
    listing = get_soup_by_link(main_link)
    for item in listing.findAll('li', {'class': 'lists__li'}):
        anchors = item.findAll('a', {'class': 'name'})
        if not anchors:
            # List item without a title link: nothing to yield.
            continue
        href = anchors[0]['href']
        if not href.startswith(main_link) or '?utm_source' in href:
            # Skip links outside the listing and those with a utm_source query.
            continue
        yield href

# Shared result store: parse_film writes one entry per film title.
# It is not cleared between runs, so later runs overwrite earlier entries.
movies = {}
# Materialized once so every run reuses the same links and len() works.
links = list(get_links())

# Time the same workload with progressively fewer worker threads.
show_details = False
for threads_amount in (80, 40, 20, 16, 12, 8, 6, 5, 4, 3, 2, 1):
    parse_all_films(links, threads_amount)

# Final single-threaded run with the per-film timing lines enabled.
show_details = True
parse_all_films(links, 1)

76 films were parsed by 80 threads in 0:00:14.574859
76 films were parsed by 40 threads in 0:00:15.560387
76 films were parsed by 20 threads in 0:00:15.321041
76 films were parsed by 16 threads in 0:00:14.598171
76 films were parsed by 12 threads in 0:00:15.328391
76 films were parsed by 8 threads in 0:00:15.456251
76 films were parsed by 6 threads in 0:00:17.414321
76 films were parsed by 5 threads in 0:00:19.728700
76 films were parsed by 4 threads in 0:00:22.416148
76 films were parsed by 3 threads in 0:00:26.755054
76 films were parsed by 2 threads in 0:00:38.003264
76 films were parsed by 1 threads in 0:01:07.577010
23:39:52.382473 -> 23:39:54.102370 - Джуманджи: новый уровень
23:39:54.102682 -> 23:39:55.460265 - Полицейский с Рублевки. Новогодний беспредел 2
23:39:55.460457 -> 23:39:56.333892 - II (Два)
23:39:56.334035 -> 23:39:57.180978 - Царь зверей
23:39:57.181142 -> 23:39:58.074317 - Тайна Мосли
23:39:58.074452 -> 23:39:59.114219 - Собаки не носят штанов
23:39:59.114501 -> 23