In [None]:
!pip install asyncio aiohttp nest_asyncio

Collecting asyncio
  Downloading asyncio-3.4.3-py3-none-any.whl (101 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.8/101.8 kB[0m [31m21.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiohttp
  Downloading aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m95.4 MB/s[0m eta [36m0:00:00[0m
Collecting aiosignal>=1.1.2
  Downloading aiosignal-1.3.1-py3-none-any.whl (7.6 kB)
Collecting multidict<7.0,>=4.5
  Downloading multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (123 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.8/123.8 kB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting yarl<2.0,>=1.0
  Downloading yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (304 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.3/304.3 kB[0m [31m50.8 MB/s[0m eta [36m0:00

In [None]:
import asyncio
import nest_asyncio
import aiohttp
import pandas as pd
import os
import json
import logging


# IMPLEMENTATION

## API Key Definition

In [None]:
# Read the TMDB API key from the TMDB_API environment variable (raises KeyError if it is not set).
API_KEY = os.environ["TMDB_API"]

## Patching the event loop with nest_asyncio

In [None]:
# Patch asyncio to allow nested event loops, so that asyncio.run() can be called
# later from a notebook kernel that is already running a loop.
nest_asyncio.apply()

# Functions:

## Fetching a TMDB discover page for a given year

In [None]:
async def fetch_page(Session, year, page):
    """Fetch one page of TMDB "discover movie" results for a release year.

    Args:
        Session: an open ``aiohttp.ClientSession`` (any object whose
            ``get(url, params=...)`` returns an async context manager works).
        year: release year; results are limited to movies whose primary
            release date lies between ``{year}-01-01`` and ``{year}-12-31``.
        page: number of the result page to request.

    Returns:
        The decoded JSON body of the response.
    """
    url = 'https://api.themoviedb.org/3/discover/movie'
    # The query is passed as ``params`` instead of a backslash-continued
    # f-string: the continuation line's indentation used to end up inside the
    # query string in front of ``sort_by``.
    params = {
        'api_key': API_KEY,
        'primary_release_date.gte': f'{year}-01-01',
        'primary_release_date.lte': f'{year}-12-31',
        'sort_by': 'popularity.desc',
        'language': 'en-US',
        # Previously never sent, so every call returned the same first page.
        'page': str(page),
    }
    async with Session.get(url, params=params) as response:
        return await response.json()
        

## Fetching each movie's details

In [None]:
async def fetch_movie_details(Session, movie_id):
    """Request the TMDB details record for ``movie_id`` and return the decoded JSON body."""
    endpoint = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={API_KEY}&language=en-US"
    async with Session.get(endpoint) as resp:
        payload = await resp.json()
    return payload

## Fetching Credits for each movie

In [None]:
async def credits(session, movie_id):
    """Request the TMDB credits (cast and crew) payload for ``movie_id`` as decoded JSON."""
    endpoint = f'https://api.themoviedb.org/3/movie/{movie_id}/credits?api_key={API_KEY}&language=en-US'
    async with session.get(endpoint) as resp:
        payload = await resp.json()
    return payload

## Fetching keywords for each movie

In [None]:
async def keywords(session, movie_id):
    """Request the TMDB keywords payload for ``movie_id`` as decoded JSON."""
    endpoint = f'https://api.themoviedb.org/3/movie/{movie_id}/keywords?api_key={API_KEY}&language=en-US'
    async with session.get(endpoint) as resp:
        payload = await resp.json()
    return payload

## Main 

Here we use all the functions defined above to create the final movie dataset.

In [None]:
def _join_names(items):
    """Comma-join the ``name`` field of every dict in ``items``."""
    return ', '.join([item.get('name') for item in items])


def _build_row(movie_id, response, key, credit):
    """Flatten one movie's details, keywords and credits payloads into a row dict."""
    # Only the first five cast and crew entries are kept.
    top_cast = credit.get('cast', [])[:5]
    top_crew = credit.get('crew', [])[:5]
    return {
        'id': movie_id,
        'imdb_id': response.get('imdb_id'),
        'title': response.get('title'),
        'original_language': response.get('original_language'),
        'original_title': response.get('original_title'),
        'origin_country': ', '.join(response.get('origin_country')),
        'production_companies': _join_names(response.get('production_companies')),
        'production_countries': _join_names(response.get('production_countries')),
        'runtime_in_mins': response.get('runtime'),
        'spoken_languages': _join_names(response.get('spoken_languages')),
        'status': response.get('status'),
        'tagline': response.get('tagline'),
        'homepage': response.get('homepage'),
        'popularity': response.get('popularity'),
        'release_date': response.get('release_date'),
        'overview': response.get('overview'),
        'vote_count': response.get('vote_count'),
        'vote_average': response.get('vote_average'),
        'keywords': _join_names(key.get('keywords')),
        'cast': _join_names(top_cast),
        'crew': _join_names(top_crew),
        'revenue': response.get('revenue'),
    }


async def main(year, pages=200):
    """Build a DataFrame of movies released in ``year`` that report a budget.

    Fetches ``pages`` discover pages, then the details, keywords and credits of
    every distinct movie id found on them, and keeps the movies whose
    ``budget`` is greater than zero.

    Args:
        year: release year passed to ``fetch_page``.
        pages: number of discover pages to request, starting at page 1
            (default 200, the value that used to be hard-coded).

    Returns:
        A ``pandas.DataFrame`` with one row per retained movie; it is empty
        when no movie qualifies.
    """
    async with aiohttp.ClientSession() as Session:
        # fetching the tmdb movie pages
        page_tasks = [fetch_page(Session, year, i) for i in range(1, pages + 1)]
        page_response = await asyncio.gather(*page_tasks)

        # A payload without "results" (e.g. an API error body) contributes no
        # movies instead of aborting the whole run with a KeyError.
        movies = [
            movie
            for response in page_response
            for movie in response.get('results', [])
        ]
        # De-duplicate ids while keeping first-seen order, so a movie listed on
        # several pages is fetched and emitted only once.
        movie_ids = list(dict.fromkeys(
            movie['id'] for movie in movies if 'id' in movie
        ))

        # fetching the movie details
        details_response = await asyncio.gather(
            *[fetch_movie_details(Session, i) for i in movie_ids]
        )
        # fetching keywords
        key_response = await asyncio.gather(
            *[keywords(Session, i) for i in movie_ids]
        )
        # fetching the credits
        credit_response = await asyncio.gather(
            *[credits(Session, i) for i in movie_ids]
        )

        # list for storing the rows
        details_list = []

        for response, key, credit, movie_id in zip(
            details_response, key_response, credit_response, movie_ids
        ):
            try:
                # A missing or null budget is treated as 0 and skipped.
                if (response.get('budget') or 0) <= 0:
                    continue
                details_list.append(_build_row(movie_id, response, key, credit))
            except Exception:
                # logging.exception records the active traceback itself; the
                # previous handler called traceback.format_exc() without the
                # traceback module being imported.
                logging.exception('error in processing movie_id %s', movie_id)

        # creating the DataFrame using the details_list
        return pd.DataFrame(details_list)


# Calling Main function

In [None]:
# Collect the 2021 dataset; asyncio.run() is usable here because
# nest_asyncio.apply() was called in an earlier cell.
data = asyncio.run(main(2021))

In [None]:
# Preview the first five rows of the resulting DataFrame.
data.head()

Unnamed: 0,id,imdb_id,title,original_language,original_title,origin_country,production_companies,production_countries,runtime_in_mins,spoken_languages,...,homepage,popularity,release_date,overview,vote_count,vote_average,keywords,cast,crew,revenue
0,438631,tt1160419,Dune,en,Dune,US,Legendary Pictures,United States of America,155,"普通话, English",...,https://www.dunemovie.com/,360.33,2021-09-15,"Paul Atreides, a brilliant and gifted young ma...",11856,7.782,"empire, future, epic, army, based on novel or ...","Timothée Chalamet, Rebecca Ferguson, Oscar Isa...","Denis Villeneuve, Frank Herbert, Thomas Tull, ...",407573628
1,580489,tt7097896,Venom: Let There Be Carnage,en,Venom: Let There Be Carnage,US,"Columbia Pictures, Pascal Pictures, Matt Tolma...",United States of America,97,"English, Español",...,https://www.venom.movie,240.59,2021-09-30,After finding a host body in investigative rep...,9708,6.799,"hero, psychopath, superhero, anti hero, villai...","Tom Hardy, Woody Harrelson, Michelle Williams,...","Avi Arad, Amy Pascal, Matt Tolmach, Robert Ric...",506900000
2,634649,tt10872600,Spider-Man: No Way Home,en,Spider-Man: No Way Home,US,"Marvel Studios, Pascal Pictures, Columbia Pict...",United States of America,148,"English,",...,https://www.sonypictures.com/movies/spidermann...,226.707,2021-12-15,Peter Parker is unmasked and no longer able to...,19470,8.0,"new york city, hero, showdown, magic, loss of ...","Tom Holland, Zendaya, Benedict Cumberbatch, Ja...","Amy Pascal, Kevin Feige, Jon Watts, Stan Lee, ...",1921847111
3,615457,tt7888964,Nobody,en,Nobody,US,"87North Productions, Odenkirk Provissiero Ente...",United States of America,92,"English, Pусский, Español",...,https://www.nobody.movie,157.498,2021-03-18,"Hutch Mansell, a suburban dad, overlooked husb...",6785,7.968,"assassin, double life, midlife crisis, bratva ...","Bob Odenkirk, Aleksey Serebryakov, Connie Niel...","Ilya Naishuller, Bob Odenkirk, Braden Aftergoo...",61779301
4,497698,tt3480822,Black Widow,en,Black Widow,US,Marvel Studios,United States of America,134,"English, Pусский",...,https://www.marvel.com/movies/black-widow,165.419,2021-07-07,"Natasha Romanoff, also known as Black Widow, c...",9853,7.254,"assassin, hero, spy, kgb, based on comic, fema...","Scarlett Johansson, Florence Pugh, Rachel Weis...","Kevin Feige, Louis D'Esposito, Victoria Alonso...",379751131


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=8a1d676e-e27b-404d-bfd5-87d0888f3dfd' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>