In [1]:
from urllib.parse import quote_plus as parser
import requests
import time
import numpy as np
import pandas as pd
import csv
import warnings
warnings.filterwarnings("ignore")

In [3]:
def get_movie_info():
    # Create empty list to store iMDb ids
    imdb_ids = []

    # Initiate page number
    page_number = 1

    # Create the necessary variables for the request
    url = "https://data-imdb1.p.rapidapi.com/titles"
    query = {"info":"mini_info","limit":"50","page": page_number,"titleType":"movie","genre":"Action","year":"2020"}
    headers = {
        "X-RapidAPI-Host": "data-imdb1.p.rapidapi.com",
        "X-RapidAPI-Key": "" #insert your own key
    }

    # Package the request, send the request and catch the response for page number 1: response
    response = requests.request("GET", url, headers=headers, params=query)
    content = response.json()
    for container in content["results"]:
            imdb_ids.append(container["id"])

    # Catch the response for remaining pages
    while content["next"] is not None:  
        query = {"info":"mini_info","limit":"50","page":page_number+1,"titleType":"movie","genre":"Action","year":"2020"}
        response = requests.request("GET", url, headers=headers, params=query)
        content = response.json()
        for container in content["results"]:
                imdb_ids.append(container["id"])
        page_number += 1

    ######################################### 

    # Set a DataFrame and Lists to store information
    movie_info_df = pd.DataFrame(columns = ["movie_id", "title", "year_of_production", 
                                            "release_date","runtime",
                                            "genres","directors",
                                            "writers","actors",
                                            "plot", "languages", "countries_of_origin",
                                            "awards", "boxoffice","metascore", "imdb_votes",
                                            "imdb_rating" ])
    movie_info_row_index = 0



    for imdb_id in imdb_ids: 

        # Create the necessary variables for the request
        base_url = 'http://www.omdbapi.com/?'
        apikey = '' #insert your own key
        query = imdb_id
        url = base_url + "apikey=" + apikey + "&i=" + parser(query)

        # Package the request, send the request and catch the response: response
        response = requests.get(url)
        content = response.json()

        if (content["Response"] != "False"):
            #Store request response in variables
            movie_id = content["imdbID"]
            title = content["Title"]
            year_of_production = content["Year"]
            release_date = content["Released"]
            runtime = content["Runtime"]
            genres = content["Genre"]
            directors = content["Director"]
            writers = content["Writer"]
            actors = content["Actors"]
            plot = content["Plot"]
            languages = content["Language"]
            countries_of_origin = content["Country"]
            awards = content["Awards"]
            
            boxoffice = content["BoxOffice"].replace(",", "").replace("$", "")
            metascore = content["Metascore"]
            imdb_votes = content["imdbVotes"].replace(",", "")
            imdb_rating = content["imdbRating"]

        else: 
            continue

        # Append movie_info_row to DataFrame
        movie_info_row = [movie_id, title, year_of_production, 
                          release_date, runtime,
                          genres, directors, 
                          writers, actors,
                          plot, languages, countries_of_origin,
                          awards,boxoffice, metascore, imdb_votes,imdb_rating 
                         ]


        movie_info_df.loc[movie_info_row_index] = movie_info_row
        movie_info_row_index += 1

    return movie_info_df


In [4]:
movie_info_df = get_movie_info()
movie_info_df.to_csv('movie_info_df.csv')
movie_info_df

Unnamed: 0,movie_id,title,year_of_production,release_date,runtime,genres,directors,writers,actors,plot,languages,countries_of_origin,awards,boxoffice,metascore,imdb_votes,imdb_rating
0,tt0849437,Duel of Legends,2020,29 Aug 2020,,"Action, Drama, Sport",Hector Echavarria,"Brad Bode, Fuad C'Amanero, Hector Echavarria","Cary-Hiroyuki Tagawa, Quinton 'Rampage' Jackso...","1967 China, A young boy (Dax) is left to survi...",English,United States,,,,48,6.1
1,tt0926132,Road to Red,2020,03 Jan 2020,120 min,"Action, Adventure, Horror",Tito da Costa,"Larry Leahy, Tito da Costa","Sean Gray, Chris Blasman, Renee Dorian",A surf-skate action-thriller about five best f...,English,"United States, Portugal",,,,639,3.8
2,tt0983946,Fantasy Island,2020,14 Feb 2020,109 min,"Fantasy, Horror, Mystery",Jeff Wadlow,"Jeff Wadlow, Christopher Roach, Jillian Jacobs","Michael Peña, Maggie Q, Lucy Hale",When the owner and operator of a luxurious isl...,English,United States,5 nominations,27309289,22,48315,4.9
3,tt10011228,Hunted,2020,14 Jan 2021,87 min,"Action, Thriller",Vincent Paronnaud,"Vincent Paronnaud, Léa Pernollet, Stephen Shields","Lucie Debay, Arieh Worthalter, Ciaran O'Brien","""The company of wolves is better than that of ...",English,"Belgium, France, Ireland",2 nominations,,47,2049,5.3
4,tt10022990,Seeru,2020,07 Feb 2020,123 min,"Action, Drama","Rathina Shiva, Sami Rajalingam",Rathina Shiva,"Jiiva, Navdeep, Riya Suman",A guy from a small-town takes on a ruthless cr...,Tamil,India,,,,324,5.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947,tt18884646,Worlds Apart,2020,11 May 2020,45 min,"Animation, Action, Game-Show",David R.B.,,,War with an alien species known as the Droug h...,English,United States,,,,,
948,tt18924974,Into the Jimster Verse,2020,,,Action,James McGrath,James McGrath,James McGrath,The worlds of each series to ever exist on The...,English,United States,,,,,
949,tt18951988,Fractures 2,2020,10 Oct 2020,80 min,"Action, Adventure, Fantasy",Rain Olaguer,,,In a dystopian Overworld where Herobrine's und...,English,Philippines,,,,,
950,tt18954030,Jannat House,2020,22 Jun 2020,,Action,Parvez Alam,,"Rimjhim Das, Anushree Dutta, Payal Ghosh",,Hindi,India,,,,,


In [5]:
directors_list = []
directors_rows = list(set(movie_info_df['directors'].tolist()))

for row in directors_rows:
    row = row.split(", ")
    directors_list.extend(row)

actors_list = []
actors_rows = list(set(movie_info_df['actors'].tolist()))

for row in actors_rows:
    row = row.split(", ")
    actors_list.extend(row)

writers_list = []
writers_rows = list(set(movie_info_df['writers'].tolist()))

for row in writers_rows:
    row = row.split(", ")
    writers_list.extend(row)


In [6]:
len(set(directors_rows))

827

In [7]:
#TMDb API
def get_arist_info(artists_list):
    # Set a DataFrame and Lists to store information
    artist_info_df = pd.DataFrame(columns = ["name","popularity"])
    artist_info_row_index = 0


    for name in artists_list:
        base_url = 'https://api.themoviedb.org/3/search/person?'
        apikey = '' #insert your own key
        query = name
        url = base_url + "api_key=" + apikey + '&query=' + (query)


        # Package the request, send the request and catch the response: response
        response = requests.get(url)
        content = response.json()
        if query == 'N/A':
            continue
        elif content['total_results'] == 0:
            popularity = np.nan
        else:
            popularity = content['results'][0]['popularity']

        # Append movie_info_row to DataFrame
        artist_info_row = [query, popularity]          


        artist_info_df.loc[artist_info_row_index] = artist_info_row
        artist_info_row_index += 1

    return artist_info_df


In [11]:
director_info_df = get_arist_info(directors_list)
director_info_df.to_csv('director_info_df.csv')
director_info_df

Unnamed: 0,name,popularity
0,Yue Song,3.038
1,Ku Xing-Zhe,
2,Shekkhar Suri,
3,Ziwei Han,
4,Hirofumi Ogura,3.373
...,...,...
906,Marlon Hargrave,1.380
907,Billy Holden,0.600
908,Ronald Koeman,1.400
909,Andrea Pirlo,1.620


In [12]:
writer_info_df = get_arist_info(writers_list)
writer_info_df.to_csv('writer_info_df.csv')
writer_info_df

Unnamed: 0,name,popularity
0,Bejoy Nambiar,0.600
1,Anjali Nair,2.744
2,Kartik R. Iyer,0.600
3,Lior Chefetz,0.600
4,Joe Swanson,0.980
...,...,...
1130,Kevin Lukata,0.980
1131,R.L. Scott,2.066
1132,BC Fourteen,3.576
1133,Nicole M. Brevard (screenplay),


In [13]:
actor_info_df = get_arist_info(actors_list)
actor_info_df.to_csv('actor_info_df.csv')
actor_info_df

Unnamed: 0,name,popularity
0,Mel Rodriguez,5.681
1,Ryan Churchill,1.159
2,Joelle Carter,5.074
3,Biola Adebayo,
4,Biola Adekunle,
...,...,...
2533,Azhar Mohammed,
2534,Aishwarya Prashanth,
2535,Adam Corey,0.600
2536,Nicholas Lisitsin,0.600
