# Imports

In [1]:
import os, time, json
import tmdbsimple as tmdb
from tqdm.notebook import tqdm_notebook

# Standard imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Define Functions

In [2]:
# adapting movie certification to movie.info()
def get_movie_with_rating(movie_id):
    """Adapted from source = https://github.com/celiao/tmdbsimple"""
    # Get the movie object for the current id
    movie = tmdb.Movies(movie_id)
    # save the .info .releases dictionaries
    info = movie.info()
    releases = movie.releases()
    # Loop through countries in releases
    for c in releases['countries']:
        # if the country abbreviation==US
        if c['iso_3166_1' ] =='US':
            ## save a "certification" key in the info dict with the certification
           info['certification'] = c['certification']
    return info

In [3]:
def write_json(new_data, filename): 
    """Appends a list of records (new_data) to a json file (filename). 
    Adapted from: https://www.geeksforgeeks.org/append-to-json-file-using-python/"""  
    
    with open(filename,'r+') as file:
        # First we load existing data into a dict.
        file_data = json.load(file)
        ## Choose extend or append
        if (type(new_data) == list) & (type(file_data) == list):
            file_data.extend(new_data)
        else:
             file_data.append(new_data)
        # Sets file's current position at offset.
        file.seek(0)
        # convert back to json.
        json.dump(file_data, file)

# Login with API Credentials

In [4]:
with open('/Users/joshl/.secret/tmdb_api.json', 'r') as f:
    login = json.load(f)
# Use api-key to login
tmdb.API_KEY =  login['api-key']

# Designate a Folder for API Call Data

In [6]:
FOLDER = "Data/"
os.makedirs(FOLDER, exist_ok=True)
os.listdir(FOLDER)

['final_tmdb_data_2000.csv.gz',
 'final_tmdb_data_2001.csv.gz',
 'title_akas.csv.gz',
 'title_basics.csv.gz',
 'title_ratings.csv.gz',
 'tmdb_api_results_2000.json',
 'tmdb_api_results_2001.json',
 'tmdb_results_combined.csv.gz']

# Load in the Title Basics Data
This data will need to be loaded and ready to be filtered for the years of the data I am looking for. For part 4 of this project, I will be calling data for the 5 years prior to the pandemic when the entertainment business was very successful.

In [7]:
# Load in the dataframe from project part 1 as basics:
basics = pd.read_csv('Data/title_basics.csv.gz')
basics.head()

Unnamed: 0,tconst,titleType,primaryTitle,originalTitle,isAdult,startYear,endYear,runtimeMinutes,genres
0,tt0035423,movie,Kate & Leopold,Kate & Leopold,0,2001,,118,"Comedy,Fantasy,Romance"
1,tt0062336,movie,The Tango of the Widower and Its Distorting Mi...,El tango del viudo y su espejo deformante,0,2020,,70,Drama
2,tt0069049,movie,The Other Side of the Wind,The Other Side of the Wind,0,2018,,122,Drama
3,tt0088751,movie,The Naked Monster,The Naked Monster,0,2005,,100,"Comedy,Horror,Sci-Fi"
4,tt0096056,movie,Crime and Punishment,Crime and Punishment,0,2002,,126,Drama


# Creating Lists for the Loops
There are 2 lists that I want to create. One list will be for the years that I will be getting the API call data for. The other will contain any errors encountered by the loops. The loop will continue despite running into any errors, but those errors will be compiled into a list to observe after the loops have completed.

In [8]:
# List for years
YEARS_TO_GET = [2015,2016,2017,2018,2019]

In [9]:
# Creating list to store errors
errors = []

In [10]:
# Start of OUTER loop
for YEAR in tqdm_notebook(YEARS_TO_GET, desc='YEARS', position=0):
    #Defining the JSON file to store results for year
    JSON_FILE = f'{FOLDER}tmdb_api_results_{YEAR}.json'
    # Check if file exists
    file_exists = os.path.isfile(JSON_FILE)
    # If it does not exist: create it
    if file_exists == False:
    # save an empty dict with just "imdb_id" to the new json file.
        with open(JSON_FILE,'w') as f:
            json.dump([{'imdb_id':0}],f)
    #Saving new year as the current df
    df = basics.loc[ basics['startYear']==YEAR].copy()
    # saving movie ids to list
    movie_ids = df['tconst'].copy()
    # Load existing data from json into a dataframe called "previous_df"
    previous_df = pd.read_json(JSON_FILE)
    # filter out any ids that are already in the JSON_FILE
    movie_ids_to_get = movie_ids[~movie_ids.isin(previous_df['imdb_id'])]
        #Get index and movie id from list
    # INNER Loop
    for movie_id in tqdm_notebook(movie_ids_to_get,
                                  desc=f'Movies from {YEAR}',
                                  position=1,
                                  leave=True):
        try:
            # Retrieve then data for the movie id
            temp = get_movie_with_rating(movie_id)  
            # Append/extend results to existing file using a pre-made function
            write_json(temp,JSON_FILE)
            # Short 20 ms sleep to prevent overwhelming server
            time.sleep(0.02)
            
        except Exception as e:
            errors.append([movie_id, e])
    
    final_year_df = pd.read_json(JSON_FILE)
    final_year_df.to_csv(f"{FOLDER}final_tmdb_data_{YEAR}.csv.gz", compression="gzip", index=False)

YEARS:   0%|          | 0/5 [00:00<?, ?it/s]

Movies from 2015:   0%|          | 0/5015 [00:00<?, ?it/s]

Movies from 2016:   0%|          | 0/5216 [00:00<?, ?it/s]

Movies from 2017:   0%|          | 0/5587 [00:00<?, ?it/s]

Movies from 2018:   0%|          | 0/5710 [00:00<?, ?it/s]

Movies from 2019:   0%|          | 0/5818 [00:00<?, ?it/s]

# Find the Movies that Caused Errors

In [11]:
print(f"- Total errors: {len(errors)}")

- Total errors: 5718


# Confirming All Years Saved to csv.gz File

In [12]:
df_2015 = pd.read_csv('Data/final_tmdb_data_2015.csv.gz')
df_2015.head()

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0369610,0.0,/aIGIYJTyOkEVUmEd3z5x6diYsFx.jpg,"{'id': 328, 'name': 'Jurassic Park Collection'...",150000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.jurassicworld.com/,135397.0,en,Jurassic World,...,1671537000.0,124.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The park is open.,Jurassic World,0.0,6.679,18891.0,PG-13
2,tt0420293,0.0,/b0p0OPx1ZDZVc8iDB1kle0Cc2H9.jpg,,0.0,"[{'id': 53, 'name': 'Thriller'}]",http://www.stanfordprisonexperimentfilm.com/,308032.0,en,The Stanford Prison Experiment,...,643557.0,122.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,They were given 2 weeks. It lasted 6 days.,The Stanford Prison Experiment,0.0,6.9,835.0,R
3,tt0462335,0.0,/wESuRMxELPAwo56qFRcoyI4p20F.jpg,,0.0,"[{'id': 18, 'name': 'Drama'}]",http://www.highrisefilm.com,254302.0,en,High-Rise,...,346472.0,119.0,"[{'english_name': 'French', 'iso_639_1': 'fr',...",Released,Leave the real world behind,High-Rise,0.0,5.7,1025.0,R
4,tt0478970,0.0,/a7sAqMKv5tkAdMzFfIhPqIBmQ9g.jpg,"{'id': 422834, 'name': 'Ant-Man Collection', '...",130000000.0,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",http://marvel.com/movies/movie/180/ant-man,102899.0,en,Ant-Man,...,519312000.0,117.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Heroes don't get any bigger.,Ant-Man,0.0,7.083,18094.0,PG-13


In [13]:
df_2016 = pd.read_csv('Data/final_tmdb_data_2016.csv.gz')
df_2016.head()

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0315642,0.0,/lD6lYulY6js9EAgUufh2ekNnCR4.jpg,,5200000.0,"[{'id': 53, 'name': 'Thriller'}, {'id': 28, 'n...",,275269.0,hi,वज़ीर,...,9200000.0,103.0,"[{'english_name': 'Hindi', 'iso_639_1': 'hi', ...",Released,This new year make your best move,Wazir,0.0,6.689,106.0,PG-13
2,tt0376479,0.0,/fDxdfM4Qj2Qhc6EZYPyB3k4xatw.jpg,,0.0,"[{'id': 80, 'name': 'Crime'}, {'id': 18, 'name...",http://www.americanpastoral.movie/,326285.0,en,American Pastoral,...,0.0,108.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,A radically ordinary story.,American Pastoral,0.0,6.1,617.0,R
3,tt0443533,0.0,/iU9ij6jSQOJDPFnqBUPzEY2eOs6.jpg,,20000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,410366.0,en,The History of Love,...,0.0,134.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,The History of Love,0.0,6.5,80.0,
4,tt0470936,0.0,,,0.0,[],,532608.0,hy,"Tak erkir, tsurt dzmer",...,0.0,104.0,"[{'english_name': 'Armenian', 'iso_639_1': 'hy...",Released,,"Hot Country, Cold Winter",0.0,0.0,0.0,


In [14]:
df_2017 = pd.read_csv('Data/final_tmdb_data_2017.csv.gz')
df_2017.head()

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0100275,0.0,/2S2XTeS2b0LPnqzXBDD3ZmwPF2X.jpg,,0.0,"[{'id': 14, 'name': 'Fantasy'}, {'id': 35, 'na...",,467254.0,es,La telenovela errante,...,0.0,80.0,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...",Released,,The Wandering Soap Opera,0.0,6.6,12.0,
2,tt0137204,0.0,/92PDk4xvjTdGXrQYF0QOojK2btM.jpg,,0.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...",,503326.0,en,Joe Finds Grace,...,0.0,83.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,A GLOVE STORY,Joe Finds Grace,0.0,10.0,1.0,
3,tt0331314,0.0,/BiDHVlIs5Pm2uSqd2e2n7yT582.jpg,,0.0,"[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...",,435995.0,en,Bunyan and Babe,...,0.0,84.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,You don't have to BE BIG to do BIG THINGS.,Bunyan and Babe,0.0,6.0,18.0,PG
4,tt0339736,0.0,/lobIr3Fq2buOUxXtursuEJEjfri.jpg,,4000000.0,"[{'id': 27, 'name': 'Horror'}]",,444193.0,en,The Evil Within,...,0.0,98.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,You Can't Run From a Nightmare,The Evil Within,0.0,5.8,78.0,


In [15]:
df_2018 = pd.read_csv('Data/final_tmdb_data_2018.csv.gz')
df_2018.head()

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0069049,0.0,/wXUcSJG6dqNKgIRgqYqX98UA1wz.jpg,,12000000.0,"[{'id': 18, 'name': 'Drama'}]",https://www.netflix.com/title/80085566,299782.0,en,The Other Side of the Wind,...,0.0,122.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,40 years in the making.,The Other Side of the Wind,0.0,6.66,172.0,R
2,tt0192528,0.0,/gkBxjhtVEqgxOiRwRfWw1j7ez6K.jpg,,5000000.0,"[{'id': 18, 'name': 'Drama'}]",,567662.0,en,Reverse Heaven,...,0.0,104.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Heaven's in trouble and there's one man who ca...,Heaven & Hell,0.0,6.3,6.0,
3,tt0276568,0.0,,,0.0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://networkonair.com/all-products/2806-man...,1057586.0,en,To Chase A Million,...,0.0,98.0,[],Released,A key to a fortune... a key to death!,To Chase A Million,0.0,0.0,0.0,
4,tt0360556,0.0,/7oy4miyq4WYYy0xtX6lbNVPrEsr.jpg,,0.0,"[{'id': 18, 'name': 'Drama'}, {'id': 878, 'nam...",https://www.hbo.com/movies/fahrenheit-451,401905.0,en,Fahrenheit 451,...,0.0,100.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Knowledge is a dangerous thing,Fahrenheit 451,0.0,5.42,740.0,PG-13


In [16]:
df_2019 = pd.read_csv('Data/final_tmdb_data_2019.csv.gz')
df_2019.head()

Unnamed: 0,imdb_id,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,original_language,original_title,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,certification
0,0,,,,,,,,,,...,,,,,,,,,,
1,tt0385887,0.0,/xMHeU84wLQ9uH9acJ0oPgdii9wO.jpg,,26000000.0,"[{'id': 18, 'name': 'Drama'}, {'id': 53, 'name...",https://www.motherlessbrooklynfilm.com,504562.0,en,Motherless Brooklyn,...,18377736.0,145.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,Motherless Brooklyn,0.0,6.719,1251.0,R
2,tt0437086,0.0,/8RKBHHRqOMOLh5qW3sS6TSFTd8h.jpg,,170000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 878, 'na...",https://family.20thcenturystudios.com/movies/a...,399579.0,en,Alita: Battle Angel,...,404980543.0,122.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,An angel falls. A warrior rises.,Alita: Battle Angel,0.0,7.21,8062.0,PG-13
3,tt0441881,0.0,/7uacpbrJd9g9xSTLgfyg1NwkAiu.jpg,,23934823.0,"[{'id': 10752, 'name': 'War'}, {'id': 28, 'nam...",http://dangerclosemovie.com,508664.0,en,Danger Close: The Battle of Long Tan,...,2078370.0,119.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Outnumbered. Outgunned. Never out of courage,Danger Close: The Battle of Long Tan,0.0,6.875,259.0,R
4,tt0448115,0.0,/OIGX2lm5tmlCKvZUghtwHzoxxO.jpg,"{'id': 724848, 'name': 'Shazam! Collection', '...",80000000.0,"[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...",http://www.shazammovie.com,287947.0,en,Shazam!,...,366080049.0,132.0,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Just say the word.,Shazam!,0.0,7.0,8280.0,PG-13
