### API Calls

In [9]:
# imports
import pandas as pd
import numpy as np
import os, time, json
import tmdbsimple as tmdb
from tqdm.notebook import tqdm_notebook

In [10]:
# setting credentials: the key lives in a JSON file outside the project folder.
# The path is resolved from the current user's home directory so the notebook
# is not tied to one machine's absolute path.
with open(os.path.expanduser('~/.secret/tmdb_api.json'), 'r') as f:
    login = json.load(f)

# setting API key variable (the JSON file must contain an 'api_key' entry)
tmdb.API_KEY = login['api_key']

In [11]:
# data directory used by the rest of the notebook: create it when it is
# missing, then display whatever it currently contains
FOLDER = "Data/"
os.makedirs(FOLDER, exist_ok=True)
os.listdir(FOLDER)

['.ipynb_checkpoints',
 'akas.csv.gz',
 'basics.csv.gz',
 'final_tmbd_data_2000.csv.gz',
 'final_tmbd_data_2001.csv.gz',
 'movie_database.sql',
 'number_movie_per_rating.png',
 'ratings.csv.gz',
 'revenue_per_rating.png',
 'tmdb_api_results_for_2000.json',
 'tmdb_api_results_for_2001.json',
 'tmdb_results_combined.csv.gz']

### Functions

In [12]:
# creating a function to get the selected movie details, including the US rating
def get_movie_details(movie_id):
    """Query the TMDB API for one movie and return a trimmed dict of its details.

    Parameters
    ----------
    movie_id
        Identifier handed directly to ``tmdb.Movies``.

    Returns
    -------
    dict
        The keys 'title', 'revenue', 'budget', 'rating' and 'imdb_id'.
        'rating' is the certification of the US entry in the movie's
        releases, or None when the releases contain no US entry.

    Raises
    ------
    KeyError
        If the API response lacks 'countries' or one of the selected
        info keys. Exceptions raised by the tmdb calls propagate unchanged.
    """
    # using the provided movie id to query the tmdb api
    movie = tmdb.Movies(movie_id)
    # store the information captured
    info = movie.info()
    releases = movie.releases()
    # look over the countries in the releases dictionary
    for country in releases['countries']:
        if country['iso_3166_1'] == 'US':
            # create a new key in the info dictionary with the rating
            # (if several US entries exist, the last one wins)
            info['rating'] = country['certification']
    # a movie without any US release would otherwise have no 'rating' key
    # and the selection below would fail with a KeyError
    info.setdefault('rating', None)

    # only selecting the items needed from the dictionary
    wanted = ('title', 'revenue', 'budget', 'rating', 'imdb_id')
    movie_details = {key: info[key] for key in wanted}

    return movie_details

In [13]:
# creating a function to write new data to a json file
def write_json(input_data, filename):
    """Add new data to the JSON list stored in an existing file.

    Parameters
    ----------
    input_data
        A list, whose items are each added to the stored list, or any other
        JSON-serialisable value, which is appended as a single item.
    filename
        Path of an existing file that contains a JSON list.

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist (the file is opened in 'r+' mode).
    json.JSONDecodeError
        If the file does not contain valid JSON.
    AttributeError
        If the stored JSON value is not a list and so has no ``append``.
    """
    with open(filename, 'r+') as f:
        # load data we currently have
        file_data = json.load(f)
        # extend data already written or write for the first time
        if isinstance(input_data, list) and isinstance(file_data, list):
            file_data.extend(input_data)
        else:
            file_data.append(input_data)
        # rewind so the updated list overwrites the old content in place
        f.seek(0)
        # converting the data back to json
        json.dump(file_data, f)
        # drop leftover bytes when the new text is shorter than the old one
        # (e.g. the file was pretty-printed); otherwise the JSON is corrupted
        f.truncate()

### Creating Queries for selected years

In [14]:
# years to query: every year from 2000 through 2022 inclusive
# (swap in a short list such as [2000, 2001] for a quick trial run)
YEARS = [*range(2000, 2023)]

In [15]:
# loading in the basics file once, before the loop: it is the same for every year
basics = pd.read_csv(f'{FOLDER}basics.csv.gz')

# for loop for cycling years
for year in tqdm_notebook(YEARS, desc = 'YEARS', position = 0):
    # creating unique name for each year
    JSON_FILE = f'{FOLDER}tmdb_api_results_for_{year}.json'
    # checking whether the file has already been made
    file_exists = os.path.isfile(JSON_FILE)
    # if it does not exist yet, create it
    if not file_exists:

        # response to user
        print(f"[i] {JSON_FILE} not found. Saving empty list to file.")

        # seed the file with a list holding one placeholder record so the
        # 'imdb_id' column exists when the file is read back below
        with open(JSON_FILE, 'w') as f:
            json.dump([{'imdb_id':0}],f)
    # if it exists, inform user
    else:
        print(f"[i] {JSON_FILE} already exists.")

    # grab the data based on the year
    df = basics.loc[basics['startYear'] == year].copy()
    # getting the movie ids for that year
    movie_ids = df['tconst'].copy()

    # loading any existing data
    previous_df = pd.read_json(JSON_FILE)
    # filter out any movie ids that may already be in the file
    movie_ids_needed = movie_ids[~movie_ids.isin(previous_df['imdb_id'])]

    # ids whose retrieval or saving failed, kept so failures are not silent
    failed_ids = []

    # cycling through the movie ids still needed
    for movie_id in tqdm_notebook(movie_ids_needed,
                                  desc = f'Movies from {year}',
                                  position = 1,
                                  leave = True):
        try:
            # using function to call api and get details from specific movie
            film = get_movie_details(movie_id)
            # using function to write new data to json file
            write_json(film, JSON_FILE)

        except Exception as e:
            # best effort: one failing movie must not stop the whole run,
            # but remember it so the problem can be reported afterwards
            failed_ids.append((movie_id, repr(e)))

        finally:
            # short pause between calls, also after a failed call
            time.sleep(0.02)

    # report how many movies were skipped and show one example error
    if failed_ids:
        print(f"[!] {len(failed_ids)} of {len(movie_ids_needed)} movies from {year} "
              f"could not be saved. First error: {failed_ids[0][1]}")

    # saving the final file to csv
    final_year_df = pd.read_json(JSON_FILE)
    final_year_df.to_csv(f"{FOLDER}final_tmbd_data_{year}.csv.gz",
                         compression = "gzip",
                         index = False)
            

YEARS:   0%|          | 0/23 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2000.json already exists.


Movies from 2000:   0%|          | 0/621 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2001.json already exists.


Movies from 2001:   0%|          | 0/668 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2002.json not found. Saving empty list to file.


Movies from 2002:   0%|          | 0/1515 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2003.json not found. Saving empty list to file.


Movies from 2003:   0%|          | 0/1637 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2004.json not found. Saving empty list to file.


Movies from 2004:   0%|          | 0/1836 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2005.json not found. Saving empty list to file.


Movies from 2005:   0%|          | 0/2132 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2006.json not found. Saving empty list to file.


Movies from 2006:   0%|          | 0/2351 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2007.json not found. Saving empty list to file.


Movies from 2007:   0%|          | 0/2488 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2008.json not found. Saving empty list to file.


Movies from 2008:   0%|          | 0/2834 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2009.json not found. Saving empty list to file.


Movies from 2009:   0%|          | 0/3456 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2010.json not found. Saving empty list to file.


Movies from 2010:   0%|          | 0/3759 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2011.json not found. Saving empty list to file.


Movies from 2011:   0%|          | 0/4140 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2012.json not found. Saving empty list to file.


Movies from 2012:   0%|          | 0/4434 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2013.json not found. Saving empty list to file.


Movies from 2013:   0%|          | 0/4635 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2014.json not found. Saving empty list to file.


Movies from 2014:   0%|          | 0/4787 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2015.json not found. Saving empty list to file.


Movies from 2015:   0%|          | 0/4941 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2016.json not found. Saving empty list to file.


Movies from 2016:   0%|          | 0/5164 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2017.json not found. Saving empty list to file.


Movies from 2017:   0%|          | 0/5523 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2018.json not found. Saving empty list to file.


Movies from 2018:   0%|          | 0/5649 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2019.json not found. Saving empty list to file.


Movies from 2019:   0%|          | 0/5692 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2020.json not found. Saving empty list to file.


Movies from 2020:   0%|          | 0/4771 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2021.json not found. Saving empty list to file.


Movies from 2021:   0%|          | 0/4789 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2022.json not found. Saving empty list to file.


Movies from 2022:   0%|          | 0/2292 [00:00<?, ?it/s]