### API Calls

In [2]:
# imports
import pandas as pd
import numpy as np
import os, time, json
import tmdbsimple as tmdb
from tqdm.notebook import tqdm_notebook

In [3]:
# Load the TMDB credentials from a JSON file kept outside the project folder.
# The location is derived from the current user's home directory instead of a
# hard-coded "/Users/<name>" prefix, so the notebook runs for any account that
# stores the file at ~/.secret/tmdb_api.json.
credentials_path = os.path.join(os.path.expanduser('~'), '.secret', 'tmdb_api.json')
with open(credentials_path, 'r') as f:
    login = json.load(f)

# hand the key to tmdbsimple; the JSON file must contain an 'api_key' entry
tmdb.API_KEY = login['api_key']

In [4]:
# Working data directory. The trailing slash matters: the download loop builds
# file paths from FOLDER by plain string concatenation.
FOLDER = "Data/"
# create the directory on first run; does nothing when it is already there
os.makedirs(name=FOLDER, exist_ok=True)
# cell output: whatever the data directory currently contains
os.listdir(path=FOLDER)

['.ipynb_checkpoints',
 'akas.csv.gz',
 'basics.csv.gz',
 'final_tmbd_data_2000.csv.gz',
 'final_tmbd_data_2001.csv.gz',
 'ratings.csv.gz',
 'tmdb_api_results_for_2000.json',
 'tmdb_api_results_for_2001.json']

### Functions

In [5]:
def get_movie_details(movie_id):
    """Query TMDB for one movie and return the handful of fields the project keeps.

    Parameters
    ----------
    movie_id : str or int
        Identifier handed straight to ``tmdb.Movies``. The download loop in
        this notebook passes IMDb ``tconst`` values.

    Returns
    -------
    dict
        Keys ``'title'``, ``'revenue'``, ``'budget'``, ``'rating'`` and
        ``'imdb_id'``. ``'rating'`` is the certification of the US entry in
        the release list (the last one, if several are listed), or ``None``
        when the movie has no US entry.

    Raises
    ------
    Exception
        Whatever ``tmdbsimple`` raises for a failed request is propagated.
    KeyError
        If the API payload lacks one of the non-rating fields.
    """
    # using the provided movie id to query the tmdb api
    movie = tmdb.Movies(movie_id)
    info = movie.info()
    releases = movie.releases()

    # copy the US certification (if any) into the info dictionary
    for c in releases['countries']:
        if c['iso_3166_1'] == 'US':
            info['rating'] = c['certification']

    # Movies without a US release used to blow up with a KeyError in the
    # selection below, which made the caller drop them silently. Give them
    # an explicit "no rating" value instead.
    info.setdefault('rating', None)

    # only selecting the items needed from the dictionary
    wanted = ['title', 'revenue', 'budget', 'rating', 'imdb_id']
    return {key: info[key] for key in wanted}

In [6]:
def write_json(input_data, filename):
    """Add new data to the JSON array already stored in ``filename``.

    Parameters
    ----------
    input_data : list or any JSON-serialisable object
        A list is merged element-by-element into the stored list; anything
        else is appended as a single new element.
    filename : str
        Path of an existing file whose content is a JSON array.

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist (the file is opened in ``'r+'`` mode).
    json.JSONDecodeError
        If the current file content is not valid JSON.
    AttributeError
        If the stored JSON value is not a list.
    """
    with open(filename, 'r+') as f:
        # load data we currently have
        file_data = json.load(f)
        # extend data already written or append a single record
        if isinstance(input_data, list) and isinstance(file_data, list):
            file_data.extend(input_data)
        else:
            file_data.append(input_data)
        # rewind so the new dump overwrites the old content from the start
        f.seek(0)
        json.dump(file_data, f)
        # The old content can be longer than the new dump (for example a
        # pretty-printed file); cut off whatever is left so the file stays
        # valid JSON.
        f.truncate()

### Creating Queries for selected years

In [7]:
# release years to pull from the API; one results file is produced per entry
YEARS = [
    2000,
    2001,
]

In [8]:
# The basics table is the same for every year, so it is read once here
# instead of being re-parsed from the gzip file on each pass of the loop.
basics = pd.read_csv(f'{FOLDER}basics.csv.gz')

# One pass per year: make sure the results file exists, work out which movies
# are still missing from it, query the API for those, then export to csv.
for year in tqdm_notebook(YEARS, desc = 'YEARS', position = 0):
    # creating unique name for each year
    JSON_FILE = f'{FOLDER}tmdb_api_results_for_{year}.json'

    # if the results file does not exist yet, create it
    if not os.path.isfile(JSON_FILE):
        print(f"[i] {JSON_FILE} not found. Saving empty list to file.")

        # Seed the file with one placeholder record so that it parses as a
        # list and already has an 'imdb_id' column for the filter below.
        with open(JSON_FILE, 'w') as f:
            json.dump([{'imdb_id':0}],f)
    # if it exists, inform user
    else:
        print(f"[i] {JSON_FILE} already exists.")

    # grab the data based on the year
    df = basics.loc[basics['startYear'] == year].copy()
    # movie ids for this year (kept as a Series so .isin can be used below)
    movie_ids = df['tconst'].copy()

    # loading any existing data
    previous_df = pd.read_json(JSON_FILE)
    # filter out any movie ids that may already be in the file
    movie_ids_needed = movie_ids[~movie_ids.isin(previous_df['imdb_id'])]

    # Ids whose lookup failed, with the reason. The loop stays best-effort
    # (one bad movie must not stop the download), but failures are no longer
    # thrown away without a trace.
    failed_ids = []

    for movie_id in tqdm_notebook(movie_ids_needed,
                                  desc = f'Movies from {year}',
                                  position = 1,
                                  leave = True):
        try:
            # using function to call api and get details from specific movie
            film = get_movie_details(movie_id)
            # using function to write new data to json file
            write_json(film, JSON_FILE)
            # short pause between successful calls
            time.sleep(0.02)

        except Exception as e:
            failed_ids.append((movie_id, repr(e)))
            continue

    if failed_ids:
        print(f"[!] {len(failed_ids)} of {len(movie_ids_needed)} movies "
              f"from {year} could not be retrieved.")

    # saving the final file to csv
    # (the "tmbd" spelling in the file name is kept so existing files and any
    # code that reads them keep working)
    final_year_df = pd.read_json(JSON_FILE)
    final_year_df.to_csv(f"{FOLDER}final_tmbd_data_{year}.csv.gz",
                         compression = "gzip",
                         index = False)
            

YEARS:   0%|          | 0/2 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2000.json already exists.


Movies from 2000:   0%|          | 0/1409 [00:00<?, ?it/s]

[i] Data/tmdb_api_results_for_2001.json already exists.


Movies from 2001:   0%|          | 0/1525 [00:00<?, ?it/s]