In [24]:
#https://www.kaggle.com/code/ursmaheshj/creating-dataset-using-tmdb-api
#https://www.kaggle.com/competitions/tmdb-box-office-prediction/overview
#https://www.kaggle.com/competitions/movie-votes-prediction/overview

In [25]:
import requests
from dotenv import load_dotenv
import os
import pandas as pd


from time import sleep
from datetime import datetime
from pathlib import Path

# Define API functions

In [26]:
def api_upcoming(token, page, timeout=30):
    """Request one page of TMDB's "upcoming movies" listing.

    Parameters
    ----------
    token : str
        Name of the environment variable that holds the TMDB bearer token
        (the variable is loaded from ``env/.env``) -- not the token itself.
    page : int
        Page number sent as the ``page`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook forever.

    Returns
    -------
    requests.Response
        The raw response. Checking the status code and decoding the JSON
        body is left to the caller.

    Raises
    ------
    ValueError
        If the environment variable named by ``token`` is not set.
    """
    # The .env file lives in a sub-folder, so point load_dotenv at it explicitly.
    load_dotenv(dotenv_path=Path('env/.env'))

    bearer_token = os.getenv(token)
    if not bearer_token:
        # Without this guard the request would go out as "Bearer None".
        raise ValueError(
            f"Environment variable {token!r} is not set; cannot authenticate."
        )

    headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {bearer_token}",
    }
    params = {
        'language': 'en-US',
        'page': page,
    }
    url = 'https://api.themoviedb.org/3/movie/upcoming'

    return requests.get(url, headers=headers, params=params, timeout=timeout)
    

In [27]:
def api_credits(film_id, key, timeout=30):
    """Request the details of one movie from TMDB with its credits appended.

    Parameters
    ----------
    film_id : int or str
        TMDB movie id, inserted into the request path.
    key : str
        Name of the environment variable that holds the TMDB API key
        (the variable is loaded from ``env/.env``) -- not the key itself.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook forever.

    Returns
    -------
    requests.Response
        The raw response. Checking the status code and decoding the JSON
        body is left to the caller.

    Raises
    ------
    ValueError
        If the environment variable named by ``key`` is not set.
    """
    # The .env file lives in a sub-folder, so point load_dotenv at it explicitly.
    load_dotenv(dotenv_path=Path('env/.env'))

    api_key = os.getenv(key)
    if not api_key:
        # Without this guard the request would go out with api_key=None.
        raise ValueError(
            f"Environment variable {key!r} is not set; cannot authenticate."
        )

    url = f'https://api.themoviedb.org/3/movie/{film_id}'
    # Let requests build and encode the query string instead of formatting
    # the key into the URL by hand.
    params = {
        'api_key': api_key,
        'append_to_response': 'credits',
    }

    return requests.get(url, params=params, timeout=timeout)

In [28]:
def _credits_row(film_id, data: dict) -> dict:
    """Flatten one movie payload (with embedded credits) into a single row dict."""
    credits = data['credits']
    return {
        'id': film_id,
        'budget': data['budget'],
        'production_companies': [entry.get('name') for entry in data['production_companies']],
        'production_countries': [entry.get('name') for entry in data['production_countries']],
        'revenue': data['revenue'],
        'spoken_languages': [entry.get('english_name') for entry in data['spoken_languages']],
        'genres': [entry.get('name') for entry in data['genres']],
        'runtime': data['runtime'],
        'tagline': data['tagline'],
        # Keyed by job title: when several crew members share a job, only the
        # last one listed survives.
        'crew': {member.get('job'): member.get('name') for member in credits['crew']},
        'cast-name': [member.get('name') for member in credits['cast']],
    }


def api_credits_loop(input_list: list, key: str, verbose=None) -> pd.DataFrame:
    """Fetch details and credits for each film id and collect them in a DataFrame.

    Parameters
    ----------
    input_list : list
        Film ids to look up, one ``api_credits`` call per id.
    key : str
        Name of the environment variable holding the API key; passed through
        to ``api_credits``.
    verbose : bool, optional
        When truthy, print the running request count after each id.

    Returns
    -------
    pd.DataFrame
        One row per film that was fetched and parsed successfully, with the
        columns ``id``, ``budget``, ``production_companies``,
        ``production_countries``, ``revenue``, ``spoken_languages``,
        ``genres``, ``runtime``, ``tagline``, ``crew`` and ``cast-name``.
        Films whose request or payload fails are reported and skipped, so the
        result may have fewer rows than ``input_list``.
    """
    columns = [
        'id',
        'budget',
        'production_companies',
        'production_countries',
        'revenue',
        'spoken_languages',
        'genres',
        'runtime',
        'tagline',
        'crew',
        'cast-name',
    ]

    # Collect plain dicts and build the frame once at the end instead of
    # enlarging a DataFrame one row at a time.
    records = []

    for request_count, item in enumerate(input_list, start=1):
        try:
            response = api_credits(film_id=item, key=key)
            # Turn HTTP error statuses into exceptions so an error payload is
            # never mistaken for movie data.
            response.raise_for_status()
            records.append(_credits_row(item, response.json()))
        except Exception as err:
            # Best effort: report the failure and move on. The row is built
            # inside the try block so a failed film can never reuse the
            # previous film's data.
            print(f"An error occurred while processing the response: {err}")

        # Pause briefly after every 40 requests to stay gentle on the API.
        if request_count % 40 == 0:
            sleep(1)

        if verbose:
            print(request_count)

    return pd.DataFrame(records, columns=columns)

# Get data - Upcoming films

In [29]:
# Walk through the "upcoming" endpoint page by page until a page comes back
# with no results. Each page is kept in a list and concatenated once at the
# end, which avoids re-copying the growing frame on every iteration.
upcoming_pages = []

page_counter = 1

while True:

    response = api_upcoming(token='BEARER_TOKEN', page=page_counter)

    # Fail loudly on an HTTP error instead of a confusing KeyError on 'results'.
    response.raise_for_status()

    temporary_df = pd.DataFrame(response.json()['results'])

    # An empty page means we have run past the last page of results.
    if temporary_df.empty:
        break

    upcoming_pages.append(temporary_df)

    # Move on to the next page
    page_counter += 1

    # Pause briefly every 40 pages to stay gentle on the API
    if page_counter % 40 == 0:
        sleep(1)

# pd.concat refuses an empty list, so fall back to an empty frame in that case.
df_upcoming = (
    pd.concat(upcoming_pages, ignore_index=True)
    if upcoming_pages
    else pd.DataFrame()
)

In [30]:
# Preview the first row to check which columns the endpoint returned.
df_upcoming.head(1)

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,/ga4OLm4qLxPqKLMzjJlqHxVjst3.jpg,"[28, 80, 53, 35]",573435,en,Bad Boys: Ride or Die,"After their late former Captain is framed, Low...",2585.897,/nP6RliHjxsz4irTKsxe8FRhKZYl.jpg,2024-06-05,Bad Boys: Ride or Die,False,7.2,172


In [31]:
# (rows, columns) collected across all fetched pages.
df_upcoming.shape

(719, 14)

In [32]:
# Make sure the target folder exists so the export does not fail on a fresh
# checkout. The frame's index is written as the first (unnamed) column.
Path('output').mkdir(parents=True, exist_ok=True)
df_upcoming.to_csv('output/df_upcoming.csv')

# Get data - Credits

### Get list of ids

In [33]:
# Get the list of ids needed for API credits
id_list = df_upcoming['id'].to_list()
id_list[:5]

[573435, 1022789, 974635, 882059, 748783]

### Call function

In [34]:
# Fetch details and credits for every upcoming film. 'API_KEY' is the name of
# the environment variable that holds the key, not the key itself.
df_credits = api_credits_loop(input_list=id_list, key='API_KEY')
df_credits

Unnamed: 0,id,budget,production_companies,production_countries,revenue,spoken_languages,genres,runtime,tagline,crew,cast-name
0,573435,80000000,"[Westbrook Studios, Columbia Pictures, Don Sim...",[United States of America],104600000,[English],"[Action, Crime, Thriller, Comedy]",115,Miami's finest are now its most wanted.,"{'Producer': 'Chad Oman', 'Characters': 'Georg...","[Will Smith, Martin Lawrence, Vanessa Hudgens,..."
1,1022789,200000000,"[Pixar, Walt Disney Pictures]",[United States of America],0,[English],"[Animation, Family, Drama, Adventure, Comedy]",97,Make room for new emotions.,"{'Director': 'Kelsey Mann', 'Producer': 'Mark ...","[Amy Poehler, Maya Hawke, Kensington Tallman, ..."
2,974635,8800000,"[Aggregate Films, BarnStorm Productions, Detou...",[United States of America],1139025,[English],"[Romance, Comedy, Crime]",116,"He's not a killer, but he can pretend.","{'Director': 'Richard Linklater', 'Producer': ...","[Glen Powell, Adria Arjona, Austin Amelio, Ret..."
3,882059,18000000,"[Vertigo Entertainment, Hammerstone Studios, N...",[United States of America],3139717,[English],"[Action, Thriller, Crime, Science Fiction]",110,,"{'Director': 'Moritz Mohr', 'Producer': 'Simon...","[Bill Skarsgård, Jessica Rothe, Michelle Docke..."
4,748783,60000000,"[Alcon Entertainment, Prime Focus, DNEG Animat...","[Hong Kong, India, United Kingdom, United Stat...",192713000,[English],"[Animation, Comedy, Family, Adventure]",101,Indoor cat. Outdoor adventure.,"{'Storyboard Artist': 'Bob Scott', 'Production...","[Chris Pratt, Samuel L. Jackson, Hannah Waddin..."
...,...,...,...,...,...,...,...,...,...,...,...
714,1303677,0,[Oz Produtora],[Brazil],0,[Portuguese],[Animation],4,,"{'Director': 'Hygor Amorim', 'Producer': 'Aman...",[]
715,1303655,0,[],[],0,[],[Documentary],0,,{},[]
716,1303649,0,[],[],0,[],[],37,,{'Director': 'Nataliya Ilchuk'},[Khrystyna Savchuk]
717,1303578,0,[],[],0,[German],[],0,,{},[]


In [35]:
# Preview the first five rows of the credits table.
df_credits.head(5)

Unnamed: 0,id,budget,production_companies,production_countries,revenue,spoken_languages,genres,runtime,tagline,crew,cast-name
0,573435,80000000,"[Westbrook Studios, Columbia Pictures, Don Sim...",[United States of America],104600000,[English],"[Action, Crime, Thriller, Comedy]",115,Miami's finest are now its most wanted.,"{'Producer': 'Chad Oman', 'Characters': 'Georg...","[Will Smith, Martin Lawrence, Vanessa Hudgens,..."
1,1022789,200000000,"[Pixar, Walt Disney Pictures]",[United States of America],0,[English],"[Animation, Family, Drama, Adventure, Comedy]",97,Make room for new emotions.,"{'Director': 'Kelsey Mann', 'Producer': 'Mark ...","[Amy Poehler, Maya Hawke, Kensington Tallman, ..."
2,974635,8800000,"[Aggregate Films, BarnStorm Productions, Detou...",[United States of America],1139025,[English],"[Romance, Comedy, Crime]",116,"He's not a killer, but he can pretend.","{'Director': 'Richard Linklater', 'Producer': ...","[Glen Powell, Adria Arjona, Austin Amelio, Ret..."
3,882059,18000000,"[Vertigo Entertainment, Hammerstone Studios, N...",[United States of America],3139717,[English],"[Action, Thriller, Crime, Science Fiction]",110,,"{'Director': 'Moritz Mohr', 'Producer': 'Simon...","[Bill Skarsgård, Jessica Rothe, Michelle Docke..."
4,748783,60000000,"[Alcon Entertainment, Prime Focus, DNEG Animat...","[Hong Kong, India, United Kingdom, United Stat...",192713000,[English],"[Animation, Comedy, Family, Adventure]",101,Indoor cat. Outdoor adventure.,"{'Storyboard Artist': 'Bob Scott', 'Production...","[Chris Pratt, Samuel L. Jackson, Hannah Waddin..."


In [36]:
# (rows, columns) of the credits table.
df_credits.shape

(719, 11)

# Save data

In [37]:
# Make sure the target folder exists so the export does not fail on a fresh
# checkout. The index is left out of the file.
Path('output').mkdir(parents=True, exist_ok=True)
df_credits.to_csv('output/df_credits.csv', index=False)