In [32]:
import requests
import os
import json
import backoff

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {os.getenv('API_TOKEN')}"
}

#@backoff.on_exception(backoff.expo,
#                      requests.exceptions.RequestException,
#                      max_tries=10)
def call_get(url):
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # This will raise a HTTPError for bad responses (4xx and 5xx)
    #print(response.json())
    return response.json()

def get_latest_movie():

    url = "https://api.themoviedb.org/3/movie/latest"
    return call_get(url)


def get_movie_credits_by_id(id):
    url = f"https://api.themoviedb.org/3/movie/{id}/credits?language=en-US"
    try:
        return call_get(url)
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            try:
                error_response = e.response.json()
                if error_response.get('status_code') == 34:
                    print(f"No credits found for ID: {id}")
                    return None
                
            except ValueError:
                print(f"Received unexpected response: {e.response.text}")
        else:
            print(f"An error occurred: {e}")
            raise e


def get_movie_by_id(id):
    url = f"https://api.themoviedb.org/3/movie/{id}?language=en-US"
    try:
        return call_get(url)
    except requests.exceptions.HTTPError as e:
        if e.response.status_code == 404:
            try:
                error_response = e.response.json()
                if error_response.get('status_code') == 34:
                    print(f"No movie found for ID: {id}")
                    return None
                
            except ValueError:
                print(f"Received unexpected response: {e.response.text}")
        elif e.response.status_code == 429:
            print("Time out. Waiting for 10 seconds")
            #TODO: move it to call_get add time sleep
        else:
            print(f"An error occurred: {e}")
            raise e

def get_movie_from_omdb(imdb_id, api_key=os.getenv('OMDB_KEY')):
    url = f"https://www.omdbapi.com/?i={imdb_id}&apikey={api_key}"
    response = requests.get(url)
    response.raise_for_status()  # This will raise a HTTPError for bad responses (4xx and 5xx)
    return response.json()

def fetch_all_movies(start_id, last_id):
    all_movies = [] 
    for id in range(start_id, last_id + 1):  # assuming IDs start at 0
        movie = get_movie_by_id(id)
        if movie is not None and movie["imdb_id"]:
            # check this movie in omdb
            try: 
                omdb_json = get_movie_from_omdb(movie["imdb_id"])
                #print(omdb_json)
                merged = {**movie, **omdb_json}
            except:
                print("no such movie in omdb")
            all_movies.append(merged)
    return all_movies

def fetch_all_credits(start_id, last_id):
    all_credits = [] 
    for id in range(start_id, last_id + 1):  # assuming IDs start at 0
        credit = get_movie_credits_by_id(id)
        if credit is not None:
            all_credits.append(credit)
    return all_credits


In [39]:
import pandas as pd
#last_movie = get_latest_movie()['id']
last_movie = 1600
all_movies = fetch_all_movies(1401, last_movie)


pd.set_option('display.max_columns', None)
df = pd.json_normalize(all_movies)
df.to_parquet(f"tmdb/movies_{last_movie}.parquet", compression='gzip')
df.head(1)


No movie found for ID: 1401
No movie found for ID: 1409
No movie found for ID: 1425
No movie found for ID: 1431
No movie found for ID: 1432
No movie found for ID: 1434
No movie found for ID: 1445
No movie found for ID: 1446
No movie found for ID: 1447
No movie found for ID: 1449
No movie found for ID: 1451
No movie found for ID: 1453
No movie found for ID: 1454
No movie found for ID: 1455
No movie found for ID: 1456
No movie found for ID: 1457
No movie found for ID: 1458
No movie found for ID: 1459
No movie found for ID: 1460
No movie found for ID: 1461
No movie found for ID: 1462
No movie found for ID: 1463
No movie found for ID: 1464
No movie found for ID: 1465
No movie found for ID: 1466
No movie found for ID: 1467
No movie found for ID: 1468
No movie found for ID: 1469
No movie found for ID: 1470
No movie found for ID: 1471
No movie found for ID: 1472
No movie found for ID: 1473
No movie found for ID: 1474
No movie found for ID: 1475
No movie found for ID: 1476
No movie found for I

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,Language,Country,Awards,Poster,Ratings,Metascore,imdbRating,imdbVotes,imdbID,Type,DVD,BoxOffice,Production,Website,Response,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path
0,False,/nKOQiWjhv6LXXSR3PiIab3LrKtU.jpg,,55000000,"[{'id': 18, 'name': 'Drama'}]",https://www.sonypictures.com/movies/thepursuit...,1402,tt0454921,en,The Pursuit of Happyness,A struggling salesman takes custody of his son...,45.24,/f6l9rghSHORkWLurUGJhaKAiyjY.jpg,"[{'id': 1423, 'logo_path': '/1rbAwGQzrNvXDICD6...","[{'iso_3166_1': 'US', 'name': 'United States o...",2006-12-14,307077295,117,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,The Pursuit of Happyness,False,7.9,9172,The Pursuit of Happyness,2006,PG-13,15 Dec 2006,117 min,"Biography, Drama",Gabriele Muccino,Steve Conrad,"Will Smith, Thandiwe Newton, Jaden Smith",A struggling salesman takes custody of his son...,"English, Cantonese",United States,Nominated for 1 Oscar. 12 wins & 26 nomination...,https://m.media-amazon.com/images/M/MV5BMTQ5Nj...,"[{'Source': 'Internet Movie Database', 'Value'...",64,8.0,541055,tt0454921,movie,16 Apr 2012,"$163,566,459",,,True,,,,


In [40]:
import pandas as pd
#last_movie = get_latest_movie()['id']
last_movie = 1400
all_movies = fetch_all_credits(0, last_movie)


pd.set_option('display.max_columns', None)
df = pd.json_normalize(all_movies)
df.to_parquet(f"credits/credits_{last_movie}.parquet", compression='gzip')
df.head(1)

No credits found for ID: 0
No credits found for ID: 1
No credits found for ID: 4
No credits found for ID: 7
No credits found for ID: 10
No credits found for ID: 23
No credits found for ID: 29
No credits found for ID: 30
No credits found for ID: 31
No credits found for ID: 32
No credits found for ID: 34
No credits found for ID: 36
No credits found for ID: 37
No credits found for ID: 39
No credits found for ID: 40
No credits found for ID: 41
No credits found for ID: 42
No credits found for ID: 43
No credits found for ID: 44
No credits found for ID: 45
No credits found for ID: 46
No credits found for ID: 47
No credits found for ID: 48
No credits found for ID: 49
No credits found for ID: 50
No credits found for ID: 51
No credits found for ID: 52
No credits found for ID: 53
No credits found for ID: 54
No credits found for ID: 56
No credits found for ID: 57
No credits found for ID: 60
No credits found for ID: 61
No credits found for ID: 72
No credits found for ID: 84
No credits found for ID:

Unnamed: 0,id,cast,crew
0,2,"[{'adult': False, 'gender': 2, 'id': 54768, 'k...","[{'adult': False, 'gender': 2, 'id': 16767, 'k..."


In [45]:
pd.set_option("max_colwidth", None)
df = pd.read_parquet('credits/credits_1400.parquet')
#df.head(1)

In [63]:
import ast
pd.set_option("max_colwidth", None)
df = pd.read_parquet('tmdb/movies_100.parquet')
df.head(1)



Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,Language,Country,Awards,Poster,Ratings,Metascore,imdbRating,imdbVotes,imdbID,Type,DVD,BoxOffice,Production,Website,Response,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path
0,False,/dQL2wJZo05GDd21VgOacMeCuyZy.jpg,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 35, 'name': 'Comedy'}, {'id': 10749, 'name': 'Romance'}]",,2,tt0094675,fi,Ariel,"After the coal mine he works at closes and his father commits suicide, a Finnish man leaves for the city to make a living but there, he is framed and imprisoned for various crimes.",9.651,/ojDg0PGvs6R9xYFodRct2kdI6wC.jpg,"[{'id': 2303, 'logo_path': None, 'name': 'Villealfa Filmproductions', 'origin_country': 'FI'}]","[{'iso_3166_1': 'FI', 'name': 'Finland'}]",1988-10-21,0,73,"[{'english_name': 'Finnish', 'iso_639_1': 'fi', 'name': 'suomi'}]",Released,,Ariel,False,7.1,262,Ariel,1988,Not Rated,01 Oct 1990,72 min,"Comedy, Crime, Romance",Aki Kaurismäki,Aki Kaurismäki,"Turo Pajala, Susanna Haavisto, Matti Pellonpää",A Finnish man goes to the city to find a job after the mine where he worked is closed and his father commits suicide.,Finnish,Finland,3 wins & 1 nomination,https://m.media-amazon.com/images/M/MV5BOGU5OGVlNjEtNTE3Ny00YWZkLThlMmQtYjlkNmNjNTA1OGY5XkEyXkFqcGdeQXVyMjI0MjMwMzQ@._V1_SX300.jpg,"[{'Source': 'Internet Movie Database', 'Value': '7.5/10'}]",,7.5,7369,tt0094675,movie,,,,,True,,,,


In [65]:
data = df.copy()

def extract_rating(ratings, source):
    for rating in ratings:
        if rating['Source'] == source:
            return rating['Value']
    return None

data = data.drop(columns=['Title'])
data['Rotten_Tomatoes_Rating'] = data['Ratings'].apply(lambda x: extract_rating(x, 'Rotten Tomatoes')) 
data['Metacritic_Rating'] = data['Ratings'].apply(lambda x: extract_rating(x, 'Metacritic')) 
data['Internet_Movie_Database_Rating'] = data['Ratings'].apply(lambda x: extract_rating(x, 'Internet Movie Database')) 


# Standardize column names
data.columns = data.columns.str.lower().str.replace(' ', '_')
data = data.drop(columns=['backdrop_path', 'poster_path', 'video', 
                          'belongs_to_collection.name', 'belongs_to_collection.poster_path', 'belongs_to_collection.backdrop_path',
                          'belongs_to_collection.id','response'
                          , 'poster', 'homepage', 'imdbid', 'type', 'dvd', 'website'])



data['genres'] = data['genres'].apply(lambda x: ', '.join([d['name'] for d in x]))
data['production_companies'] = data['production_companies'].apply(lambda x: ', '.join([d['name'] for d in x]))
data['production_countries'] = data['production_countries'].apply(lambda x: ', '.join([d['iso_3166_1'] for d in x]))
data['spoken_languages'] = data['spoken_languages'].apply(lambda x: ', '.join([d['iso_639_1'] for d in x]))
data.head(20)





#df.to_csv('movies.csv')

Unnamed: 0,adult,belongs_to_collection,budget,genres,id,imdb_id,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,year,rated,released,runtime.1,genre,director,writer,actors,plot,language,country,awards,ratings,metascore,imdbrating,imdbvotes,boxoffice,production,rotten_tomatoes_rating,metacritic_rating,internet_movie_database_rating
0,False,,0,"Drama, Comedy, Romance",2,tt0094675,fi,Ariel,"After the coal mine he works at closes and his father commits suicide, a Finnish man leaves for the city to make a living but there, he is framed and imprisoned for various crimes.",9.651,Villealfa Filmproductions,FI,1988-10-21,0,73,fi,Released,,Ariel,7.1,262,1988,Not Rated,01 Oct 1990,72 min,"Comedy, Crime, Romance",Aki Kaurismäki,Aki Kaurismäki,"Turo Pajala, Susanna Haavisto, Matti Pellonpää",A Finnish man goes to the city to find a job after the mine where he worked is closed and his father commits suicide.,Finnish,Finland,3 wins & 1 nomination,"[{'Source': 'Internet Movie Database', 'Value': '7.5/10'}]",,7.5,7369,,,,,7.5/10
1,False,,0,"Drama, Comedy, Romance",3,tt0092149,fi,Varjoja paratiisissa,"Nikander, a rubbish collector and would-be entrepreneur finds his plans for success dashed when his business associate dies. One evening, he meets Ilona, a down-on-her luck cashier in a local supermarket—and, falteringly, a bond begins to develop between them.",9.185,Villealfa Filmproductions,FI,1986-10-17,0,74,"en, fi, sv",Released,,Shadows in Paradise,7.2,283,1986,Not Rated,17 Oct 1986,74 min,"Comedy, Drama, Music",Aki Kaurismäki,Aki Kaurismäki,"Matti Pellonpää, Kati Outinen, Sakari Kuosmanen","An episode in the life of Nikander, a garbage man, involving the death of a coworker, a love affair and much more.","Finnish, Swedish, English",Finland,1 win,"[{'Source': 'Internet Movie Database', 'Value': '7.5/10'}]",,7.5,5828,,,,,7.5/10
2,False,,4000000,Comedy,5,tt0113101,en,Four Rooms,It's Ted the Bellhop's first night on the job...and the hotel's very unusual guests are about to place him in some outrageous predicaments. It seems that this evening's room service is serving up one unbelievable happening after another.,24.796,"Miramax, A Band Apart",US,1995-12-09,4257354,98,en,Released,"Twelve outrageous guests. Four scandalous requests. And one lone bellhop, in his first day on the job, who's in for the wildest New year's Eve of his life.",Four Rooms,5.784,2436,1995,R,25 Dec 1995,98 min,Comedy,"Allison Anders, Alexandre Rockwell, Robert Rodriguez","Allison Anders, Alexandre Rockwell, Robert Rodriguez","Tim Roth, Antonio Banderas, Sammi Davis",Four interlocking tales that take place in a fading hotel on New Year's Eve.,English,United States,1 win & 1 nomination,"[{'Source': 'Internet Movie Database', 'Value': '6.7/10'}, {'Source': 'Rotten Tomatoes', 'Value': '13%'}]",,6.7,109016,"$4,257,354",,13%,,6.7/10
3,False,,21000000,"Action, Crime, Thriller",6,tt0107286,en,Judgment Night,"While racing to a boxing match, Frank, Mike, John and Rey get more than they bargained for. A wrong turn lands them directly in the path of Fallon, a vicious, wise-cracking drug lord. After accidentally witnessing Fallon murder a disloyal henchman, the four become his unwilling prey in a savage game of cat and mouse as they are mercilessly stalked through the urban jungle in this taut suspense drama.",12.517,"Largo Entertainment, JVC",US,1993-10-15,12136938,109,en,Released,Don't move. Don't whisper. Don't even breathe.,Judgment Night,6.533,302,1993,R,15 Oct 1993,110 min,"Action, Crime, Drama",Stephen Hopkins,"Lewis Colick, Jere Cunningham","Emilio Estevez, Cuba Gooding Jr., Denis Leary","Four young friends, while taking a shortcut en route to a local boxing match, witness a brutal murder which leaves them running for their lives.",English,"Japan, United States",,"[{'Source': 'Internet Movie Database', 'Value': '6.6/10'}, {'Source': 'Rotten Tomatoes', 'Value': '35%'}, {'Source': 'Metacritic', 'Value': '46/100'}]",46.0,6.6,18036,"$12,526,677",,35%,46/100,6.6/10
4,False,,42000,Documentary,8,tt0825671,en,Life in Loops (A Megacities RMX),"Timo Novotny labels his new project an experimental music documentary film, in a remix of the celebrated film Megacities (1997), a visually refined essay on the hidden faces of several world ""megacities"" by leading Austrian documentarist Michael Glawogger. Novotny complements 30 % of material taken straight from the film (and re-edited) with 70 % as yet unseen footage in which he blends original shots unused by Glawogger with his own sequences (shot by Megacities cameraman Wolfgang Thaler) from Tokyo. Alongside the Japanese metropolis, Life in Loops takes us right into the atmosphere of Mexico City, New York, Moscow and Bombay. This electrifying combination of fascinating film images and an equally compelling soundtrack from Sofa Surfers sets us off on a stunning audiovisual adventure across the continents. The film also makes an original contribution to the discussion on new trends in documentary filmmaking. Written by KARLOVY VARY IFF 2006",1.445,inLoops,AT,2006-01-01,0,80,"en, hi, ja, ru, es",Released,A Megacities remix.,Life in Loops (A Megacities RMX),7.7,25,2006,,04 Jul 2006,79 min,Documentary,Timo Novotny,"Michael Glawogger, Timo Novotny",,"A remix of images and sounds, using a films original material and mixing it with new imagery.","English, Hindi, Japanese, Russian, Spanish",Austria,8 wins & 17 nominations,"[{'Source': 'Internet Movie Database', 'Value': '8.1/10'}]",,8.1,278,,Paul Thiltges Distributions,,,8.1/10
5,False,,0,Drama,9,tt0425473,de,Sonntag im August,,1.946,,DE,2004-09-02,0,15,de,Released,,Sunday in August,7.6,22,2004,,22 Jan 2005,15 min,Short,Marc Meyer,Marc Meyer,"Rita Lengyel, Milton Welsh",A couple on a boat. Their love is burnt out. But how to let go when souls are entangled?,German,Germany,1 nomination,[],,,13,,,,,
6,False,,11000000,"Adventure, Action, Science Fiction",11,tt0076759,en,Star Wars,Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.,85.602,"Lucasfilm Ltd., 20th Century Fox",US,1977-05-25,775398007,121,en,Released,"A long time ago in a galaxy far, far away...",Star Wars,8.204,19182,1977,PG,25 May 1977,121 min,"Action, Adventure, Fantasy",George Lucas,George Lucas,"Mark Hamill, Harrison Ford, Carrie Fisher","Luke Skywalker joins forces with a Jedi Knight, a cocky pilot, a Wookiee and two droids to save the galaxy from the Empire's world-destroying battle station, while also attempting to rescue Princess Leia from the mysterious Darth ...",English,United States,Won 6 Oscars. 65 wins & 31 nominations total,"[{'Source': 'Internet Movie Database', 'Value': '8.6/10'}, {'Source': 'Rotten Tomatoes', 'Value': '93%'}, {'Source': 'Metacritic', 'Value': '90/100'}]",90.0,8.6,1415793,"$460,998,507",,93%,90/100,8.6/10
7,False,,94000000,"Animation, Family",12,tt0266543,en,Finding Nemo,"Nemo, an adventurous young clownfish, is unexpectedly taken from his Great Barrier Reef home to a dentist's office aquarium. It's up to his worrisome father Marlin and a friendly but forgetful fish Dory to bring Nemo home -- meeting vegetarian sharks, surfer dude turtles, hypnotic jellyfish, hungry seagulls, and more along the way.",85.819,Pixar,US,2003-05-30,940335536,100,en,Released,There are 3.7 trillion fish in the ocean. They're looking for one.,Finding Nemo,7.825,18084,2003,G,30 May 2003,100 min,"Animation, Adventure, Comedy","Andrew Stanton, Lee Unkrich","Andrew Stanton, Bob Peterson, David Reynolds","Albert Brooks, Ellen DeGeneres, Alexander Gould","After his son is captured in the Great Barrier Reef and taken to Sydney, a timid clownfish sets out on a journey to bring him home.",English,"United States, Japan",Won 1 Oscar. 49 wins & 63 nominations total,"[{'Source': 'Internet Movie Database', 'Value': '8.2/10'}, {'Source': 'Rotten Tomatoes', 'Value': '99%'}, {'Source': 'Metacritic', 'Value': '90/100'}]",90.0,8.2,1085322,"$380,843,261",,99%,90/100,8.2/10
8,False,,55000000,"Comedy, Drama, Romance",13,tt0109830,en,Forrest Gump,"A man with a low IQ has accomplished great things in his life and been present during significant historic events—in each case, far exceeding what anyone imagined he could do. But despite all he has achieved, his one true love eludes him.",69.696,"Paramount, The Steve Tisch Company, Wendy Finerman Productions",US,1994-06-23,677387716,142,en,Released,The world will never be the same once you've seen it through the eyes of Forrest Gump.,Forrest Gump,8.478,25448,1994,PG-13,06 Jul 1994,142 min,"Drama, Romance",Robert Zemeckis,"Winston Groom, Eric Roth","Tom Hanks, Robin Wright, Gary Sinise","The history of the United States from the 1950s to the '70s unfolds from the perspective of an Alabama man with an IQ of 75, who yearns to be reunited with his childhood sweetheart.",English,United States,Won 6 Oscars. 50 wins & 74 nominations total,"[{'Source': 'Internet Movie Database', 'Value': '8.8/10'}, {'Source': 'Rotten Tomatoes', 'Value': '71%'}, {'Source': 'Metacritic', 'Value': '82/100'}]",82.0,8.8,2174144,"$330,455,270",,71%,82/100,8.8/10
9,False,,15000000,Drama,14,tt0169547,en,American Beauty,"Lester Burnham, a depressed suburban father in a mid-life crisis, decides to turn his hectic life around after developing an infatuation with his daughter's attractive friend.",33.056,"Jinks/Cohen Company, DreamWorks Pictures",US,1999-09-15,356296601,122,en,Released,Look closer.,American Beauty,8.02,11274,1999,R,01 Oct 1999,122 min,Drama,Sam Mendes,Alan Ball,"Kevin Spacey, Annette Bening, Thora Birch",A sexually frustrated suburban father has a mid-life crisis after becoming infatuated with his daughter's best friend.,English,United States,Won 5 Oscars. 112 wins & 102 nominations total,"[{'Source': 'Internet Movie Database', 'Value': '8.3/10'}, {'Source': 'Rotten Tomatoes', 'Value': '87%'}, {'Source': 'Metacritic', 'Value': '84/100'}]",84.0,8.3,1184677,"$130,096,601",,87%,84/100,8.3/10


In [None]:
# Remove duplicate columns
#data = df.drop(columns=['title'])

# Explode JSON columns
def explode_json(column):
    data[column] = data[column].apply(ast.literal_eval)
    return data.explode(column)

data = explode_json('genres')
data = explode_json('production_companies')

# Parse awards column for nomination information
def parse_nominations(awards_text):
    if 'nomination' in awards_text:
        return int(awards_text.split()[0])
    return 0

data['nomination_count'] = data['awards'].apply(parse_nominations)