# API Practice

In [1]:
# import libraries
import os

import pandas as pd
import requests

In [2]:
# Read in movies.csv so the rows with missing values can be identified.
# A raw string is used because the Windows path contains backslashes: sequences such as
# "\L" or "\P" are invalid escapes in a normal string literal and trigger a warning on
# recent Python versions. The resulting path is identical to the previous one.
# NOTE(review): this is an absolute, machine-specific path; consider a configurable data directory.
movies_df = pd.read_csv(r"D:\Learning\Portfolio\Movie Python Project\movies.csv")

In [3]:
# Names of the movies that have a missing value in at least one column
missing_data_movies = movies_df.loc[movies_df.isnull().any(axis=1), "name"].tolist()

# Remove duplicates while keeping first-seen order, so the list (and therefore the
# order of the API requests made from it) is the same on every run; a plain set()
# would reorder the titles from one kernel session to the next.
# Null names are skipped because they cannot be used as a search query.
unique_missing_movies = list(
    dict.fromkeys(title for title in missing_data_movies if pd.notna(title))
)

print("Movies with missing data (unique):", unique_missing_movies)



In [4]:
# TMDB API key, read from the TMDB_API_KEY environment variable so the secret is
# never stored in (or committed with) the notebook.
# NOTE(review): a literal key used to live in this cell; treat that key as leaked and rotate it.
api_key = os.environ.get("TMDB_API_KEY", "").strip()
if not api_key:
    # Fail here with a clear message instead of letting every later request fail with HTTP 401.
    raise RuntimeError(
        "TMDB_API_KEY is not set. Define it in the environment before starting Jupyter."
    )

In [5]:
# Accumulator for the TMDB detail records fetched for the movies with missing values
movie_data = list()

In [6]:
# Use the TMDB API to fetch details for every title in unique_missing_movies.
# NOTE(review): the search is by title only and the first hit is taken, so remakes or
# same-named films can resolve to the wrong movie — spot-check the results before merging.

TMDB_BASE_URL = "https://api.themoviedb.org/3"
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection would hang the cell


def _fetch_tmdb_details(session, title):
    """Return the TMDB detail record for the first search hit of ``title``.

    Parameters
    ----------
    session : requests.Session
        Session used for both HTTP calls so the connection is reused.
    title : str
        Movie title to search for.

    Returns
    -------
    dict or None
        Parsed JSON of the movie-details endpoint, or None when the search
        returned no results.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts or non-2xx responses.
    """
    # Passing the query through `params` lets requests URL-encode it, so titles
    # containing '&', '#', '?' or spaces do not corrupt the query string.
    search_response = session.get(
        f"{TMDB_BASE_URL}/search/movie",
        params={"api_key": api_key, "query": title},
        timeout=REQUEST_TIMEOUT,
    )
    search_response.raise_for_status()  # Raise error for non-2xx status codes
    results = search_response.json().get("results") or []
    if not results:
        return None

    movie_id = results[0]["id"]  # ID of the first (best-ranked) search result
    detail_response = session.get(
        f"{TMDB_BASE_URL}/movie/{movie_id}",
        params={"api_key": api_key},
        timeout=REQUEST_TIMEOUT,
    )
    detail_response.raise_for_status()
    return detail_response.json()


# Start from an empty list every time so re-running this cell does not append
# a second copy of every record.
movie_data = []

with requests.Session() as session:
    for movie_title in unique_missing_movies:
        # Skip null / non-text entries; they cannot be used as a search query.
        if not isinstance(movie_title, str) or not movie_title.strip():
            continue

        try:
            details = _fetch_tmdb_details(session, movie_title)
        except requests.exceptions.RequestException as e:
            # Best effort: report the failure and carry on with the next title.
            print(f"Error fetching data for movie '{movie_title}': {e}")
            continue

        if details is None:
            print(f"Movie '{movie_title}' not found.")
        else:
            movie_data.append(details)

# Create a Pandas DataFrame from the collected movie data
missing_df = pd.DataFrame(movie_data)

# Show Dataframe
missing_df

Error fetching data for movie 'The Way': ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Movie 'Tee em el' not found.
Movie 'In God We Trust (or Gimme That Prime Time Religion)' not found.
Movie 'Geolkapseu' not found.


Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/zYeYZuqXuB2nfIR4zfKhaDWHeF6.jpg,,9000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 10749, '...",,14347,tt0086973,en,Blame It on Rio,...,1984-02-17,18644570,100,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,She's the hottest thing on the beach. She's al...,Blame It on Rio,False,5.639,155
1,False,/zoL7dO6Uw8wOtkhfrwryevQudSK.jpg,,0,"[{'id': 12, 'name': 'Adventure'}, {'id': 9648,...",https://www.warnerbros.com/movies/trick-or-tre...,1015724,tt21919270,en,Trick or Treat Scooby-Doo!,...,2022-10-04,0,78,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Trick or trap?!,Trick or Treat Scooby-Doo!,False,7.023,173
2,False,/a0OjGWE9ru1OlKLBujbz3mAYYxU.jpg,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 35, 'name...",,695,tt0108122,en,Short Cuts,...,1993-09-05,6110979,188,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Short Cuts raises the roof on America.,Short Cuts,False,7.226,545
3,False,/q83fNIvLYFYZ3SfRUoBACciOzMX.jpg,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 53, 'name...",https://www.netflix.com/title/81345983,567748,tt9421570,en,The Guilty,...,2021-09-24,0,91,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Listen carefully.,The Guilty,False,6.400,2447
4,False,/bkyugLf910U8TahWO9ChTunSMfU.jpg,,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,11796,tt0100112,fr,Le Mari de la coiffeuse,...,1990-10-03,0,82,"[{'english_name': 'Arabic', 'iso_639_1': 'ar',...",Released,,The Hairdresser's Husband,False,6.900,192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2222,False,/fTPi0tXFf4BSDDdf7J9pq5V9XyG.jpg,,0,"[{'id': 27, 'name': 'Horror'}, {'id': 53, 'nam...",https://www.troma.com/films/def-by-temptation/,71853,tt0099395,en,Def by Temptation,...,1990-03-23,0,95,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,A terrifying tale of vampires and lust!,Def by Temptation,False,4.833,45
2223,False,/3CeWGZjH9jSU16sZFnxMHMiP7zS.jpg,,0,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'nam...",,140605,tt0088895,en,Certain Fury,...,1985-03-01,1400000,87,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Two Academy Award Winning Stars... In the one ...,Certain Fury,False,5.200,17
2224,False,/sEl2Z5m3KgmAUMLaNJG8gQpVPIb.jpg,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,81824,tt0091310,en,Just Between Friends,...,1986-03-21,1991930,110,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,They loved. They laughed. They lied. All this ...,Just Between Friends,False,5.222,9
2225,False,/gHtdmi1ZjwdOVcg7OExpLtfkDJF.jpg,,10000000,"[{'id': 18, 'name': 'Drama'}, {'id': 27, 'name...",,2212,tt0119791,en,Nightwatch,...,1997-01-31,1179002,102,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,Nightwatch,False,6.043,370


In [13]:
# Inspect missing_df: list each column with its non-null count and dtype
missing_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2227 entries, 0 to 2226
Data columns (total 25 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  2227 non-null   bool   
 1   backdrop_path          2156 non-null   object 
 2   belongs_to_collection  236 non-null    object 
 3   budget                 2227 non-null   int64  
 4   genres                 2227 non-null   object 
 5   homepage               2227 non-null   object 
 6   id                     2227 non-null   int64  
 7   imdb_id                2218 non-null   object 
 8   original_language      2227 non-null   object 
 9   original_title         2227 non-null   object 
 10  overview               2227 non-null   object 
 11  popularity             2227 non-null   float64
 12  poster_path            2216 non-null   object 
 13  production_companies   2227 non-null   object 
 14  production_countries   2227 non-null   object 
 15  rele

In [15]:
# Keep only the two columns used downstream: the budget and the title.
# Selecting the wanted columns (instead of dropping every other column by name)
# keeps working when TMDB adds a new field to its response or when a field is
# absent from the fetched records; reindex also returns a new frame, so
# missing_df itself is left untouched.
missing_copy_df = missing_df.reindex(columns=['budget', 'title'])

In [16]:
# The movies dataframe calls this column 'name', so rename 'title' to line up with it
missing_copy_df = missing_copy_df.rename({'title': 'name'}, axis='columns')

In [17]:
# Display missing_copy_df to confirm that only 'budget' and 'name' remain
missing_copy_df

Unnamed: 0,budget,name
0,9000000,Blame It on Rio
1,0,Trick or Treat Scooby-Doo!
2,0,Short Cuts
3,0,The Guilty
4,0,The Hairdresser's Husband
...,...,...
2222,0,Def by Temptation
2223,0,Certain Fury
2224,0,Just Between Friends
2225,10000000,Nightwatch


In [18]:
# Take independent (deep) copies of the main movies dataframe and of the trimmed
# TMDB dataframe; the later cells work on these copies.

update_df = missing_copy_df.copy(deep=True)
original_df = movies_df.copy(deep=True)

In [19]:
# Display original_df (the copy of movies_df) to confirm the copy looks as expected
original_df

Unnamed: 0,name,rating,genre,year,released,score,votes,director,writer,star,country,budget,gross,company,runtime
0,The Shining,R,Drama,1980,"June 13, 1980 (United States)",8.4,927000.0,Stanley Kubrick,Stephen King,Jack Nicholson,United Kingdom,19000000.0,46998772.0,Warner Bros.,146.0
1,The Blue Lagoon,R,Adventure,1980,"July 2, 1980 (United States)",5.8,65000.0,Randal Kleiser,Henry De Vere Stacpoole,Brooke Shields,United States,4500000.0,58853106.0,Columbia Pictures,104.0
2,Star Wars: Episode V - The Empire Strikes Back,PG,Action,1980,"June 20, 1980 (United States)",8.7,1200000.0,Irvin Kershner,Leigh Brackett,Mark Hamill,United States,18000000.0,538375067.0,Lucasfilm,124.0
3,Airplane!,PG,Comedy,1980,"July 2, 1980 (United States)",7.7,221000.0,Jim Abrahams,Jim Abrahams,Robert Hays,United States,3500000.0,83453539.0,Paramount Pictures,88.0
4,Caddyshack,R,Comedy,1980,"July 25, 1980 (United States)",7.3,108000.0,Harold Ramis,Brian Doyle-Murray,Chevy Chase,United States,6000000.0,39846344.0,Orion Pictures,98.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7663,More to Life,,Drama,2020,"October 23, 2020 (United States)",3.1,18.0,Joseph Ebanks,Joseph Ebanks,Shannon Bond,United States,7000.0,,,90.0
7664,Dream Round,,Comedy,2020,"February 7, 2020 (United States)",4.7,36.0,Dusty Dukatz,Lisa Huston,Michael Saquella,United States,,,Cactus Blue Entertainment,90.0
7665,Saving Mbango,,Drama,2020,"April 27, 2020 (Cameroon)",5.7,29.0,Nkanya Nkwai,Lynno Lovert,Onyama Laura,United States,58750.0,,Embi Productions,
7666,It's Just Us,,Drama,2020,"October 1, 2020 (United States)",,,James Randall,James Randall,Christina Roz,United States,15000.0,,,120.0


In [20]:
# Display update_df (the copy of missing_copy_df) to confirm the copy looks as expected
update_df

Unnamed: 0,budget,name
0,9000000,Blame It on Rio
1,0,Trick or Treat Scooby-Doo!
2,0,Short Cuts
3,0,The Guilty
4,0,The Hairdresser's Husband
...,...,...
2222,0,Def by Temptation
2223,0,Certain Fury
2224,0,Just Between Friends
2225,10000000,Nightwatch


In [21]:
# Build the lookup dictionary `update`: movie name -> budget.
# When a name occurs more than once in update_df, the last row's budget is kept.
update = dict(zip(update_df['name'], update_df['budget']))

In [22]:
# Display the update dictionary to check its name -> budget entries and format
update

{'Blame It on Rio': 9000000,
 'Trick or Treat Scooby-Doo!': 0,
 'Short Cuts': 0,
 'The Guilty': 0,
 "The Hairdresser's Husband": 0,
 'Loveless': 0,
 "Bennett's War": 0,
 'Calvary': 0,
 "Summer '03": 0,
 'Testament': 1500000,
 'The Silence': 0,
 'Career Opportunities': 6000000,
 'Dark Eyes': 0,
 'Time Walker': 750000,
 'Kiss of the Spider Woman': 1250000,
 'Point of No Return': 0,
 'Starred Up': 0,
 'Nowhere Boy': 0,
 'Diva': 0,
 'Free Fire': 7000000,
 'Kicking and Screaming': 0,
 'Water': 0,
 'House of Flying Daggers': 15000000,
 'Taking Care of Business': 14000000,
 'Cliffs of Freedom': 0,
 'Sincerely Charlotte': 0,
 'De De Pyaar De': 0,
 'Maelström': 0,
 'In a World...': 1000000,
 'Chattahoochee': 0,
 'Shiloh': 0,
 'Happy Together': 0,
 'Miami Blues': 0,
 'UFC Fight Night 180: Ortega vs. The Korean Zombie': 0,
 'The Bear': 0,
 'Zatoichi': 2500000,
 'The Minus Man': 0,
 'Anthropoid': 0,
 'Mark Felt: The Man Who Brought Down the White House': 0,
 'Ed Gein': 0,
 'Ernest Scared Stupid': 

In [24]:
# Find all the unique movie names in original_df
existing_movies = set(original_df['name'])

# Keep only the TMDB entries whose name also appears in movies.csv
update_filtered = {key: budget for key, budget in update.items() if key in existing_movies}

# A TMDB budget of 0 means "unknown", so only strictly positive budgets may be used
# as replacements; a 0 leaves the original null in place.
positive_budgets = {name: amount for name, amount in update_filtered.items() if amount > 0}

# Look up the replacement budget for every row in a single vectorised pass
# (rows whose name has no usable budget map to NaN) ...
tmdb_budget = original_df['name'].map(positive_budgets)

# ... and write it only where the original budget is null AND a replacement exists.
# Existing budgets are never overwritten. The values are assigned positionally
# (to_numpy) so the result does not depend on the frame's index labels.
needs_fill = original_df['budget'].isnull() & tmdb_budget.notna()
original_df.loc[needs_fill, 'budget'] = tmdb_budget[needs_fill].to_numpy()

In [25]:
# Write the updated movies to a new CSV called Movies_updated.csv.
# index=False keeps the file's columns the same as the source CSV; otherwise the
# row index is written as an extra unnamed first column that reappears as
# "Unnamed: 0" when the file is read back.
original_df.to_csv('Movies_updated.csv', index=False)