# Add New Movies to the Cleaned Movie List

In [76]:
#test git commit
# import dependencies
import requests
import pandas as pd
from config import api_key
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [79]:
# Read cleaned movie list
old_movie_list = pd.read_csv('resources/cleaned/movie_list_cleaned.csv')
old_movie_list.head()

Unnamed: 0,Title,Release date,Time Period,Start Time,End Time,Diff,Notes on setting,Location,genre_ids,movie_id,...,title,video,vote_average,vote_count,name,keywords,genres,Full Location,latitude,longitude
0,Quest for Fire,1981,"80,000 BC",-80000,-80000,0,"The story is set in Paleolithic Europe, with i...",Europe,"[12, 18]",62204,...,Quest for Fire,False,7.1,359.0,"Everett McGill, Ron Perlman, Nicholas Kadi, Ra...","fire, based on novel or book, mammoth, stone a...","['Adventure', 'Drama']",أوروبا,51.0,10.0
1,The Clan of the Cave Bear,1986,"40,000 - 35,000 BC",-40000,-35000,5000,In times of�Neanderthal extinction,Europe,"[12, 18]",13853,...,The Clan of the Cave Bear,False,5.1,87.0,"Daryl Hannah, Pamela Reed, James Remar, Thomas...","stone age, animal attack, tribe, bear, cavemen...","['Adventure', 'Drama']",أوروبا,51.0,10.0
2,Conan the Barbarian,1982,"32,000 - 10,000 BC",-32000,-10000,22000,"Occurs in the pseudo-historical ""Hyborian Age""...",Middle East,"[12, 14, 28]",9387,...,Conan the Barbarian,False,6.8,1836.0,"Arnold Schwarzenegger, James Earl Jones, Max v...","gladiator, repayment, fight, mythology, magic,...","['Adventure', 'Fantasy', 'Action']","Middle East, Baltimore, Maryland, United States",39.301416,-76.588848
3,Conan the Destroyer,1984,"32,000 - 10,000 BC",-32000,-10000,22000,,Middle East,"[12, 14, 28]",9610,...,Conan the Destroyer,False,6.1,1115.0,"Arnold Schwarzenegger, Grace Jones, Wilt Chamb...","gladiator, swordplay, fight, sword, magic, war...","['Adventure', 'Fantasy', 'Action']","Middle East, Baltimore, Maryland, United States",39.301416,-76.588848
4,Alpha,2018,"20,000 BC",-20000,-20000,0,Dog domestication,Europe,"[12, 18]",399360,...,Alpha,False,6.4,2083.0,"Kodi Smit-McPhee, Jóhannes Haukur Jóhannesson,...","wolf, ice age, human animal relationship, wild...","['Adventure', 'Drama']",أوروبا,51.0,10.0


In [80]:
# Add New Movie

# Enter the movie information into the variables below:
title = 'Cleopatra'
release_date = 1917
time_period = 'length'
start_time = 'begin year'
end_time = 'end year'
location = 'location'

In [81]:
# Get the additional info for movies 

# Create DF for new movie
new_movie = pd.DataFrame({
                    'Title':[title],
                    'Release date':[release_date],
                    'Start Time':[start_time],
                    'End Time':[end_time],
                    'Location':[location]
                })
title = new_movie['Title'][0]
year = new_movie['Release date'][0]

title = title.replace(' ','+')

#Query TMDB data base for movies on wiki list
movie_tmdb = pd.DataFrame(requests.get(f'https://api.themoviedb.org/3/search/movie?api_key={api_key}&year={year}&query={title}').json()['results'])

# Clean new DF
movie_tmdb = movie_tmdb.drop_duplicates(subset=['title','release_date'])
movie_tmdb['release_year'] = movie_tmdb['release_date'].astype(str).str[0:4].astype(int)

movie = new_movie.merge(movie_tmdb, how='left', left_on=['Title','Release date'], right_on=['title','release_year'])
movie.drop(columns=['adult', 'backdrop_path', 'original_title', 'poster_path', 'release_date', 'release_year'], inplace=True)
movie.dropna(subset=['id','Start Time'], inplace=True)
movie.reset_index(drop=True, inplace= True)
movie.rename(columns={'id':'movie_id'}, inplace=True)
movie.sort_values('Start Time', inplace=True)
movie['movie_id'] = movie.movie_id.astype(int)

new_movie


Unnamed: 0,Title,Release date,Start Time,End Time,Location
0,Cleopatra,1917,begin year,end year,location


In [90]:
# Create list of movie id that will be used to make API calls for additional information
movie_id = movie.movie_id[0]

# Create blank DFs and lists
actors_df = pd.DataFrame()
directors_df = pd.DataFrame()
studio_df = pd.DataFrame()
actor_movie_id = []
director_movie_id = []
studio_movie_id = []

# Make API call for movie_id to get the actors, directors, and studios for the film

movie_credits = requests.get(f'https://api.themoviedb.org/3/movie/{movie_id}/credits?api_key={api_key}&language=en-US').json()

for actor in movie_credits['cast']:
    actors_df = actors_df.append(actor, ignore_index=True)
    actor_movie_id.append(movie_id)

for director in movie_credits['crew']:
    if director['job'] == "Director":
        directors_df = directors_df.append(director, ignore_index=True)
        director_movie_id.append(movie_id)

movie_studios = requests.get(f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}&language=en-US').json()
for studio in movie_studios['production_companies']:
    studio_df = studio_df.append(studio, ignore_index=True)
    studio_movie_id.append(movie_id)

# Clean the new DFs
actors_df["movie_id"] = actor_movie_id
directors_df['movie_id'] = director_movie_id

actors = actors_df.rename(columns = {"id": "actor_id",'popularity': 'actor_popularity'})
actors = actors[['name','actor_id','gender','character','actor_popularity','movie_id']]
actors_clean = pd.merge(actors,movie[['movie_id', 'title']], on='movie_id', how='left')

directors_df['movie_id'] = director_movie_id
directors = directors_df.rename(columns={'id': 'director_id', 'popularity': 'director_popularity'})
directors = directors[['name','director_id','gender','director_popularity','movie_id']]
directors_clean = pd.merge(directors,movie[['movie_id', 'title']], on='movie_id', how='left')

studio_df['movie_id'] = studio_movie_id
studio = studio_df.drop(columns=['logo_path'])
studio = studio.rename(columns={'id': 'studio_id', 'name':'studio_name'})
studio = pd.merge(studio,movie[['movie_id', 'title']], on='movie_id', how='left')

studio_group = studio.groupby('studio_name',sort = False)
studio_group = studio_group['title'].agg(lambda column: ", ".join(column))
studio_group = studio_group.reset_index(name='title')
studio_group = studio_group.rename(columns={'title':'title list'})
studio_clean = studio_group.merge(studio,on='studio_name', how='right')



actors_clean

Unnamed: 0,name,actor_id,gender,character,actor_popularity,movie_id,title
0,Theda Bara,123584.0,1.0,Cleopatra,0.972,39950,Cleopatra
1,Fritz Leiber,103176.0,2.0,Caesar,0.788,39950,Cleopatra
2,Thurston Hall,34277.0,2.0,Antony,1.212,39950,Cleopatra
3,Alan Roscoe,123585.0,0.0,Pharon (as Albert Roscoe),1.024,39950,Cleopatra
4,Herschel Mayall,592836.0,2.0,Ventidius,0.6,39950,Cleopatra
5,Dorothy Drake,592837.0,0.0,Charmian,0.6,39950,Cleopatra
6,Delle Duncan,592838.0,0.0,Iras,0.6,39950,Cleopatra
7,Henri De Vries,592839.0,0.0,Octavius Caesar,0.98,39950,Cleopatra
8,Art Acord,592840.0,2.0,Kephren,1.015,39950,Cleopatra
9,Hector Sarno,592841.0,0.0,Messenger,0.6,39950,Cleopatra


In [89]:
# Add CSVs to update
old_movie_list = pd.read_csv('resources/cleaned/movie_list_cleaned.csv')
old_actors_clean = pd.read_csv('resources/cleaned/actors_cleaned.csv')
old_directors_cleaned = pd.read_csv('resources/cleaned/director_cleaned.csv')
old_studio_cleaned = pd.read_csv('resources/cleaned/studio_cleaned.csv')

old_actors_clean.drop(columns=['title list'], inplace=True)
old_actors_clean.head()

Unnamed: 0,name,actor_id,gender,character,actor_popularity,movie_id,title
0,Everett McGill,5616.0,2.0,Naoh,6.574,62204,Quest for Fire
1,Ron Perlman,2372.0,2.0,Amoukar,18.384,62204,Quest for Fire
2,Nicholas Kadi,152970.0,2.0,Gaw,1.712,62204,Quest for Fire
3,Rae Dawn Chong,13312.0,1.0,Ika,14.535,62204,Quest for Fire
4,Gary Schwartz,184330.0,2.0,Rouka - tribu Ulam,0.98,62204,Quest for Fire


In [83]:
# Add list of movies to actors
movie_group = actors_clean.groupby('name',sort = False)
movie_group = movie_group['title'].agg(lambda column: ", ".join(column))
movie_group = movie_group.reset_index(name='title')
movie_group = movie_group.rename(columns={'title':'title list'})
actors_clean = movie_group.merge(actors_clean,on='name', how='right')

# Add list of actors to movie_list
actors_group = actors_clean.groupby('movie_id',sort = False)
actor_lists = actors_group['name'].agg(lambda column: ", ".join(column))
actor_lists = actor_lists.reset_index(name='name')

movie = movie.merge(actor_lists,on='movie_id', how='left')

# Count each recurrance of an actor's name and create a DF.
actor_movie_count = pd.DataFrame({'count': actors_clean.groupby(['name','actor_id'],sort=False).size()}).reset_index()