In [312]:
# Import dependencies
import pandas as pd
import requests
from config import api_key

In [313]:
# Movies after processing
movie_list = pd.read_csv('resources/cleaned/movie_list_cleaned.csv', index_col = False)
movie_list = movie_list.rename(columns={'id':'movie_id'})
movie_list['date'].astype(str).astype(int)

movie_list = movie_list.sort_values('date')

In [314]:
# Code from The-Final-Project_F-PALS, Popular_Crew.ipynb, refractoring for this project.
# The code below will make an API call based on the movie list to get the leading actors and directors of each film.

# Create list of movie ids that will be used to make API calls for additional information
id_list = movie_list.movie_id.tolist()

actors = pd.DataFrame()
directors = pd.DataFrame()
id_for_movie = []
director_movie_id = []

for movie_id in id_list:
    movie = requests.get(f'https://api.themoviedb.org/3/movie/{movie_id}/credits?api_key={api_key}&language=en-US').json()

    for actor in movie['cast']:
        actors = actors.append(actor, ignore_index=True)
        id_for_movie.append(movie_id)

    for director in movie['crew']:
        if director['job'] == "Director":
            directors = directors.append(director, ignore_index=True)
            director_movie_id.append(movie_id)

actors["movie_id"] = id_for_movie
directors['movie_id'] = director_movie_id
actors = actors.rename(columns = {"id": "actor_id",'popularity': 'actor_popularity'})
actors = actors[['name','actor_id','gender','character','actor_popularity','movie_id']]
actors_clean = pd.merge(actors,movie_list[['movie_id', 'title']], on='movie_id', how='left')
directors['movie_id'] = director_movie_id
directors = directors.rename(columns={'id': 'director_id', 'popularity': 'director_popularity'})
director_clean = directors[['name','director_id','gender','director_popularity','movie_id']]

# Add list of actors to movie_list
actor_count = actors_clean.name.value_counts()
actors_group = actors_clean.groupby('movie_id',sort = False)
actor_lists = actors_group['name'].agg(lambda column: ", ".join(column))
actor_lists = actor_lists.reset_index(name='name')

movie_list = movie_list.merge(actor_lists,on='movie_id', how='left')

# Add list of movies to actors:
movie_group = actors_clean.groupby('name',sort = False)
movie_group = movie_group['title'].agg(lambda column: ", ".join(column))
movie_group = movie_group.reset_index(name='title')
movie_group = movie_group.rename(columns={'title':'title list'})
actors_clean = movie_group.merge(actors_clean,on='name', how='right')




In [338]:
id_list = movie_list.movie_id.tolist()
studio = pd.DataFrame()
id_for_movie = []

for movie_id in id_list:
    movieDetail = requests.get(f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}&language=en-US').json()
    for movie in movieDetail['production_companies']:
        studio = studio.append(movie, ignore_index=True)
        id_for_movie.append(movie_id)


studio['movie_id'] = id_for_movie
studio = studio.drop(columns=['logo_path'])
studio = studio.rename(columns={'id': 'studio_id', 'name':'studio_name'})
studio = pd.merge(studio,movie_list[['movie_id', 'title']], on='movie_id', how='left')

studio_group = studio.groupby('studio_name',sort = False)
studio_group = studio_group['title'].agg(lambda column: ", ".join(column))
studio_group = studio_group.reset_index(name='title')
studio_group = studio_group.rename(columns={'title':'title list'})
studio = studio_group.merge(studio,on='studio_name', how='right')

studio.head(30)

Unnamed: 0,studio_name,title list,studio_id,origin_country,movie_id,title
0,Belstar Productions,Quest for Fire,999.0,,62204,Quest for Fire
1,Stéphan Films,Quest for Fire,2359.0,FR,62204,Quest for Fire
2,International Cinema Corporation (ICC),Quest for Fire,25765.0,CA,62204,Quest for Fire
3,Ciné Trail,Quest for Fire,111483.0,,62204,Quest for Fire
4,Gruskoff Film Organization,Quest for Fire,111484.0,,62204,Quest for Fire
5,Royal Bank of Canada,Quest for Fire,111485.0,,62204,Quest for Fire
6,Famous Players Limited,Quest for Fire,16914.0,,62204,Quest for Fire
7,20th Century Fox,"Quest for Fire, The Egyptian, David and Bathsh...",25.0,US,62204,Quest for Fire
8,Burnt Orange Productions,Homo Erectus,3637.0,,14641,Homo Erectus
9,Centropolis Entertainment,"10,000 BC",347.0,,7840,"10,000 BC"


In [339]:
# Add csv files to cleaned folder
actors_clean.to_csv('resources/cleaned/actors_cleaned.csv', index=False)
director_clean.to_csv('resources/cleaned/director_cleaned.csv', index=False)
studio.to_csv('resources/cleaned/studio_cleaned.csv', index=False)

In [317]:
genre = requests.get(f'https://api.themoviedb.org/3/genre/movie/list?api_key={api_key}&language=en-US').json()['genres']
genre_df = pd.DataFrame(genre)
genre_df

Unnamed: 0,id,name
0,28,Action
1,12,Adventure
2,16,Animation
3,35,Comedy
4,80,Crime
5,99,Documentary
6,18,Drama
7,10751,Family
8,14,Fantasy
9,36,History


In [378]:
actor_movie_count = pd.DataFrame({'count': actors_clean.groupby(['name','actor_id'],sort=False).size()}).reset_index()
actor_movie_count.loc[actor_movie_count['name']=='Ron Perlman']
actor_movie_count.to_csv('resources/cleaned/actor_movie_count.csv', index=False)

In [375]:
actors_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1821 entries, 0 to 1820
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              1821 non-null   object 
 1   title list        1821 non-null   object 
 2   actor_id          1821 non-null   float64
 3   gender            1821 non-null   float64
 4   character         1820 non-null   object 
 5   actor_popularity  1821 non-null   float64
 6   movie_id          1821 non-null   int64  
 7   title             1821 non-null   object 
dtypes: float64(3), int64(1), object(4)
memory usage: 192.6+ KB
