In [1]:
# Import dependencies
import csv
import os
from itertools import chain
from pprint import pprint
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests

# OMDb API key. Prefer the OMDB_API_KEY environment variable so the credential
# does not have to live in the notebook.
# NOTE(review): the literal fallback below is a credential committed to source;
# it is kept only so existing runs keep working. Rotate it and drop the default
# once OMDB_API_KEY is set wherever this notebook runs.
omdb_key = os.environ.get('OMDB_API_KEY', '9b9d69c4')

In [2]:
# Load the csv file from the Output folder into a data frame and display it
best_picture_csv = 'Output/newawardwinners.csv'
best_picture_df = pd.read_csv(filepath_or_buffer=best_picture_csv)
best_picture_df

Unnamed: 0,category,year,entity
0,OUTSTANDING PICTURE,1927,Wings
1,OUTSTANDING PICTURE,1928,The Broadway Melody
2,OUTSTANDING PRODUCTION,1929,All Quiet on the Western Front
3,OUTSTANDING PRODUCTION,1930,Cimarron
4,OUTSTANDING PRODUCTION,1931,Grand Hotel
...,...,...,...
88,BEST PICTURE,2015,Spotlight
89,BEST PICTURE,2016,Moonlight
90,BEST PICTURE,2017,The Shape of Water
91,BEST PICTURE,2018,Green Book


In [3]:
# Create a base url for API requests; the movie title is appended after 't='.
# HTTPS is used so the API key carried in the query string is not sent in clear text.
url = f'https://www.omdbapi.com/?apikey={omdb_key}&t='

In [5]:
# Give the existing columns their display names, then add empty placeholder
# columns that are filled in after the API requests
best_picture_df.rename(
    columns={'category': 'Award', 'year': 'Year', 'entity': 'Movie Title'},
    inplace=True,
)
for new_column in ('Movie Genre', 'Runtime', 'Actors', 'IMDB Rating', 'Rotten Tomatoes Rating'):
    best_picture_df[new_column] = ""
best_picture_df.head()

Unnamed: 0,Award,Year,Movie Title,Movie Genre,Runtime,Actors,IMDB Rating,Rotten Tomatoes Rating
0,OUTSTANDING PICTURE,1927,Wings,,,,,
1,OUTSTANDING PICTURE,1928,The Broadway Melody,,,,,
2,OUTSTANDING PRODUCTION,1929,All Quiet on the Western Front,,,,,
3,OUTSTANDING PRODUCTION,1930,Cimarron,,,,,
4,OUTSTANDING PRODUCTION,1931,Grand Hotel,,,,,


In [6]:
# Make API requests while iterating through data frame and append the results to the data frame.
# Each film is handled best-effort: any failure is reported and the loop moves on.
for index, row in best_picture_df.iterrows():

    # Percent-encode the title so characters such as '&', '#' or '+' cannot
    # truncate or corrupt the query string.
    query_url = url + quote(row['Movie Title'], safe='')

    try:
        # The timeout keeps a single stalled request from hanging the whole run.
        response_url = requests.get(query_url, timeout=10).json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON.
        print(f"Problem with this film - '{row['Movie Title']}'")
        continue

    try:
        best_picture_df.loc[index, 'Movie Genre'] = response_url['Genre']
        best_picture_df.loc[index, 'Actors'] = response_url['Actors']
        best_picture_df.loc[index, 'Runtime'] = response_url['Runtime']

        # Look ratings up by their 'Source' label rather than by list position:
        # when a film lacks one source, a positional index would silently store
        # another source's score under the wrong column.
        ratings = {rating['Source']: rating['Value'] for rating in response_url['Ratings']}
        best_picture_df.loc[index, 'IMDB Rating'] = ratings['Internet Movie Database']
        best_picture_df.loc[index, 'Rotten Tomatoes Rating'] = ratings['Rotten Tomatoes']

    except (KeyError, IndexError):
        # Film not found, or an expected field / rating source is missing.
        print(f"Problem with this film - '{row['Movie Title']}'")

In [7]:
# Preview the first five rows to confirm the API results landed in the data frame
best_picture_df.head(n=5)

Unnamed: 0,Award,Year,Movie Title,Movie Genre,Runtime,Actors,IMDB Rating,Rotten Tomatoes Rating
0,OUTSTANDING PICTURE,1927,Wings,"Drama, Romance, War, Action",144 min,"Clara Bow, Charles 'Buddy' Rogers, Richard Arl...",7.5/10,93%
1,OUTSTANDING PICTURE,1928,The Broadway Melody,"Drama, Musical, Romance",100 min,"Charles King, Anita Page, Bessie Love",5.7/10,35%
2,OUTSTANDING PRODUCTION,1929,All Quiet on the Western Front,"Drama, War",152 min,"Louis Wolheim, Lew Ayres, John Wray, Arnold Lucy",8.0/10,98%
3,OUTSTANDING PRODUCTION,1930,Cimarron,"Drama, Western",123 min,"Richard Dix, Irene Dunne, Estelle Taylor, Nanc...",5.9/10,52%
4,OUTSTANDING PRODUCTION,1931,Grand Hotel,"Drama, Romance",112 min,"Greta Garbo, John Barrymore, Joan Crawford, Wa...",7.4/10,88%


In [9]:
# Build a narrower data frame (year, movie, actors) to use for cross referencing
actor_columns = ['Year', 'Movie Title', 'Actors']
actors_df = best_picture_df[actor_columns]
actors_df

Unnamed: 0,Year,Movie Title,Actors
0,1927,Wings,"Clara Bow, Charles 'Buddy' Rogers, Richard Arl..."
1,1928,The Broadway Melody,"Charles King, Anita Page, Bessie Love"
2,1929,All Quiet on the Western Front,"Louis Wolheim, Lew Ayres, John Wray, Arnold Lucy"
3,1930,Cimarron,"Richard Dix, Irene Dunne, Estelle Taylor, Nanc..."
4,1931,Grand Hotel,"Greta Garbo, John Barrymore, Joan Crawford, Wa..."
...,...,...,...
88,2015,Spotlight,"Mark Ruffalo, Michael Keaton, Rachel McAdams, ..."
89,2016,Moonlight,"Mahershala Ali, Shariff Earp, Duan Sanderson, ..."
90,2017,The Shape of Water,"Sally Hawkins, Michael Shannon, Richard Jenkin..."
91,2018,Green Book,"Viggo Mortensen, Mahershala Ali, Linda Cardell..."


In [10]:
# Split actors column and duplicate rows so that each actor has own row for cross referencing
def column_splitter(column_to_split):
    """Flatten a column of comma-separated strings into one list of values.

    Each element of ``column_to_split`` is split on ',' and the pieces from
    all rows are concatenated in row order. Surrounding whitespace is stripped
    from every piece so that "A, B" yields "B" rather than " B"; otherwise
    exact-match cross-referencing on the names would fail for every entry
    after the first in a row.

    Parameters
    ----------
    column_to_split : pandas.Series
        Series of strings, accessed through its ``.str`` accessor.

    Returns
    -------
    list of str
        One entry per comma-separated piece. The number of pieces per row is
        unchanged by the stripping (empty pieces are kept).
    """
    split_values = column_to_split.str.split(',')
    return [piece.strip() for piece in chain.from_iterable(split_values)]

# Count the comma-separated entries in each row's 'Actors' value; this is how
# many times that row's film columns must be repeated
lengths = actors_df['Actors'].str.split(',').map(len)

# Repeat the film-level columns once per actor, then pair them with the
# flattened list of actor names
film_columns = {
    column: np.repeat(actors_df[column], lengths)
    for column in ('Year', 'Movie Title')
}
actors_df = pd.DataFrame({**film_columns, 'Actor': column_splitter(actors_df['Actors'])})

actors_df.head()

Unnamed: 0,Year,Movie Title,Actor
0,1927,Wings,Clara Bow
0,1927,Wings,Charles 'Buddy' Rogers
0,1927,Wings,Richard Arlen
0,1927,Wings,Jobyna Ralston
1,1928,The Broadway Melody,Charles King


In [11]:
# Write the per-actor data frame to a csv file (row index omitted) for further analysis
actors_csv = r'Output/Award_Data_by_Actors.csv'
actors_df.to_csv(actors_csv, index=False)