# Film Remake Data
## Description
This notebook uses Python libraries Pandas, BeautifulSoup, and urllib to scrape raw data from Wikipedia, and creates clean files for use in the database notebook.

### Import Dependencies & Libraries

In [1]:
import pandas as pd
import numpy as np
import os
from pprint import pprint
import json
import time
import requests
from api_keys import api_key
from sqlalchemy import create_engine

### Specify the urls
Due to the size of the list, Wikpedia divides up its list of film remakes onto two web pages, storing titles starting with letters A-M on one page and N-Z on another. 

In [2]:
wiki_A_M = "https://en.wikipedia.org/wiki/List_of_film_remakes_(A%E2%80%93M)"
wiki_N_Z = "https://en.wikipedia.org/wiki/List_of_film_remakes_(N%E2%80%93Z)"

### Scrape Film Titles for A-M

In [3]:
#use read_html function to return tabular data as lists
tables_A_M = pd.read_html(wiki_A_M)

#check table count to make sure all tables were read
len(tables_A_M)

tables_A_M

[                                             Remakes  \
 0                       13 (2010) dir. Géla Babluani   
 1         The 13th Letter (1951) dir. Otto Preminger   
 2           101 Dalmatians (1996) dir. Stephen Herek   
 3              2001 Maniacs (2005) dir. Tim Sullivan   
 4  Tokyo Godfathers (2003) dir. Satoshi Kon3 Godf...   
 5                3 Idiotas (2017) dir. Carlos Bolado   
 6             3:10 to Yuma (2007) dir. James Mangold   
 7  Nineteen Eighty-Four (1984) dir. Michael Radfo...   
 
                                     Original version  
 0               13 Tzameti (2005) dir. Géla Babluani  
 1       Le Corbeau (1943) dir. Henri-Georges Clouzot  
 2  One Hundred and One Dalmatians (1961) dir. Cly...  
 3  Two Thousand Maniacs! (1964) dir. Herschell Go...  
 4  The Three Godfathers (1916) dir. Edward LeSain...  
 5               3 Idiots (2009) dir. Rajkumar Hirani  
 6  3:10 to Yuma (1957) dir. Delmer DavesBased on ...  
 7  1984 (1953) dir. Paul NickellBase

In [4]:
#check to see if we can return the Remake title in the first table
title_A_M = tables_A_M[0].iloc[4]["Remakes"].split(" (")[0]
title_A_M

'Tokyo Godfathers'

In [5]:
#Loop through tables on page and store data in series
remakes_title_A_M = pd.Series()
remakes_year_A_M = pd.Series()
original_title_A_M = pd.Series()
original_year_A_M = pd.Series()

for i in tables_A_M:
    remakes_title_A_M = remakes_title_A_M.append(i["Remakes"].apply(lambda x:x.split(" (")[0]))
    remakes_year_A_M = remakes_year_A_M.append(i["Remakes"].apply(lambda x:x[x.find("(")+1:x.find(")")]))
    original_title_A_M = original_title_A_M.append(i["Original version"].apply(lambda x:x.split(" (")[0]))
    original_year_A_M = original_year_A_M.append(i["Original version"].apply(lambda x:x[x.find("(")+1:x.find(")")]))

In [6]:
remakes_df_A_M = pd.DataFrame({"Remake Title":remakes_title_A_M, "Remake Year":remakes_year_A_M})

remakes_df_A_M.head()

Unnamed: 0,Remake Title,Remake Year
0,13,2010
1,The 13th Letter,1951
2,101 Dalmatians,1996
3,2001 Maniacs,2005
4,Tokyo Godfathers,2003


In [7]:
originals_df_A_M = pd.DataFrame({"Original Title":original_title_A_M, "Original Year":original_year_A_M})

originals_df_A_M.head()

Unnamed: 0,Original Title,Original Year
0,13 Tzameti,2005
1,Le Corbeau,1943
2,One Hundred and One Dalmatians,1961
3,Two Thousand Maniacs!,1964
4,The Three Godfathers,1916


### Scrape Film Titles for N-Z

In [8]:
#use read_html function to return tabular data as lists
tables_N_Z = pd.read_html(wiki_N_Z)

#check table count to make sure all tables were read
len(tables_N_Z)

# #check table type
# print(type(tables_N_Z))

tables_N_Z

[                                              Remakes  \
 0            Naa Autograph (2004) dir. S. Gopal Reddy   
 1                 Nadja (1994) dir. Michael Almereyda   
 2                  Naina (2005) dir. Shripal Morakhia   
 3    Never Say Never Again (1983) dir. Irvin Kershner   
 4         The Next Three Days (2010) dir. Paul Haggis   
 5        Night and the City (1992) dir. Irwin Winkler   
 6             Night Must Fall (1964) dir. Karel Reisz   
 7        Night of the Hunter (1991) dir. David Greene   
 8   A Nightmare on Elm Street (2010) dir. Samuel B...   
 9       Night of the Demons (2009) dir. Adam Gierasch   
 10  Night of the Living Dead 3D (2006) dir. Jeff B...   
 11                Nightwatch (1997) dir. Ole Bornedal   
 12             Nine Months (1995) dir. Chris Columbus   
 13            No Reservations (2007) dir. Scott Hicks   
 14                Nosferatu (1979) dir. Werner Herzog   
 15  No Way Out (1987) dir. Roger DonaldsonPolice P...   
 16  Not of Th

In [9]:
#Loop through tables on page and store data in series
remakes_title_N_Z = pd.Series()
remakes_year_N_Z = pd.Series()
original_title_N_Z = pd.Series()
original_year_N_Z = pd.Series()

for i in tables_N_Z:
    remakes_title_N_Z = remakes_title_N_Z.append(i["Remakes"].apply(lambda x:x.split(" (")[0]))
    remakes_year_N_Z = remakes_year_N_Z.append(i["Remakes"].apply(lambda x:x[x.find("(")+1:x.find(")")]))
    original_title_N_Z = original_title_N_Z.append(i["Original version"].apply(lambda x:x.split(" (")[0]))
    original_year_N_Z = original_year_N_Z.append(i["Original version"].apply(lambda x:x[x.find("(")+1:x.find(")")]))

In [10]:
remakes_df_N_Z = pd.DataFrame({"Remake Title":remakes_title_N_Z, "Remake Year":remakes_year_N_Z})
remakes_df_N_Z.head()

Unnamed: 0,Remake Title,Remake Year
0,Naa Autograph,2004
1,Nadja,1994
2,Naina,2005
3,Never Say Never Again,1983
4,The Next Three Days,2010


In [11]:
originals_df_N_Z = pd.DataFrame({"Original Title":original_title_N_Z, "Original Year":original_year_N_Z})

originals_df_N_Z.head()

Unnamed: 0,Original Title,Original Year
0,Autograph,2004
1,Dracula's Daughter,1936
2,The Eye,2002
3,Thunderball,1965
4,Anything for Her,2008


In [19]:
#Append Remake Film Titles A-M with Remake Film Titles N-Z, and create new data frame
remakes_df = remakes_df_A_M.append(remakes_df_N_Z).reset_index()
remakes_df = remakes_df[["Remake Title", "Remake Year"]]
remakes_df.insert(2, "Type", "Remake") #insert column for film type

#checked record counts. 624 rows
remakes_df.count()

remakes_df.head()

Unnamed: 0,Remake Title,Remake Year,Type
0,13,2010,Remake
1,The 13th Letter,1951,Remake
2,101 Dalmatians,1996,Remake
3,2001 Maniacs,2005,Remake
4,Tokyo Godfathers,2003,Remake


In [21]:
#Append Original Film Titles A-M with Original Film Titles N-Z, and create new data frame
originals_df = originals_df_A_M.append(originals_df_N_Z).reset_index()
originals_df = originals_df[["Original Title", "Original Year"]]
originals_df.insert(2, "Type", "Original") #insert column for film type

#checked record counts. 624 rows
originals_df.count()

originals_df.head()

Unnamed: 0,Original Title,Original Year,Type
0,13 Tzameti,2005,Original
1,Le Corbeau,1943,Original
2,One Hundred and One Dalmatians,1961,Original
3,Two Thousand Maniacs!,1964,Original
4,The Three Godfathers,1916,Original


### Call OMDb API to retrieve stats/info for each movie for all original and film remake titles


In [22]:
#Combine Originals df and Remakes df to create one list to query OMDb API
remakes1 = remakes_df.rename(columns = {"Remake Title":"Title", "Remake Year":"Year", "Type":"Type"})
originals1 = originals_df.rename(columns = {"Original Title": "Title", "Original Year":"Year", "Type":"Type"})

movies_all = pd.concat([remakes1, originals1]).reset_index()
movies_all = movies_all[["Title", "Year", "Type"]]
movies_all.head() #there are 1248 rows total

Unnamed: 0,Title,Year,Type
0,13,2010,Remake
1,The 13th Letter,1951,Remake
2,101 Dalmatians,1996,Remake
3,2001 Maniacs,2005,Remake
4,Tokyo Godfathers,2003,Remake


In [15]:
#Store the API URL as a variable
base_url = "http://www.omdbapi.com/?apikey="

#Define search parameters as lists. Replace spaces in titles with plus signs to make sure full title is queried
titles = movies_all["Title"].apply(lambda x:x.replace(" ","+"))
years = movies_all["Year"]

#Create lists to store response info
response_title = []
response_year = []
response_rated = []
response_runtime = []
response_genre = []
response_director = []
response_writer = []
response_actors = []
response_plot = []
response_language = []
response_country = []
response_awards = []
response_metascore = []
response_imdbRating = []
response_imdbVotes = []
response_imdbID = []
response_production = []
response_boxoffice = []
response_rottentomatoes = []



#Create print log of each movie as it's being processed 
print("Beginning Data Retrieval")
print ("-------------------------------")

#Set initial row count
row=0

#Loop through list of all movies to extract their imdb data. 
#Use zip function to help iterate both time and year variables in parallel.
for title, year in zip(titles, years):
    query_url = base_url + "trilogy" + "&t=" + title + "&y=" + year
    movie_data = requests.get(query_url).json()
    
    row += 1
    
    print(f"Processing Record {row} of {len(titles)}| {title} ({year})")
    
    #print query urls for each city to QC each result
    print(f"{query_url}")
    
#     #This adds a second in the loop to not exceed call limit of API
#     time.sleep(1)
    
    try: 
        response_title.append(movie_data["Title"])
        response_year.append(movie_data["Year"])
        response_rated.append(movie_data["Rated"])
        response_runtime.append(movie_data["Runtime"])
        response_genre.append(movie_data["Genre"])
        response_director.append(movie_data["Director"])
        response_writer.append(movie_data["Writer"])
        response_actors.append(movie_data["Actors"])
        response_plot.append(movie_data["Plot"])
        response_language.append(movie_data["Language"])
        response_country.append(movie_data["Country"])
        response_awards.append(movie_data["Awards"])
        response_metascore.append(movie_data["Metascore"])
        response_imdbRating.append(movie_data["imdbRating"])
        response_imdbVotes.append(movie_data["imdbVotes"])
        response_imdbID.append(movie_data["imdbID"])
        response_production.append(movie_data["Production"])
        response_boxoffice.append(movie_data["BoxOffice"])
        response_rottentomatoes.append(movie_data["Ratings"][1]["Value"])


        
    except:
        print("Movie data elements are missing!")

print('--------------------------------')
print('Data Retrieval Complete')
print('--------------------------------')
        


Beginning Data Retrieval
-------------------------------
Processing Record 1 of 1248| 13 (2010)
http://www.omdbapi.com/?apikey=trilogy&t=13&y=2010
Processing Record 2 of 1248| The+13th+Letter (1951)
http://www.omdbapi.com/?apikey=trilogy&t=The+13th+Letter&y=1951
Processing Record 3 of 1248| 101+Dalmatians (1996)
http://www.omdbapi.com/?apikey=trilogy&t=101+Dalmatians&y=1996
Processing Record 4 of 1248| 2001+Maniacs (2005)
http://www.omdbapi.com/?apikey=trilogy&t=2001+Maniacs&y=2005
Movie data elements are missing!
Processing Record 5 of 1248| Tokyo+Godfathers (2003)
http://www.omdbapi.com/?apikey=trilogy&t=Tokyo+Godfathers&y=2003
Processing Record 6 of 1248| 3+Idiotas (2017)
http://www.omdbapi.com/?apikey=trilogy&t=3+Idiotas&y=2017
Movie data elements are missing!
Processing Record 7 of 1248| 3:10+to+Yuma (2007)
http://www.omdbapi.com/?apikey=trilogy&t=3:10+to+Yuma&y=2007
Processing Record 8 of 1248| Nineteen+Eighty-Four (1984)
http://www.omdbapi.com/?apikey=trilogy&t=Nineteen+Eighty-F

Processing Record 67 of 1248| Born+Yesterday (1993)
http://www.omdbapi.com/?apikey=trilogy&t=Born+Yesterday&y=1993
Processing Record 68 of 1248| Boudu (2005)
http://www.omdbapi.com/?apikey=trilogy&t=Boudu&y=2005
Movie data elements are missing!
Processing Record 69 of 1248| The+Bounty (1984)
http://www.omdbapi.com/?apikey=trilogy&t=The+Bounty&y=1984
Processing Record 70 of 1248| The+Brasher+Doubloon (1947)
http://www.omdbapi.com/?apikey=trilogy&t=The+Brasher+Doubloon&y=1947
Movie data elements are missing!
Processing Record 71 of 1248| Breathless (1983)
http://www.omdbapi.com/?apikey=trilogy&t=Breathless&y=1983
Processing Record 72 of 1248| Brewster's+Millions (1985)
http://www.omdbapi.com/?apikey=trilogy&t=Brewster's+Millions&y=1985
Processing Record 73 of 1248| Bride+Wars (2015)
http://www.omdbapi.com/?apikey=trilogy&t=Bride+Wars&y=2015
Movie data elements are missing!
Processing Record 74 of 1248| Brian's+Song (2001)
http://www.omdbapi.com/?apikey=trilogy&t=Brian's+Song&y=2001
Movie

Processing Record 134 of 1248| Le+Deuxième+Souffle (2007)
http://www.omdbapi.com/?apikey=trilogy&t=Le+Deuxième+Souffle&y=2007
Movie data elements are missing!
Processing Record 135 of 1248| Diabolique (1996)
http://www.omdbapi.com/?apikey=trilogy&t=Diabolique&y=1996
Processing Record 136 of 1248| Diamond+Horseshoe (1945)
http://www.omdbapi.com/?apikey=trilogy&t=Diamond+Horseshoe&y=1945
Movie data elements are missing!
Processing Record 137 of 1248| Dirty+Rotten+Scoundrels (1988)
http://www.omdbapi.com/?apikey=trilogy&t=Dirty+Rotten+Scoundrels&y=1988
Processing Record 138 of 1248| Dr.+Jekyll+and+Mr.+Hyde (1941)
http://www.omdbapi.com/?apikey=trilogy&t=Dr.+Jekyll+and+Mr.+Hyde&y=1941
Processing Record 139 of 1248| Dr.+M (1990)
http://www.omdbapi.com/?apikey=trilogy&t=Dr.+M&y=1990
Movie data elements are missing!
Processing Record 140 of 1248| Drums+of+Fu+Manchu (1940)
http://www.omdbapi.com/?apikey=trilogy&t=Drums+of+Fu+Manchu&y=1940
Movie data elements are missing!
Processing Record 141 

Processing Record 201 of 1248| Geronimo (1939)
http://www.omdbapi.com/?apikey=trilogy&t=Geronimo&y=1939
Movie data elements are missing!
Processing Record 202 of 1248| Gharshana (2004)
http://www.omdbapi.com/?apikey=trilogy&t=Gharshana&y=2004
Movie data elements are missing!
Processing Record 203 of 1248| Ghostbusters (2016)
http://www.omdbapi.com/?apikey=trilogy&t=Ghostbusters&y=2016
Processing Record 204 of 1248| Ghost+Theater (2015)
http://www.omdbapi.com/?apikey=trilogy&t=Ghost+Theater&y=2015
Movie data elements are missing!
Processing Record 205 of 1248| Gloria (1999)
http://www.omdbapi.com/?apikey=trilogy&t=Gloria&y=1999
Processing Record 206 of 1248| Godzilla (1998)
http://www.omdbapi.com/?apikey=trilogy&t=Godzilla&y=1998
Processing Record 207 of 1248| Going+in+Style (2017)
http://www.omdbapi.com/?apikey=trilogy&t=Going+in+Style&y=2017
Processing Record 208 of 1248| Gone+in+60+Seconds (2000)
http://www.omdbapi.com/?apikey=trilogy&t=Gone+in+60+Seconds&y=2000
Processing Record 209

Processing Record 262 of 1248| Intersection (1994)
http://www.omdbapi.com/?apikey=trilogy&t=Intersection&y=1994
Processing Record 263 of 1248| Intermezzo (1939)
http://www.omdbapi.com/?apikey=trilogy&t=Intermezzo&y=1939
Processing Record 264 of 1248| Interview (2007)
http://www.omdbapi.com/?apikey=trilogy&t=Interview&y=2007
Processing Record 265 of 1248| Invaders+from+Mars (1986)
http://www.omdbapi.com/?apikey=trilogy&t=Invaders+from+Mars&y=1986
Processing Record 266 of 1248| The+Invasion (2007)
http://www.omdbapi.com/?apikey=trilogy&t=The+Invasion&y=2007
Processing Record 267 of 1248| Irma+la+Douce (1972)
http://www.omdbapi.com/?apikey=trilogy&t=Irma+la+Douce&y=1972
Movie data elements are missing!
Processing Record 268 of 1248| Iron+Maze (1991)
http://www.omdbapi.com/?apikey=trilogy&t=Iron+Maze&y=1991
Movie data elements are missing!
Processing Record 269 of 1248| The+Island+of+Dr.+Moreau (1996)
http://www.omdbapi.com/?apikey=trilogy&t=The+Island+of+Dr.+Moreau&y=1996
Processing Recor

Processing Record 325 of 1248| Lure+of+the+Wilderness (1952)
http://www.omdbapi.com/?apikey=trilogy&t=Lure+of+the+Wilderness&y=1952
Movie data elements are missing!
Processing Record 326 of 1248| M (1951)
http://www.omdbapi.com/?apikey=trilogy&t=M&y=1951
Movie data elements are missing!
Processing Record 327 of 1248| Mädchen+in+Uniform (1958)
http://www.omdbapi.com/?apikey=trilogy&t=Mädchen+in+Uniform&y=1958
Movie data elements are missing!
Processing Record 328 of 1248| Mary (1931)
http://www.omdbapi.com/?apikey=trilogy&t=Mary&y=1931
Movie data elements are missing!
Processing Record 329 of 1248| The+Magnificent+Ambersons (2002)
http://www.omdbapi.com/?apikey=trilogy&t=The+Magnificent+Ambersons&y=2002
Movie data elements are missing!
Processing Record 330 of 1248| Magnificent+Obsession (1954)
http://www.omdbapi.com/?apikey=trilogy&t=Magnificent+Obsession&y=1954
Processing Record 331 of 1248| The+Magnificent+Seven (2016)
http://www.omdbapi.com/?apikey=trilogy&t=The+Magnificent+Seven&y=

Processing Record 388 of 1248| Night+and+the+City (1992)
http://www.omdbapi.com/?apikey=trilogy&t=Night+and+the+City&y=1992
Processing Record 389 of 1248| Night+Must+Fall (1964)
http://www.omdbapi.com/?apikey=trilogy&t=Night+Must+Fall&y=1964
Movie data elements are missing!
Processing Record 390 of 1248| Night+of+the+Hunter (1991)
http://www.omdbapi.com/?apikey=trilogy&t=Night+of+the+Hunter&y=1991
Movie data elements are missing!
Processing Record 391 of 1248| A+Nightmare+on+Elm+Street (2010)
http://www.omdbapi.com/?apikey=trilogy&t=A+Nightmare+on+Elm+Street&y=2010
Processing Record 392 of 1248| Night+of+the+Demons (2009)
http://www.omdbapi.com/?apikey=trilogy&t=Night+of+the+Demons&y=2009
Processing Record 393 of 1248| Night+of+the+Living+Dead+3D (2006)
http://www.omdbapi.com/?apikey=trilogy&t=Night+of+the+Living+Dead+3D&y=2006
Processing Record 394 of 1248| Nightwatch (1997)
http://www.omdbapi.com/?apikey=trilogy&t=Nightwatch&y=1997
Processing Record 395 of 1248| Nine+Months (1995)
ht

Processing Record 452 of 1248| Prom+Night (2008)
http://www.omdbapi.com/?apikey=trilogy&t=Prom+Night&y=2008
Processing Record 453 of 1248| Psycho (1998)
http://www.omdbapi.com/?apikey=trilogy&t=Psycho&y=1998
Processing Record 454 of 1248| Pulse (2006)
http://www.omdbapi.com/?apikey=trilogy&t=Pulse&y=2006
Processing Record 455 of 1248| Quarantine (2008)
http://www.omdbapi.com/?apikey=trilogy&t=Quarantine&y=2008
Processing Record 456 of 1248| The+Rainmaker (1982)
http://www.omdbapi.com/?apikey=trilogy&t=The+Rainmaker&y=1982
Movie data elements are missing!
Processing Record 457 of 1248| Ram+Gopal+Varma+Ki+Aag (2007)
http://www.omdbapi.com/?apikey=trilogy&t=Ram+Gopal+Varma+Ki+Aag&y=2007
Movie data elements are missing!
Processing Record 458 of 1248| Rangrezz (2013)
http://www.omdbapi.com/?apikey=trilogy&t=Rangrezz&y=2013
Movie data elements are missing!
Processing Record 459 of 1248| Ransom (1996)
http://www.omdbapi.com/?apikey=trilogy&t=Ransom&y=1996
Processing Record 460 of 1248| The+Ra

Processing Record 515 of 1248| The+Sign+of+the+Cross (1932)
http://www.omdbapi.com/?apikey=trilogy&t=The+Sign+of+the+Cross&y=1932
Processing Record 516 of 1248| Sleeping+Car+to+Trieste (1948)
http://www.omdbapi.com/?apikey=trilogy&t=Sleeping+Car+to+Trieste&y=1948
Movie data elements are missing!
Processing Record 517 of 1248| So+Big (1953)
http://www.omdbapi.com/?apikey=trilogy&t=So+Big&y=1953
Movie data elements are missing!
Processing Record 518 of 1248| Solaris (2002)
http://www.omdbapi.com/?apikey=trilogy&t=Solaris&y=2002
Processing Record 519 of 1248| Solstice (2008)
http://www.omdbapi.com/?apikey=trilogy&t=Solstice&y=2008
Movie data elements are missing!
Processing Record 520 of 1248| A+Song+Is+Born (1948)
http://www.omdbapi.com/?apikey=trilogy&t=A+Song+Is+Born&y=1948
Processing Record 521 of 1248| Sorcerer (1977)
http://www.omdbapi.com/?apikey=trilogy&t=Sorcerer&y=1977
Processing Record 522 of 1248| Sorority+Row (2009)
http://www.omdbapi.com/?apikey=trilogy&t=Sorority+Row&y=2009

Processing Record 581 of 1248| The+Unfaithful (1947)
http://www.omdbapi.com/?apikey=trilogy&t=The+Unfaithful&y=1947
Movie data elements are missing!
Processing Record 582 of 1248| The+Unholy+Three (1930)
http://www.omdbapi.com/?apikey=trilogy&t=The+Unholy+Three&y=1930
Movie data elements are missing!
Processing Record 583 of 1248| The+Unknown+Soldier (1985)
http://www.omdbapi.com/?apikey=trilogy&t=The+Unknown+Soldier&y=1985
Movie data elements are missing!
Processing Record 584 of 1248| The+Upside (2019)
http://www.omdbapi.com/?apikey=trilogy&t=The+Upside&y=2019
Movie data elements are missing!
Processing Record 585 of 1248| Vanilla+Sky (2001)
http://www.omdbapi.com/?apikey=trilogy&t=Vanilla+Sky&y=2001
Processing Record 586 of 1248| The+Vanishing (1993)
http://www.omdbapi.com/?apikey=trilogy&t=The+Vanishing&y=1993
Processing Record 587 of 1248| Vanishing+Point (1997)
http://www.omdbapi.com/?apikey=trilogy&t=Vanishing+Point&y=1997
Movie data elements are missing!
Processing Record 588 o

Processing Record 644 of 1248| The+Amityville+Horror (1979)
http://www.omdbapi.com/?apikey=trilogy&t=The+Amityville+Horror&y=1979
Processing Record 645 of 1248| Love (1927)
http://www.omdbapi.com/?apikey=trilogy&t=Love&y=1927
Movie data elements are missing!
Processing Record 646 of 1248| Et+Dieu...+créa+la+femme (1956)
http://www.omdbapi.com/?apikey=trilogy&t=Et+Dieu...+créa+la+femme&y=1956
Movie data elements are missing!
Processing Record 647 of 1248| And+Soon+the+Darkness (1970)
http://www.omdbapi.com/?apikey=trilogy&t=And+Soon+the+Darkness&y=1970
Processing Record 648 of 1248| Angels+in+the+Outfield (1951)
http://www.omdbapi.com/?apikey=trilogy&t=Angels+in+the+Outfield&y=1951
Movie data elements are missing!
Processing Record 649 of 1248| Angel+on+My+Shoulder (1946)
http://www.omdbapi.com/?apikey=trilogy&t=Angel+on+My+Shoulder&y=1946
Movie data elements are missing!
Processing Record 650 of 1248| Annie (1982)
http://www.omdbapi.com/?apikey=trilogy&t=Annie&y=1982
Processing Record 

Processing Record 707 of 1248| The+Cabinet+of+Dr.+Caligari (1920)
http://www.omdbapi.com/?apikey=trilogy&t=The+Cabinet+of+Dr.+Caligari&y=1920
Processing Record 708 of 1248| The+Kennel+Murder+Case (1933)
http://www.omdbapi.com/?apikey=trilogy&t=The+Kennel+Murder+Case&y=1933
Processing Record 709 of 1248| The+Call+of+the+Wild (1923)
http://www.omdbapi.com/?apikey=trilogy&t=The+Call+of+the+Wild&y=1923
Movie data elements are missing!
Processing Record 710 of 1248| Cape+Fear (1962)
http://www.omdbapi.com/?apikey=trilogy&t=Cape+Fear&y=1962
Processing Record 711 of 1248| The+Captain's+Table (1959)
http://www.omdbapi.com/?apikey=trilogy&t=The+Captain's+Table&y=1959
Movie data elements are missing!
Processing Record 712 of 1248| Carnival+of+Souls (1962)
http://www.omdbapi.com/?apikey=trilogy&t=Carnival+of+Souls&y=1962
Processing Record 713 of 1248| Carrie (1976)
http://www.omdbapi.com/?apikey=trilogy&t=Carrie&y=1976
Processing Record 714 of 1248| Casino+Royale (1954)
http://www.omdbapi.com/?ap

Processing Record 771 of 1248| Here+Comes+Mr.+Jordan (1941)
http://www.omdbapi.com/?apikey=trilogy&t=Here+Comes+Mr.+Jordan&y=1941
Processing Record 772 of 1248| Dracula (1931)
http://www.omdbapi.com/?apikey=trilogy&t=Dracula&y=1931
Processing Record 773 of 1248| Not+So+Dumb (1930)
http://www.omdbapi.com/?apikey=trilogy&t=Not+So+Dumb&y=1930
Movie data elements are missing!
Processing Record 774 of 1248| Dumbo (1941)
http://www.omdbapi.com/?apikey=trilogy&t=Dumbo&y=1941
Processing Record 775 of 1248| East+of+Eden (1955)
http://www.omdbapi.com/?apikey=trilogy&t=East+of+Eden&y=1955
Processing Record 776 of 1248| Easy+Virtue (1928)
http://www.omdbapi.com/?apikey=trilogy&t=Easy+Virtue&y=1928
Processing Record 777 of 1248| Sigaw (2004)
http://www.omdbapi.com/?apikey=trilogy&t=Sigaw&y=2004
Movie data elements are missing!
Processing Record 778 of 1248| Louis+19,+King+of+the+Airwaves (Louis 19, le roi des ondes)
http://www.omdbapi.com/?apikey=trilogy&t=Louis+19,+King+of+the+Airwaves&y=Louis 19,

Processing Record 838 of 1248| Great+Expectations (1917)
http://www.omdbapi.com/?apikey=trilogy&t=Great+Expectations&y=1917
Movie data elements are missing!
Processing Record 839 of 1248| Waltzes+from+Vienna (1934)
http://www.omdbapi.com/?apikey=trilogy&t=Waltzes+from+Vienna&y=1934
Movie data elements are missing!
Processing Record 840 of 1248| To+Have+and+Have+Not (1944)
http://www.omdbapi.com/?apikey=trilogy&t=To+Have+and+Have+Not&y=1944
Processing Record 841 of 1248| Guess+Who's+Coming+to+Dinner (1967)
http://www.omdbapi.com/?apikey=trilogy&t=Guess+Who's+Coming+to+Dinner&y=1967
Processing Record 842 of 1248| Gulliver's+Travels (1939)
http://www.omdbapi.com/?apikey=trilogy&t=Gulliver's+Travels&y=1939
Processing Record 843 of 1248| Halloween (1978)
http://www.omdbapi.com/?apikey=trilogy&t=Halloween&y=1978
Processing Record 844 of 1248| The+Beast+with+Five+Fingers (1946)
http://www.omdbapi.com/?apikey=trilogy&t=The+Beast+with+Five+Fingers&y=1946
Processing Record 845 of 1248| The+Hands

Processing Record 901 of 1248| The+Jazz+Singer (1927)
http://www.omdbapi.com/?apikey=trilogy&t=The+Jazz+Singer&y=1927
Processing Record 902 of 1248| Jigoku (1960)
http://www.omdbapi.com/?apikey=trilogy&t=Jigoku&y=1960
Movie data elements are missing!
Processing Record 903 of 1248| Journey+into+Fear (1943)
http://www.omdbapi.com/?apikey=trilogy&t=Journey+into+Fear&y=1943
Movie data elements are missing!
Processing Record 904 of 1248| Journey's+End (1930)
http://www.omdbapi.com/?apikey=trilogy&t=Journey's+End&y=1930
Movie data elements are missing!
Processing Record 905 of 1248| Jungle+Book (1942)
http://www.omdbapi.com/?apikey=trilogy&t=Jungle+Book&y=1942
Processing Record 906 of 1248| Un+indien+dans+la+ville (1994)
http://www.omdbapi.com/?apikey=trilogy&t=Un+indien+dans+la+ville&y=1994
Movie data elements are missing!
Processing Record 907 of 1248| Les+Visiteurs (1993)
http://www.omdbapi.com/?apikey=trilogy&t=Les+Visiteurs&y=1993
Movie data elements are missing!
Processing Record 908 o

Processing Record 965 of 1248| Marcelino+Pan+y+Vino (1955)
http://www.omdbapi.com/?apikey=trilogy&t=Marcelino+Pan+y+Vino&y=1955
Movie data elements are missing!
Processing Record 966 of 1248| Marilyn+Hotchkiss'+Ballroom+Dancing+and+Charm+School (1990)
http://www.omdbapi.com/?apikey=trilogy&t=Marilyn+Hotchkiss'+Ballroom+Dancing+and+Charm+School&y=1990
Movie data elements are missing!
Processing Record 967 of 1248| London+After+Midnight (1927)
http://www.omdbapi.com/?apikey=trilogy&t=London+After+Midnight&y=1927
Movie data elements are missing!
Processing Record 968 of 1248| The+Mark+of+Zorro (1920)
http://www.omdbapi.com/?apikey=trilogy&t=The+Mark+of+Zorro&y=1920
Processing Record 969 of 1248| Martyrs (2008)
http://www.omdbapi.com/?apikey=trilogy&t=Martyrs&y=2008
Processing Record 970 of 1248| Midnight (1939)
http://www.omdbapi.com/?apikey=trilogy&t=Midnight&y=1939
Processing Record 971 of 1248| The+Mechanic (1972)
http://www.omdbapi.com/?apikey=trilogy&t=The+Mechanic&y=1972
Processing 

Processing Record 1028 of 1248| It+Started+with+Eve (1941)
http://www.omdbapi.com/?apikey=trilogy&t=It+Started+with+Eve&y=1941
Movie data elements are missing!
Processing Record 1029 of 1248| Of+Human+Bondage (1934)
http://www.omdbapi.com/?apikey=trilogy&t=Of+Human+Bondage&y=1934
Processing Record 1030 of 1248| Of+Mice+and+Men (1939)
http://www.omdbapi.com/?apikey=trilogy&t=Of+Mice+and+Men&y=1939
Processing Record 1031 of 1248| The+Old+Dark+House (1932)
http://www.omdbapi.com/?apikey=trilogy&t=The+Old+Dark+House&y=1932
Processing Record 1032 of 1248| Oldboy (2003)
http://www.omdbapi.com/?apikey=trilogy&t=Oldboy&y=2003
Processing Record 1033 of 1248| Oliver+Twist (1922)
http://www.omdbapi.com/?apikey=trilogy&t=Oliver+Twist&y=1922
Movie data elements are missing!
Processing Record 1034 of 1248| The+Omen (1976)
http://www.omdbapi.com/?apikey=trilogy&t=The+Omen&y=1976
Processing Record 1035 of 1248| On+the+Beach (1959)
http://www.omdbapi.com/?apikey=trilogy&t=On+the+Beach&y=1959
Processing

Processing Record 1094 of 1248| Broadway+Bill (1934)
http://www.omdbapi.com/?apikey=trilogy&t=Broadway+Bill&y=1934
Processing Record 1095 of 1248| Ringu (1998)
http://www.omdbapi.com/?apikey=trilogy&t=Ringu&y=1998
Processing Record 1096 of 1248| Rio+Bravo (1959)
http://www.omdbapi.com/?apikey=trilogy&t=Rio+Bravo&y=1959
Processing Record 1097 of 1248| Robin+Hood (1922)
http://www.omdbapi.com/?apikey=trilogy&t=Robin+Hood&y=1922
Processing Record 1098 of 1248| RoboCop (1987)
http://www.omdbapi.com/?apikey=trilogy&t=RoboCop&y=1987
Processing Record 1099 of 1248| The+Miracle+of+Morgan's+Creek (1944)
http://www.omdbapi.com/?apikey=trilogy&t=The+Miracle+of+Morgan's+Creek&y=1944
Movie data elements are missing!
Processing Record 1100 of 1248| Rollerball (1975)
http://www.omdbapi.com/?apikey=trilogy&t=Rollerball&y=1975
Processing Record 1101 of 1248| Man+Hunt (1941)
http://www.omdbapi.com/?apikey=trilogy&t=Man+Hunt&y=1941
Processing Record 1102 of 1248| The+Roman+Spring+of+Mrs.+Stone (1961)
htt

Processing Record 1157 of 1248| Dark+Victory (1939)
http://www.omdbapi.com/?apikey=trilogy&t=Dark+Victory&y=1939
Processing Record 1158 of 1248| Straw+Dogs (1971)
http://www.omdbapi.com/?apikey=trilogy&t=Straw+Dogs&y=1971
Processing Record 1159 of 1248| The+Student+of+Prague (1913)
http://www.omdbapi.com/?apikey=trilogy&t=The+Student+of+Prague&y=1913
Movie data elements are missing!
Processing Record 1160 of 1248| Judge+Priest (1934)
http://www.omdbapi.com/?apikey=trilogy&t=Judge+Priest&y=1934
Processing Record 1161 of 1248| The+Sunshine+Boys (1975)
http://www.omdbapi.com/?apikey=trilogy&t=The+Sunshine+Boys&y=1975
Processing Record 1162 of 1248| Svengali (1931)
http://www.omdbapi.com/?apikey=trilogy&t=Svengali&y=1931
Processing Record 1163 of 1248| The+Swan (1925)
http://www.omdbapi.com/?apikey=trilogy&t=The+Swan&y=1925
Movie data elements are missing!
Processing Record 1164 of 1248| Miss+Granny (2014)
http://www.omdbapi.com/?apikey=trilogy&t=Miss+Granny&y=2014
Processing Record 1165 o

Processing Record 1219 of 1248| Walking+Tall (1973)
http://www.omdbapi.com/?apikey=trilogy&t=Walking+Tall&y=1973
Processing Record 1220 of 1248| The+War+of+the+Worlds (1953)
http://www.omdbapi.com/?apikey=trilogy&t=The+War+of+the+Worlds&y=1953
Processing Record 1221 of 1248| Castle+of+Blood (1964)
http://www.omdbapi.com/?apikey=trilogy&t=Castle+of+Blood&y=1964
Movie data elements are missing!
Processing Record 1222 of 1248| Stepmom (1998)
http://www.omdbapi.com/?apikey=trilogy&t=Stepmom&y=1998
Processing Record 1223 of 1248| Big+Deal+on+Madonna+Street (1958)
http://www.omdbapi.com/?apikey=trilogy&t=Big+Deal+on+Madonna+Street&y=1958
Processing Record 1224 of 1248| We're+No+Angels (1955)
http://www.omdbapi.com/?apikey=trilogy&t=We're+No+Angels&y=1955
Movie data elements are missing!
Processing Record 1225 of 1248| The+Reluctant+Debutante (1958)
http://www.omdbapi.com/?apikey=trilogy&t=The+Reluctant+Debutante&y=1958
Processing Record 1226 of 1248| When+a+Stranger+Calls (1979)
http://www.o

In [16]:
len(response_title) #all results return 1131 entries unless otherwise specified
len(response_year)
len(response_rated)
len(response_runtime)
len(response_genre)
len(response_director)
len(response_writer) 
len(response_actors)
len(response_plot)
len(response_language)
len(response_country)
len(response_awards)
len(response_metascore)
len(response_rottentomatoes) #returns 737
len(response_imdbRating) 
len(response_imdbVotes) 
len(response_imdbID) 
len(response_boxoffice) # returns 1116
len(response_production) # returns 1116

1116

In [17]:
#Create Data Frame 
imdb_df = pd.DataFrame({
                        "Title": response_title,
                        "Year": response_year,
                        "Rated": response_rated,
                        "Runtime": response_runtime,
                        "Genre": response_genre,
                        "Director": response_director,
                        "Writer": response_writer,
                        "Actors": response_actors,
                        "Plot": response_plot,
                        "Language": response_language,
                        "Country": response_country,
                        "Awards": response_awards,
                        "Metacritic Rating": response_metascore,
                        "IMDb Rating": response_imdbRating,
                        "IMDb Votes": response_imdbVotes,
                        "IMDb ID": response_imdbID
#                         "Production": response_production,
#                         "Box Office (Domestic Gross)": response_boxoffice
#                         "Rotten Tomatoes Rating": response_rottentomatoes
                        })


#Checked row count out of 1248 films passed through. 
#Returned 1131 out of 1248 films (~90.6% of results.)
#Better result than expected given that many of the films had special characters or slightly different film titles than listed in Wikipedia
imdb_df.count()

imdb_df

Unnamed: 0,Title,Year,Rated,Runtime,Genre,Director,Writer,Actors,Plot,Language,Country,Awards,Metacritic Rating,IMDb Rating,IMDb Votes,IMDb ID
0,13,2010,R,91 min,"Crime, Drama, Thriller",Géla Babluani,"Géla Babluani (screenplay), Greg Pruss (screen...","Sam Riley, Alice Barrett, Gaby Hoffmann, Jason...",A naive young man assumes a dead man's identit...,English,USA,1 win & 1 nomination.,29,6.1,38984,tt0798817
1,The 13th Letter,1951,,85 min,"Film-Noir, Mystery",Otto Preminger,"Howard Koch (screen play), Louis Chavance (sto...","Linda Darnell, Charles Boyer, Michael Rennie, ...",Respectable citizens receive anonymous letters...,English,USA,,,6.6,244,tt0043251
2,101 Dalmatians,1996,G,103 min,"Adventure, Comedy, Crime, Family",Stephen Herek,"Dodie Smith (novel), John Hughes (screenplay)","Glenn Close, Jeff Daniels, Joely Richardson, J...",An evil high-fashion designer plots to steal D...,"English, Spanish","USA, UK",Nominated for 1 Golden Globe. Another 3 wins &...,49,5.7,95152,tt0115433
3,2001 Maniacs,2005,R,87 min,"Comedy, Horror",Tim Sullivan,"Chris Kobin, Tim Sullivan","Robert Englund, Lin Shaye, Giuseppe Andrews, J...",Eight college students travelling to Florida f...,English,USA,1 nomination.,,5.3,11567,tt0264323
4,Tokyo Godfathers,2003,PG-13,92 min,"Animation, Adventure, Comedy, Drama","Satoshi Kon, Shôgo Furuya(co-director)","Satoshi Kon (screenplay), Satoshi Kon (story),...","Tôru Emori, Aya Okamoto, Yoshiaki Umegaki, Shô...","On Christmas Eve, three homeless people living...","Japanese, Spanish, English",Japan,8 wins & 1 nomination.,73,7.8,27570,tt0388473
5,3 Idiotas,2017,PG-13,106 min,"Comedy, Family",Carlos Bolado,"Antonio Abascal (screenplay by), Carlos Bolado...","Martha Higareda, Alfonso Dosal, Christian Vazq...",A group of friends embark on a fun adventure d...,Spanish,Mexico,1 nomination.,,4.2,707,tt3685624
6,3:10 to Yuma,2007,R,122 min,"Action, Crime, Drama, Western",James Mangold,"Halsted Welles (screenplay), Michael Brandt (s...","Russell Crowe, Christian Bale, Logan Lerman, D...",A small-time rancher agrees to hold a captured...,"English, Chinese",USA,Nominated for 2 Oscars. Another 3 wins & 30 no...,76,7.7,274864,tt0381849
7,A Southern Yankee,1948,NOT RATED,90 min,"Comedy, History, War, Western",Edward Sedgwick,"Melvin Frank (story), Norman Panama (story), H...","Red Skelton, Brian Donlevy, Arlene Dahl, Georg...",A hapless bellboy in a St. Louis hotel near th...,English,USA,,,6.8,424,tt0040825
8,Adventures in Babysitting,2016,TV-G,105 min,"Adventure, Comedy, Family",John Schultz,"Tiffany Paulsen, David Simkins (Original Story)","Sabrina Carpenter, Sofia Carson, Nikki Hahn, M...","Two teen rival babysitters, Jenny and Lola, te...",English,USA,2 nominations.,,6.0,2497,tt4456850
9,An Affair to Remember,1957,Not Rated,115 min,"Drama, Romance",Leo McCarey,"Delmer Daves (screenplay), Leo McCarey (screen...","Cary Grant, Deborah Kerr, Richard Denning, Nev...",A couple falls in love and agrees to meet in s...,"English, French, Italian",USA,Nominated for 4 Oscars. Another 2 wins & 2 nom...,,7.5,26079,tt0050105


In [42]:
#check for nulls in imdbRating and imdbID
missing_imdbRating = imdb_df.loc[imdb_df["IMDb Rating"].isnull()]
missing_imdbID = imdb_df.loc[imdb_df["IMDb ID"].isnull()]

missing_imdbRating_count = len(missing_imdbRating)
missing_imdbID_count = len(missing_imdbID)

print(f"There are {missing_imdbRating_count} null values in IMDb Rating & {missing_imdbID_count} null values in IMDb ID")
      
      

There are 0 null values in IMDb Rating & 0 null values in IMDb ID


In [43]:
#check for N/A in imdbRating and imdbID
na_imdbRating = imdb_df.loc[imdb_df["IMDb Rating"].isna()]
na_imdbID = imdb_df.loc[imdb_df["IMDb ID"].isna()]

na_imdbRating_count = len(na_imdbRating)
na_imdbID_count = len(na_imdbID)

print(f"There are {missing_imdbRating_count} N/A values in IMDb Rating & {missing_imdbID_count} N/A values in IMDb ID")
      

There are 0 N/A values in IMDb Rating & 0 N/A values in IMDb ID
