In [1]:
# dependencies
import pandas as pd
import requests
import json
from datetime import date
from omdb_config import api_key

In [2]:
# export dataframe of results with today's date
today = date.today()
d_today = today.strftime("%Y_%m_%d")

In [3]:
# read in the data for top 100 movies
top_100df = pd.read_csv("output_data/Top100_Movies_2010-2021.csv")

In [4]:
# set the year of movies we are collecting data for
year = 2020

In [5]:
# filter DataFrame for the 100 movies in the specified year
top_100 = top_100df.loc[top_100df["Year"] == year]
# create a list of the top 100 movies to iterate through
movies = top_100["Release"].tolist()
print(movies)

['Bad Boys for Life', '1917', 'Sonic the Hedgehog', 'Jumanji: The Next Level', 'Star Wars: Episode IX - The Rise of Skywalker', 'Birds of Prey', 'Dolittle', 'Little Women', 'The Invisible Man', 'The Call of the Wild', 'Onward', 'Knives Out', 'Frozen II', 'Tenet', 'Spies in Disguise', 'The Gentlemen', 'Just Mercy', 'The Croods: A New Age', 'Parasite', 'Fantasy Island', 'Uncut Gems', 'The New Mutants', 'Like a Boss', 'The Grudge', 'Unhinged', 'The Photograph', 'The War with Grandpa', 'Underwater', 'Wonder Woman 1984', 'The Turning', 'Gretel & Hansel', 'Honest Thief', 'My Hero Academia: Heroes Rising', 'Bombshell', 'The Way Back', 'Brahms: The Boy II', 'Jojo Rabbit', 'Impractical Jokers: The Movie', 'Ford v Ferrari', 'Emma.', 'Bloodshot', 'I Still Believe', 'Come Play', 'Let Him Go', 'Freaky', 'Downhill', 'Weathering with You', 'Cats', 'The Hunt', 'The Rhythm Section', 'Monster Hunter', 'A Beautiful Day in the Neighborhood', 'Hocus Pocus2020 Re-release', 'Richard Jewell', 'The SpongeBob M

In [6]:
# create variables to store url
url = "http://www.omdbapi.com/?t="
api_key = "&apikey=" + api_key

In [7]:
# for loop that iterates over the movie list, performing a request for each movie...
# ...and appending the data into empty lists for a DataFrame
title = []
year_ = []
rated = []
released = []
runtime = []
genre = []
director = []
actors = []
country = []
awards = []
rotten_tom = []
metascore = []
imdb_rating = []
imdb_votes = []
imdb_id = []

for movie in movies:
    try:
        response = requests.get(url + f"{movie}" + api_key)
        data = response.json()
    except:
        print(f"For some reason, OMDdb didn't have: {movie}.")
        
    try:
        title.append(data["Title"])
    except:
        title.append("NaN")
        print(f"No Title: {movie}.")
    try:
        year_.append(data["Year"])
    except:
        year_.append("NaN")
        print(f"No Year: {movie}.")
    try:
        rated.append(data["Rated"])
    except:
        rated.append("NaN")
        print(f"No Rated: {movie}.")
    try:
        released.append(data["Released"])
    except:
        released.append("NaN")
        print(f"No Released: {movie}.")
    try:
        runtime.append(data["Runtime"])
    except:
        runtime.append("NaN")
        print(f"No Runtime: {movie}.")
    try:
        genre.append(data["Genre"])
    except:
        genre.append("NaN")
        print(f"No Genre: {movie}.")
    try:
        director.append(data["Director"])
    except:
        director.append("NaN")
        print(f"No Director: {movie}.")
    try:
        actors.append(data["Actors"])
    except:
        actors.append("NaN")
        print(f"No Actors: {movie}.")
    try:
        country.append(data["Country"])
    except:
        country.append("NaN")
        print(f"No Country: {movie}.")
    try:
        awards.append(data["Awards"])
    except:
        awards.append("NaN")
        print(f"No Awards: {movie}.")
    try:
        rotten_tom.append(data["Ratings"][1]["Value"])
    except:
        rotten_tom.append("NaN")
        print(f"No Rotten Tomatoes: {movie}.")
    try:
        metascore.append(data["Metascore"])
    except:
        metascore.append("NaN")
        print(f"No Metascore: {movie}.")
    try:
        imdb_rating.append(data["imdbRating"])
    except:
        imdb_rating.append("NaN")
        print(f"No imdbRating: {movie}.")
    try:
        imdb_votes.append(data["imdbVotes"])
    except:
        imdb_votes.append("NaN")
        print(f"No imdbVotes: {movie}.")
    try:
        imdb_id.append(data["imdbID"])
    except:
        imdb_id.append("NaN")
        print(f"No imdbID: {movie}.")

No Rotten Tomatoes: Gretel & Hansel.
No Title: Hocus Pocus2020 Re-release.
No Year: Hocus Pocus2020 Re-release.
No Rated: Hocus Pocus2020 Re-release.
No Released: Hocus Pocus2020 Re-release.
No Runtime: Hocus Pocus2020 Re-release.
No Genre: Hocus Pocus2020 Re-release.
No Director: Hocus Pocus2020 Re-release.
No Actors: Hocus Pocus2020 Re-release.
No Country: Hocus Pocus2020 Re-release.
No Awards: Hocus Pocus2020 Re-release.
No Rotten Tomatoes: Hocus Pocus2020 Re-release.
No Metascore: Hocus Pocus2020 Re-release.
No imdbRating: Hocus Pocus2020 Re-release.
No imdbVotes: Hocus Pocus2020 Re-release.
No imdbID: Hocus Pocus2020 Re-release.
No Rotten Tomatoes: Infidel.
No Rotten Tomatoes: 2020 Oscar Nominated Short Films.
No Rotten Tomatoes: The Empty Man.
No Rotten Tomatoes: My Boyfriend's Meds.
No Title: Star Wars: Episode V - The Empire Strikes Back2020 Re-release.
No Year: Star Wars: Episode V - The Empire Strikes Back2020 Re-release.
No Rated: Star Wars: Episode V - The Empire Strikes Ba

In [8]:
print(len(title))

100


In [9]:
# create list of missing movies
#missing = ["Knight and Day2010 Re-release", "Percy Jackson & the Olympians: The Lightning Thief"]

# use list comprehension to remove missing movies
#movies = [movie for movie in movies if movie not in missing]

In [10]:
# creat DataFrame with the data collected in lists
omdb_df = pd.DataFrame({"Release": movies, "Title": title, "Year": year_,
                        "Rated": rated, "Released": released, "Runtime": runtime, "Genre": genre,
                        "Director": director, "Actors": actors, "Country": country, "Awards": awards,
                        "Rotten Tomatoes": rotten_tom, "Metascore": metascore, "IMDB": imdb_rating,
                        "IMDB Votes": imdb_votes, "IMDB id": imdb_id})
omdb_df

Unnamed: 0,Release,Title,Year,Rated,Released,Runtime,Genre,Director,Actors,Country,Awards,Rotten Tomatoes,Metascore,IMDB,IMDB Votes,IMDB id
0,Bad Boys for Life,Bad Boys for Life,2020,R,17 Jan 2020,124 min,"Action, Comedy, Crime, Thriller","Adil El Arbi, Bilall Fallah","Will Smith, Martin Lawrence, Vanessa Hudgens, ...","USA, Mexico",2 wins & 14 nominations.,77%,59,6.6,133505,tt1502397
1,1917,1917,2019,R,10 Jan 2020,119 min,"Drama, Thriller, War",Sam Mendes,"Dean-Charles Chapman, George MacKay, Daniel Ma...","USA, UK, India, Spain, Canada, China",Won 3 Oscars. Another 127 wins & 198 nominations.,89%,78,8.3,442036,tt8579674
2,Sonic the Hedgehog,Sonic the Hedgehog,2020,PG,14 Feb 2020,99 min,"Action, Adventure, Comedy, Sci-Fi",Jeff Fowler,"Ben Schwartz, James Marsden, Jim Carrey, Tika ...","USA, Japan, Canada",3 wins & 9 nominations.,63%,47,6.5,92987,tt3794354
3,Jumanji: The Next Level,Jumanji: The Next Level,2019,PG-13,13 Dec 2019,123 min,"Action, Adventure, Comedy, Fantasy",Jake Kasdan,"Dwayne Johnson, Kevin Hart, Jack Black, Karen ...",USA,1 win & 10 nominations.,71%,58,6.6,192270,tt7975244
4,Star Wars: Episode IX - The Rise of Skywalker,Star Wars: Episode IX - The Rise of Skywalker,2019,PG-13,20 Dec 2019,141 min,"Action, Adventure, Fantasy, Sci-Fi",J.J. Abrams,"Carrie Fisher, Mark Hamill, Adam Driver, Daisy...",USA,Nominated for 3 Oscars. Another 9 wins & 39 no...,51%,53,6.6,378434,tt2527338
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Cut Throat City,Cut Throat City,2020,R,21 Aug 2020,123 min,"Action, Crime, Drama, Thriller",RZA,"Shameik Moore, Demetrius Shipp Jr., Denzel Whi...",USA,,67/100,67,4.9,1584,tt3547306
96,The Goonies2020 Re-release,,,,,,,,,,,,,,,
97,Sarileru Neekevvaru,Sarileru Neekevvaru,2020,Not Rated,10 Jan 2020,169 min,"Action, Comedy, Drama",Anil Ravipudi,"Mahesh Babu, Rashmika Mandanna, Vijayshanti, P...",India,,,,6.0,5364,tt10773090
98,Vanguard,Vanguard,2020,PG-13,20 Nov 2020,107 min,"Action, Adventure, Comedy, Thriller",Stanley Tong,"Jackie Chan, Yang Yang, Lun Ai, Miya Muqi",China,1 win & 1 nomination.,34/100,34,4.7,3126,tt9695722


In [11]:
# filter out the rows that are mostly NaN, using above intel
clean_omdb_df = omdb_df.loc[(omdb_df["Release"] != "Hocus Pocus2020 Re-release")\
                            & (omdb_df["Release"] != "Star Wars: Episode V - The Empire Strikes Back2020 Re-release")\
                            & (omdb_df["Release"] != "The Nightmare Before Christmas2020 Re-release")\
                            & (omdb_df["Release"] != "Elf2020 Re-release")\
                            & (omdb_df["Release"] != "Jurassic Park2020 Re-release")\
                            & (omdb_df["Release"] != "National Lampoon's Christmas Vacation2020 Re-release")\
                            & (omdb_df["Release"] != "The Goonies2020 Re-release")]
clean_omdb_df

Unnamed: 0,Release,Title,Year,Rated,Released,Runtime,Genre,Director,Actors,Country,Awards,Rotten Tomatoes,Metascore,IMDB,IMDB Votes,IMDB id
0,Bad Boys for Life,Bad Boys for Life,2020,R,17 Jan 2020,124 min,"Action, Comedy, Crime, Thriller","Adil El Arbi, Bilall Fallah","Will Smith, Martin Lawrence, Vanessa Hudgens, ...","USA, Mexico",2 wins & 14 nominations.,77%,59,6.6,133505,tt1502397
1,1917,1917,2019,R,10 Jan 2020,119 min,"Drama, Thriller, War",Sam Mendes,"Dean-Charles Chapman, George MacKay, Daniel Ma...","USA, UK, India, Spain, Canada, China",Won 3 Oscars. Another 127 wins & 198 nominations.,89%,78,8.3,442036,tt8579674
2,Sonic the Hedgehog,Sonic the Hedgehog,2020,PG,14 Feb 2020,99 min,"Action, Adventure, Comedy, Sci-Fi",Jeff Fowler,"Ben Schwartz, James Marsden, Jim Carrey, Tika ...","USA, Japan, Canada",3 wins & 9 nominations.,63%,47,6.5,92987,tt3794354
3,Jumanji: The Next Level,Jumanji: The Next Level,2019,PG-13,13 Dec 2019,123 min,"Action, Adventure, Comedy, Fantasy",Jake Kasdan,"Dwayne Johnson, Kevin Hart, Jack Black, Karen ...",USA,1 win & 10 nominations.,71%,58,6.6,192270,tt7975244
4,Star Wars: Episode IX - The Rise of Skywalker,Star Wars: Episode IX - The Rise of Skywalker,2019,PG-13,20 Dec 2019,141 min,"Action, Adventure, Fantasy, Sci-Fi",J.J. Abrams,"Carrie Fisher, Mark Hamill, Adam Driver, Daisy...",USA,Nominated for 3 Oscars. Another 9 wins & 39 no...,51%,53,6.6,378434,tt2527338
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,A Hidden Life,A Hidden Life,2019,PG-13,17 Jan 2020,174 min,"Biography, Drama, Romance, War",Terrence Malick,"August Diehl, Valerie Pachner, Maria Simon, Ka...","USA, UK, Germany",9 wins & 30 nominations.,81%,78,7.4,19103,tt5827916
95,Cut Throat City,Cut Throat City,2020,R,21 Aug 2020,123 min,"Action, Crime, Drama, Thriller",RZA,"Shameik Moore, Demetrius Shipp Jr., Denzel Whi...",USA,,67/100,67,4.9,1584,tt3547306
97,Sarileru Neekevvaru,Sarileru Neekevvaru,2020,Not Rated,10 Jan 2020,169 min,"Action, Comedy, Drama",Anil Ravipudi,"Mahesh Babu, Rashmika Mandanna, Vijayshanti, P...",India,,,,6.0,5364,tt10773090
98,Vanguard,Vanguard,2020,PG-13,20 Nov 2020,107 min,"Action, Adventure, Comedy, Thriller",Stanley Tong,"Jackie Chan, Yang Yang, Lun Ai, Miya Muqi",China,1 win & 1 nomination.,34/100,34,4.7,3126,tt9695722


In [None]:
# drop row by index
#clean_omdb_df = omdb_df.drop(1)
#clean_omdb_df

In [12]:
# create reference column to identify the top 100 films list they originated from
clean_omdb_df["Top100 Year"] = f"{year}"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_omdb_df["Top100 Year"] = f"{year}"


In [13]:
#export as csv for use elsewhere
clean_omdb_df.to_csv(f"output_data/raw/movie info/{d_today}_{year}_OMDb_Movie_details.csv", index=False, header=True)

In [14]:
clean_omdb_df

Unnamed: 0,Release,Title,Year,Rated,Released,Runtime,Genre,Director,Actors,Country,Awards,Rotten Tomatoes,Metascore,IMDB,IMDB Votes,IMDB id,Top100 Year
0,Bad Boys for Life,Bad Boys for Life,2020,R,17 Jan 2020,124 min,"Action, Comedy, Crime, Thriller","Adil El Arbi, Bilall Fallah","Will Smith, Martin Lawrence, Vanessa Hudgens, ...","USA, Mexico",2 wins & 14 nominations.,77%,59,6.6,133505,tt1502397,2020
1,1917,1917,2019,R,10 Jan 2020,119 min,"Drama, Thriller, War",Sam Mendes,"Dean-Charles Chapman, George MacKay, Daniel Ma...","USA, UK, India, Spain, Canada, China",Won 3 Oscars. Another 127 wins & 198 nominations.,89%,78,8.3,442036,tt8579674,2020
2,Sonic the Hedgehog,Sonic the Hedgehog,2020,PG,14 Feb 2020,99 min,"Action, Adventure, Comedy, Sci-Fi",Jeff Fowler,"Ben Schwartz, James Marsden, Jim Carrey, Tika ...","USA, Japan, Canada",3 wins & 9 nominations.,63%,47,6.5,92987,tt3794354,2020
3,Jumanji: The Next Level,Jumanji: The Next Level,2019,PG-13,13 Dec 2019,123 min,"Action, Adventure, Comedy, Fantasy",Jake Kasdan,"Dwayne Johnson, Kevin Hart, Jack Black, Karen ...",USA,1 win & 10 nominations.,71%,58,6.6,192270,tt7975244,2020
4,Star Wars: Episode IX - The Rise of Skywalker,Star Wars: Episode IX - The Rise of Skywalker,2019,PG-13,20 Dec 2019,141 min,"Action, Adventure, Fantasy, Sci-Fi",J.J. Abrams,"Carrie Fisher, Mark Hamill, Adam Driver, Daisy...",USA,Nominated for 3 Oscars. Another 9 wins & 39 no...,51%,53,6.6,378434,tt2527338,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,A Hidden Life,A Hidden Life,2019,PG-13,17 Jan 2020,174 min,"Biography, Drama, Romance, War",Terrence Malick,"August Diehl, Valerie Pachner, Maria Simon, Ka...","USA, UK, Germany",9 wins & 30 nominations.,81%,78,7.4,19103,tt5827916,2020
95,Cut Throat City,Cut Throat City,2020,R,21 Aug 2020,123 min,"Action, Crime, Drama, Thriller",RZA,"Shameik Moore, Demetrius Shipp Jr., Denzel Whi...",USA,,67/100,67,4.9,1584,tt3547306,2020
97,Sarileru Neekevvaru,Sarileru Neekevvaru,2020,Not Rated,10 Jan 2020,169 min,"Action, Comedy, Drama",Anil Ravipudi,"Mahesh Babu, Rashmika Mandanna, Vijayshanti, P...",India,,,,6.0,5364,tt10773090,2020
98,Vanguard,Vanguard,2020,PG-13,20 Nov 2020,107 min,"Action, Adventure, Comedy, Thriller",Stanley Tong,"Jackie Chan, Yang Yang, Lun Ai, Miya Muqi",China,1 win & 1 nomination.,34/100,34,4.7,3126,tt9695722,2020
