In [1]:
# dependencies
import pandas as pd
import requests
import json
from datetime import date
from omdb_config import api_key

In [2]:
# export dataframe of results with today's date
today = date.today()
d_today = today.strftime("%Y_%m_%d")

In [3]:
# read in the data for top 100 movies
top_100df = pd.read_csv("output_data/Top100_Movies_2010-2021.csv")

In [4]:
# set the year of movies we are collecting data for
year = 2011

In [5]:
# filter DataFrame for the 100 movies in the specified year
top_100 = top_100df.loc[top_100df["Year"] == year]
# create a list of the top 100 movies to iterate through
movies = top_100["Release"].tolist()
print(movies)

['Harry Potter and the Deathly Hallows: Part 2', 'Transformers: Dark of the Moon', 'The Twilight Saga: Breaking Dawn - Part 1', 'The Hangover Part II', 'Pirates of the Caribbean: On Stranger Tides', 'Fast Five', 'Cars 2', 'Thor', 'Rise of the Planet of the Apes', 'Captain America: The First Avenger', 'The Help', 'Bridesmaids', 'Kung Fu Panda 2', 'X-Men: First Class', 'Puss in Boots', 'Rio', 'The Smurfs', 'Super 8', 'Sherlock Holmes: A Game of Shadows', 'Rango', 'Mission: Impossible - Ghost Protocol', "The King's Speech", 'Horrible Bosses', 'Green Lantern', 'Hop', 'Paranormal Activity 3', 'Just Go with It', 'True Grit', 'Bad Teacher', 'Cowboys & Aliens', 'Gnomeo & Juliet', 'The Green Hornet', 'The Lion King2011 3D Release', 'Alvin and the Chipmunks: Chipwrecked', 'Real Steel', 'Crazy, Stupid, Love.', 'Battle Los Angeles', 'Immortals', 'The Muppets', 'Zookeeper', 'Limitless', 'Tower Heist', 'Contagion', 'Moneyball', 'Justin Bieber: Never Say Never', 'Dolphin Tale', 'Jack and Jill', 'No S

In [6]:
# create variables to store url
url = "http://www.omdbapi.com/?t="
api_key = "&apikey=" + api_key

In [7]:
# for loop that iterates over the movie list, performing a request for each movie...
# ...and appending the data into empty lists for a DataFrame
title = []
year_ = []
rated = []
released = []
runtime = []
genre = []
director = []
actors = []
country = []
awards = []
rotten_tom = []
metascore = []
imdb_rating = []
imdb_votes = []
imdb_id = []

for movie in movies:
    try:
        response = requests.get(url + f"{movie}" + api_key)
        data = response.json()
    except:
        print(f"For some reason, OMDdb didn't have: {movie}.")
        
    try:
        title.append(data["Title"])
    except:
        title.append("NaN")
        print(f"No Title: {movie}.")
    try:
        year_.append(data["Year"])
    except:
        year_.append("NaN")
        print(f"No Year: {movie}.")
    try:
        rated.append(data["Rated"])
    except:
        rated.append("NaN")
        print(f"No Rated: {movie}.")
    try:
        released.append(data["Released"])
    except:
        released.append("NaN")
        print(f"No Released: {movie}.")
    try:
        runtime.append(data["Runtime"])
    except:
        runtime.append("NaN")
        print(f"No Runtime: {movie}.")
    try:
        genre.append(data["Genre"])
    except:
        genre.append("NaN")
        print(f"No Genre: {movie}.")
    try:
        director.append(data["Director"])
    except:
        director.append("NaN")
        print(f"No Director: {movie}.")
    try:
        actors.append(data["Actors"])
    except:
        actors.append("NaN")
        print(f"No Actors: {movie}.")
    try:
        country.append(data["Country"])
    except:
        country.append("NaN")
        print(f"No Country: {movie}.")
    try:
        awards.append(data["Awards"])
    except:
        awards.append("NaN")
        print(f"No Awards: {movie}.")
    try:
        rotten_tom.append(data["Ratings"][1]["Value"])
    except:
        rotten_tom.append("NaN")
        print(f"No Rotten Tomatoes: {movie}.")
    try:
        metascore.append(data["Metascore"])
    except:
        metascore.append("NaN")
        print(f"No Metascore: {movie}.")
    try:
        imdb_rating.append(data["imdbRating"])
    except:
        imdb_rating.append("NaN")
        print(f"No imdbRating: {movie}.")
    try:
        imdb_votes.append(data["imdbVotes"])
    except:
        imdb_votes.append("NaN")
        print(f"No imdbVotes: {movie}.")
    try:
        imdb_id.append(data["imdbID"])
    except:
        imdb_id.append("NaN")
        print(f"No imdbID: {movie}.")

No Rotten Tomatoes: Rise of the Planet of the Apes.
No Title: The Lion King2011 3D Release.
No Year: The Lion King2011 3D Release.
No Rated: The Lion King2011 3D Release.
No Released: The Lion King2011 3D Release.
No Runtime: The Lion King2011 3D Release.
No Genre: The Lion King2011 3D Release.
No Director: The Lion King2011 3D Release.
No Actors: The Lion King2011 3D Release.
No Country: The Lion King2011 3D Release.
No Awards: The Lion King2011 3D Release.
No Rotten Tomatoes: The Lion King2011 3D Release.
No Metascore: The Lion King2011 3D Release.
No imdbRating: The Lion King2011 3D Release.
No imdbVotes: The Lion King2011 3D Release.
No imdbID: The Lion King2011 3D Release.
No Title: Footloose2011 Re-release.
No Year: Footloose2011 Re-release.
No Rated: Footloose2011 Re-release.
No Released: Footloose2011 Re-release.
No Runtime: Footloose2011 Re-release.
No Genre: Footloose2011 Re-release.
No Director: Footloose2011 Re-release.
No Actors: Footloose2011 Re-release.
No Country: Footl

In [8]:
print(len(title))

100


In [None]:
# create list of missing movies
#missing = ["Knight and Day2010 Re-release", "Percy Jackson & the Olympians: The Lightning Thief"]

# use list comprehension to remove missing movies
#movies = [movie for movie in movies if movie not in missing]

In [9]:
# creat DataFrame with the data collected in lists
omdb_df = pd.DataFrame({"Release": movies, "Title": title, "Year": year_,
                        "Rated": rated, "Released": released, "Runtime": runtime, "Genre": genre,
                        "Director": director, "Actors": actors, "Country": country, "Awards": awards,
                        "Rotten Tomatoes": rotten_tom, "Metascore": metascore, "IMDB": imdb_rating,
                        "IMDB Votes": imdb_votes, "IMDB id": imdb_id})
omdb_df

Unnamed: 0,Release,Title,Year,Rated,Released,Runtime,Genre,Director,Actors,Country,Awards,Rotten Tomatoes,Metascore,IMDB,IMDB Votes,IMDB id
0,Harry Potter and the Deathly Hallows: Part 2,Harry Potter and the Deathly Hallows: Part 2,2011,PG-13,15 Jul 2011,130 min,"Adventure, Drama, Fantasy, Mystery",David Yates,"Ralph Fiennes, Michael Gambon, Alan Rickman, D...",UK,Nominated for 3 Oscars. Another 46 wins & 91 n...,96%,85,8.1,771332,tt1201607
1,Transformers: Dark of the Moon,Transformers: Dark of the Moon,2011,PG-13,29 Jun 2011,154 min,"Action, Adventure, Sci-Fi",Michael Bay,"Shia LaBeouf, Rosie Huntington-Whiteley, Josh ...",USA,Nominated for 3 Oscars. Another 10 wins & 39 n...,35%,42,6.2,386139,tt1399103
2,The Twilight Saga: Breaking Dawn - Part 1,The Twilight Saga: Breaking Dawn - Part 1,2011,PG-13,18 Nov 2011,117 min,"Adventure, Drama, Fantasy, Romance, Thriller",Bill Condon,"Taylor Lautner, Gil Birmingham, Billy Burke, S...",USA,11 wins & 22 nominations.,25%,45,4.9,225316,tt1324999
3,The Hangover Part II,The Hangover Part II,2011,R,26 May 2011,102 min,Comedy,Todd Phillips,"Bradley Cooper, Ed Helms, Zach Galifianakis, J...","USA, Thailand",5 wins & 18 nominations.,33%,44,6.5,466051,tt1411697
4,Pirates of the Caribbean: On Stranger Tides,Pirates of the Caribbean: On Stranger Tides,2011,PG-13,20 May 2011,136 min,"Action, Adventure, Fantasy",Rob Marshall,"Johnny Depp, Penélope Cruz, Geoffrey Rush, Ian...","USA, UK",3 wins & 31 nominations.,33%,45,6.6,484140,tt1298650
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,War Horse,War Horse,2011,PG-13,25 Dec 2011,146 min,"Action, Adventure, Drama, History, War",Steven Spielberg,"Jeremy Irvine, Peter Mullan, Emily Watson, Nie...","USA, India",Nominated for 6 Oscars. Another 15 wins & 71 n...,74%,72,7.2,147807,tt1568911
96,Larry Crowne,Larry Crowne,2011,PG-13,01 Jul 2011,98 min,"Comedy, Drama, Romance",Tom Hanks,"Tom Hanks, Sarah Mahoney, Roxana Ortega, Randa...","USA, France",3 nominations.,37%,41,6.1,63862,tt1583420
97,50/50,50/50,2011,R,30 Sep 2011,100 min,"Comedy, Drama, Romance",Jonathan Levine,"Joseph Gordon-Levitt, Seth Rogen, Anna Kendric...",USA,Nominated for 2 Golden Globes. Another 12 wins...,93%,72,7.6,316179,tt1306980
98,A Very Harold & Kumar Christmas,A Very Harold & Kumar Christmas,2011,R,04 Nov 2011,90 min,"Adventure, Comedy",Todd Strauss-Schulson,"Patton Oswalt, Isabella Gielniak, Kal Penn, Au...",USA,1 nomination.,68%,61,6.3,65983,tt1268799


In [11]:
# filter out the rows that are mostly NaN, using above intel
clean_omdb_df = omdb_df.loc[(omdb_df["Release"] != "The Lion King2011 3D Release")\
                            & (omdb_df["Release"] != "Footloose2011 Re-release")]
clean_omdb_df

Unnamed: 0,Release,Title,Year,Rated,Released,Runtime,Genre,Director,Actors,Country,Awards,Rotten Tomatoes,Metascore,IMDB,IMDB Votes,IMDB id
0,Harry Potter and the Deathly Hallows: Part 2,Harry Potter and the Deathly Hallows: Part 2,2011,PG-13,15 Jul 2011,130 min,"Adventure, Drama, Fantasy, Mystery",David Yates,"Ralph Fiennes, Michael Gambon, Alan Rickman, D...",UK,Nominated for 3 Oscars. Another 46 wins & 91 n...,96%,85,8.1,771332,tt1201607
1,Transformers: Dark of the Moon,Transformers: Dark of the Moon,2011,PG-13,29 Jun 2011,154 min,"Action, Adventure, Sci-Fi",Michael Bay,"Shia LaBeouf, Rosie Huntington-Whiteley, Josh ...",USA,Nominated for 3 Oscars. Another 10 wins & 39 n...,35%,42,6.2,386139,tt1399103
2,The Twilight Saga: Breaking Dawn - Part 1,The Twilight Saga: Breaking Dawn - Part 1,2011,PG-13,18 Nov 2011,117 min,"Adventure, Drama, Fantasy, Romance, Thriller",Bill Condon,"Taylor Lautner, Gil Birmingham, Billy Burke, S...",USA,11 wins & 22 nominations.,25%,45,4.9,225316,tt1324999
3,The Hangover Part II,The Hangover Part II,2011,R,26 May 2011,102 min,Comedy,Todd Phillips,"Bradley Cooper, Ed Helms, Zach Galifianakis, J...","USA, Thailand",5 wins & 18 nominations.,33%,44,6.5,466051,tt1411697
4,Pirates of the Caribbean: On Stranger Tides,Pirates of the Caribbean: On Stranger Tides,2011,PG-13,20 May 2011,136 min,"Action, Adventure, Fantasy",Rob Marshall,"Johnny Depp, Penélope Cruz, Geoffrey Rush, Ian...","USA, UK",3 wins & 31 nominations.,33%,45,6.6,484140,tt1298650
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,War Horse,War Horse,2011,PG-13,25 Dec 2011,146 min,"Action, Adventure, Drama, History, War",Steven Spielberg,"Jeremy Irvine, Peter Mullan, Emily Watson, Nie...","USA, India",Nominated for 6 Oscars. Another 15 wins & 71 n...,74%,72,7.2,147807,tt1568911
96,Larry Crowne,Larry Crowne,2011,PG-13,01 Jul 2011,98 min,"Comedy, Drama, Romance",Tom Hanks,"Tom Hanks, Sarah Mahoney, Roxana Ortega, Randa...","USA, France",3 nominations.,37%,41,6.1,63862,tt1583420
97,50/50,50/50,2011,R,30 Sep 2011,100 min,"Comedy, Drama, Romance",Jonathan Levine,"Joseph Gordon-Levitt, Seth Rogen, Anna Kendric...",USA,Nominated for 2 Golden Globes. Another 12 wins...,93%,72,7.6,316179,tt1306980
98,A Very Harold & Kumar Christmas,A Very Harold & Kumar Christmas,2011,R,04 Nov 2011,90 min,"Adventure, Comedy",Todd Strauss-Schulson,"Patton Oswalt, Isabella Gielniak, Kal Penn, Au...",USA,1 nomination.,68%,61,6.3,65983,tt1268799


In [12]:
# create reference column to identify the top 100 films list they originated from
clean_omdb_df["Top100 Year"] = f"{year}"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_omdb_df["Top100 Year"] = f"{year}"


In [13]:
#export as csv for use elsewhere
clean_omdb_df.to_csv(f"output_data/{d_today}_{year}_OMDb_Movie_details.csv", index=False, header=True)