In [1]:
# dependencies
import pandas as pd
import requests
import json
from datetime import date
from omdb_config import api_key

In [2]:
# export dataframe of results with today's date
today = date.today()
d_today = today.strftime("%Y_%m_%d")

In [3]:
# read in the data for top 100 movies
top_100df = pd.read_csv("output_data/Top100_Movies_2010-2021.csv")

In [4]:
# set the year of movies we are collecting data for
year = 2017

In [5]:
# filter DataFrame for the 100 movies in the specified year
top_100 = top_100df.loc[top_100df["Year"] == year]
# create a list of the top 100 movies to iterate through
movies = top_100["Release"].tolist()
print(movies)

['Star Wars: Episode VIII - The Last Jedi', 'Beauty and the Beast', 'Wonder Woman', 'Guardians of the Galaxy Vol. 2', 'Spider-Man: Homecoming', 'It', 'Thor: Ragnarok', 'Despicable Me 3', 'Logan', 'The Fate of the Furious', 'Justice League', 'Dunkirk', 'Coco', 'The Lego Batman Movie', 'Get Out', 'The Boss Baby', 'Pirates of the Caribbean: Dead Men Tell No Tales', 'Jumanji: Welcome to the Jungle', 'Kong: Skull Island', 'Hidden Figures', 'Cars 3', 'War for the Planet of the Apes', 'Split', 'Transformers: The Last Knight', 'Rogue One: A Star Wars Story', 'Wonder', 'La La Land', 'Sing', 'Girls Trip', 'Fifty Shades Darker', 'Baby Driver', 'Annabelle: Creation', "Daddy's Home 2", 'Murder on the Orient Express', 'Kingsman: The Golden Circle', 'John Wick: Chapter 2', 'Blade Runner 2049', 'The Emoji Movie', 'Power Rangers', 'The Mummy', "The Hitman's Bodyguard", 'Alien: Covenant', 'Captain Underpants: The First Epic Movie', 'A Bad Moms Christmas', "A Dog's Purpose", 'Pitch Perfect 3', 'The Lego 

In [6]:
# create variables to store url
url = "http://www.omdbapi.com/?t="
api_key = "&apikey=" + api_key

In [7]:
# for loop that iterates over the movie list, performing a request for each movie...
# ...and appending the data into empty lists for a DataFrame
title = []
year_ = []
rated = []
released = []
runtime = []
genre = []
director = []
actors = []
country = []
awards = []
rotten_tom = []
metascore = []
imdb_rating = []
imdb_votes = []
imdb_id = []

for movie in movies:
    try:
        response = requests.get(url + f"{movie}" + api_key)
        data = response.json()
    except:
        print(f"For some reason, OMDdb didn't have: {movie}.")
        
    try:
        title.append(data["Title"])
    except:
        title.append("NaN")
        print(f"No Title: {movie}.")
    try:
        year_.append(data["Year"])
    except:
        year_.append("NaN")
        print(f"No Year: {movie}.")
    try:
        rated.append(data["Rated"])
    except:
        rated.append("NaN")
        print(f"No Rated: {movie}.")
    try:
        released.append(data["Released"])
    except:
        released.append("NaN")
        print(f"No Released: {movie}.")
    try:
        runtime.append(data["Runtime"])
    except:
        runtime.append("NaN")
        print(f"No Runtime: {movie}.")
    try:
        genre.append(data["Genre"])
    except:
        genre.append("NaN")
        print(f"No Genre: {movie}.")
    try:
        director.append(data["Director"])
    except:
        director.append("NaN")
        print(f"No Director: {movie}.")
    try:
        actors.append(data["Actors"])
    except:
        actors.append("NaN")
        print(f"No Actors: {movie}.")
    try:
        country.append(data["Country"])
    except:
        country.append("NaN")
        print(f"No Country: {movie}.")
    try:
        awards.append(data["Awards"])
    except:
        awards.append("NaN")
        print(f"No Awards: {movie}.")
    try:
        rotten_tom.append(data["Ratings"][1]["Value"])
    except:
        rotten_tom.append("NaN")
        print(f"No Rotten Tomatoes: {movie}.")
    try:
        metascore.append(data["Metascore"])
    except:
        metascore.append("NaN")
        print(f"No Metascore: {movie}.")
    try:
        imdb_rating.append(data["imdbRating"])
    except:
        imdb_rating.append("NaN")
        print(f"No imdbRating: {movie}.")
    try:
        imdb_votes.append(data["imdbVotes"])
    except:
        imdb_votes.append("NaN")
        print(f"No imdbVotes: {movie}.")
    try:
        imdb_id.append(data["imdbID"])
    except:
        imdb_id.append("NaN")
        print(f"No imdbID: {movie}.")

In [8]:
print(len(title))

100


In [9]:
# create list of missing movies
#missing = ["Knight and Day2010 Re-release", "Percy Jackson & the Olympians: The Lightning Thief"]

# use list comprehension to remove missing movies
#movies = [movie for movie in movies if movie not in missing]

In [10]:
# creat DataFrame with the data collected in lists
omdb_df = pd.DataFrame({"Release": movies, "Title": title, "Year": year_,
                        "Rated": rated, "Released": released, "Runtime": runtime, "Genre": genre,
                        "Director": director, "Actors": actors, "Country": country, "Awards": awards,
                        "Rotten Tomatoes": rotten_tom, "Metascore": metascore, "IMDB": imdb_rating,
                        "IMDB Votes": imdb_votes, "IMDB id": imdb_id})
omdb_df

Unnamed: 0,Release,Title,Year,Rated,Released,Runtime,Genre,Director,Actors,Country,Awards,Rotten Tomatoes,Metascore,IMDB,IMDB Votes,IMDB id
0,Star Wars: Episode VIII - The Last Jedi,Star Wars: Episode VIII - The Last Jedi,2017,PG-13,15 Dec 2017,152 min,"Action, Adventure, Fantasy, Sci-Fi",Rian Johnson,"Mark Hamill, Carrie Fisher, Adam Driver, Daisy...",USA,Nominated for 4 Oscars. Another 26 wins & 98 n...,90%,84,7.0,570478,tt2527336
1,Beauty and the Beast,Beauty and the Beast,1991,G,22 Nov 1991,84 min,"Animation, Family, Fantasy, Musical, Romance","Gary Trousdale, Kirk Wise","Robby Benson, Jesse Corti, Rex Everhart, Angel...",USA,Won 2 Oscars. Another 26 wins & 32 nominations.,94%,95,8.0,418912,tt0101414
2,Wonder Woman,Wonder Woman,2017,PG-13,02 Jun 2017,141 min,"Action, Adventure, Fantasy, Sci-Fi, War",Patty Jenkins,"Gal Gadot, Chris Pine, Connie Nielsen, Robin W...","USA, UK",25 wins & 71 nominations.,93%,76,7.4,575048,tt0451279
3,Guardians of the Galaxy Vol. 2,Guardians of the Galaxy Vol. 2,2017,PG-13,05 May 2017,136 min,"Action, Adventure, Comedy, Sci-Fi",James Gunn,"Chris Pratt, Zoe Saldana, Dave Bautista, Vin D...",USA,Nominated for 1 Oscar. Another 15 wins & 57 no...,85%,67,7.6,578093,tt3896198
4,Spider-Man: Homecoming,Spider-Man: Homecoming,2017,PG-13,07 Jul 2017,133 min,"Action, Adventure, Sci-Fi",Jon Watts,"Tom Holland, Michael Keaton, Robert Downey Jr....",USA,7 wins & 10 nominations.,92%,73,7.4,520353,tt2250912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,"Three Billboards Outside Ebbing, Missouri","Three Billboards Outside Ebbing, Missouri",2017,R,01 Dec 2017,115 min,"Comedy, Crime, Drama",Martin McDonagh,"Frances McDormand, Caleb Landry Jones, Kerry C...","UK, USA",Won 2 Oscars. Another 128 wins & 228 nominations.,90%,88,8.1,440890,tt5027774
96,The Bye Bye Man,The Bye Bye Man,2017,PG-13,13 Jan 2017,96 min,"Drama, Fantasy, Horror, Mystery, Thriller",Stacy Title,"Douglas Smith, Lucien Laviscount, Cressida Bon...","USA, China",,19%,37,4.2,22037,tt4030600
97,Victoria & Abdul,Victoria,2015,Not Rated,11 Jun 2015,138 min,"Crime, Drama, Romance, Thriller",Sebastian Schipper,"Laia Costa, Frederick Lau, Franz Rogowski, Bur...",Germany,17 wins & 21 nominations.,82%,77,7.6,53593,tt4226388
98,Rough Night,Rough Night,2017,R,16 Jun 2017,101 min,"Comedy, Crime, Thriller",Lucia Aniello,"Scarlett Johansson, Jillian Bell, Zoë Kravitz,...",USA,1 win & 1 nomination.,44%,51,5.2,47632,tt4799050


In [None]:
                            & (omdb_df["Release"] != "Beauty and the Beast2012 3D Release")\
                            & (omdb_df["Release"] != "Star Wars: Episode I - The Phantom Menace2012 3D Release")\
                            & (omdb_df["Release"] != "Finding Nemo2012 3D Release")

In [None]:
# filter out the rows that are mostly NaN, using above intel
clean_omdb_df = omdb_df.loc[(omdb_df["Release"] != "Jurassic Park2013 Re-release")]
clean_omdb_df

In [None]:
clean_omdb_df = omdb_df.drop(98)
clean_omdb_df

In [11]:
# create reference column to identify the top 100 films list they originated from
omdb_df["Top100 Year"] = f"{year}"

In [12]:
#export as csv for use elsewhere
omdb_df.to_csv(f"output_data/raw/movie info/{d_today}_{year}_OMDb_Movie_details.csv", index=False, header=True)