In [None]:
# Importing Dependencies
import pandas as pd
import pymongo
import numpy as np
from config import api_key
from pprint import pprint
import requests
import json

In [None]:
# ----------------------------------------------------------------------
# MongoDB setup: connection, database handle, and the three collections
# this notebook writes to.
# ----------------------------------------------------------------------

# Connection string for the MongoDB server on localhost, port 27017.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Handle to the 'award_movies_db' database.
db = client["award_movies_db"]

# Drop any collections left over from a previous run so that re-running the
# notebook does not accumulate duplicate documents.
db["oscars_coll"].drop()
db["omdb_coll"].drop()
db["golden_globe_coll"].drop()

# Collection handles used by the cells below.
oscars_coll = db.oscars_coll
omdb_coll = db.omdb_coll
golden_globe_coll = db.golden_globe_coll

In [None]:
### LOADING THE OSCARS CSV AND STORING THE DIRECTING NOMINATIONS IN MONGODB ###
oscars_file = "resources/the_oscar_award.csv"

# Single pipeline, applied in this order:
#   1. read the CSV
#   2. drop every row that contains a null value
#   3. keep only rows whose category starts with 'DIRECTING'
#   4. rename columns so they line up with the golden globes dataframe
#   5. drop the unwanted 'ceremony' column
oscars_df = (
    pd.read_csv(oscars_file)
    .dropna()
    .loc[lambda frame: frame['category'].str.startswith('DIRECTING'), :]
    .rename(columns={"year_film": "year",
                     "year_ceremony": "oscar_year",
                     "name": "nominee"})
    .drop(columns=['ceremony'])
)

# One document per remaining dataframe row goes into the oscars collection.
oscars_coll.insert_many(oscars_df.to_dict(orient='records'))

In [None]:
### LOADING THE GOLDEN GLOBES CSV AND STORING BEST-DIRECTOR NOMINATIONS IN MONGODB ###
gg_file = "resources/golden_globe_awards.csv"

# Raw frame exactly as read from disk; the cleaned version is `golden_globe`.
golden_df = pd.read_csv(gg_file)

# Single pipeline, applied in this order:
#   1. drop every row that contains a null value
#   2. keep only the "Best Director - Motion Picture" category
#   3. drop the unwanted 'ceremony' column
#   4. rename columns so they line up with the oscars dataframe
golden_globe = (
    golden_df
    .dropna()
    .loc[lambda frame: frame["category"] == "Best Director - Motion Picture", :]
    .drop(columns=['ceremony'])
    .rename(columns={"year_film": "year",
                     "year_award": "golden_globe_year"})
)

# One document per remaining dataframe row goes into the golden globe collection.
golden_globe_coll.insert_many(golden_globe.to_dict(orient='records'))

In [None]:
### CREATING LIST OF FILMS FOR API CALL ###

# Unique film titles from the golden globe dataframe, in order of first appearance.
golden_globe_titles = golden_globe['film'].unique()

# Unique film titles from the oscars dataframe, in order of first appearance.
oscars_titles = list(oscars_df['film'].unique())

# Combined list of unique film titles across both award ceremony dataframes:
# golden globe titles first, then any oscars titles not already present.
# dict.fromkeys keeps first-seen order and de-duplicates in a single pass,
# instead of re-scanning the growing list for every oscars title.
titles = list(dict.fromkeys([*golden_globe_titles, *oscars_titles]))

In [None]:
### SETTING UP BASE URL TO API ###
base_url = f"http://www.omdbapi.com/?apikey={api_key}&"

### initializing documents list for db ###
documents = []

### FOR LOOP TO FIND MOVIE STATS FOR EVERY TITLE IN 'titles' ###
### ('titles' holds the combined golden globe + oscars film titles) ###
for title in titles:

    ### QUERYING THE API FOR THIS TITLE ###
    # The title is passed through `params` so that requests URL-encodes it;
    # pasting it into the URL by hand breaks the query for titles containing
    # characters such as '&', '#', '+' or '?'.
    # base_url ends with '&'; it is stripped because requests inserts its own
    # separator when it appends the encoded parameters.
    # The timeout stops a single stalled request from hanging the notebook.
    response = requests.get(
        base_url.rstrip("&"),
        params={"t": title},
        timeout=30,
    ).json()

    # getting desired variables from api response
    try:
        doc = {'title': response['Title'],
               'year': response['Year'],
               'rating': response['Rated'],
               'box_office': response['BoxOffice'],
               'production_company': response['Production'],
               'director': response['Director']
               }
    except KeyError:
        # The response lacks at least one of the wanted fields (presumably the
        # API's not-found / error payload -- confirm against the OMDb docs).
        # Skip this title. Only KeyError is caught so that unrelated failures
        # and Ctrl-C are no longer silently swallowed.
        continue

    documents.append(doc)

In [None]:
### loading movie documents from api into mongodb collection ###
# insert_many rejects an empty list with a TypeError, so guard against the
# case where every API lookup was skipped and report it explicitly instead.
if documents:
    omdb_coll.insert_many(documents)
else:
    print("No OMDb documents were collected; nothing inserted into omdb_coll.")