In [20]:
# Imports
import numpy
import pandas
import requests
import time

In [21]:
# Parameters
api_key = 'a1cce831'
api_url = 'http://www.omdbapi.com'

### Read in data

In [22]:
# Need latin-1, otherwise: 'utf-8' codec can't decode byte 0xd0 in position 22: invalid continuation byte
data = pandas.read_csv('movies.csv', encoding='latin-1')

In [23]:
already_done = pandas.read_csv('api_data_raw.csv')
i = data.query("title == '" + already_done.iloc[-1, :]['title'] + "'")
i = i.index.values[0]
data = data.iloc[i+1:, :]

In [24]:
data.head()

Unnamed: 0,title,release_year,director
46,Small Soldiers,1998,
47,Fight Club,1999,
48,October Sky,1999,
49,Star Wars: Episode I Ð The Phantom Menace,1999,
50,The Matrix,1999,


### Call API

In [25]:
error_titles = []
api_data = pandas.DataFrame()

In [26]:
for i in range(data.shape[0]):
    title        = data.iloc[i]['title']
    release_year = data.iloc[i]['release_year']
    params = {'apiKey': api_key, 't': title, 'y': release_year}   # get by title & year
    r = requests.get(url=api_url, params=params)
    if r.status_code == 524:
        time.sleep(60)
        r = requests.get(url=api_url, params=params)
    response = r.json()
    print([title, release_year], end = '')
    
    if response['Response'] == 'False':
        print(' | Error')
        error_titles.append(title)
    else:
        row = {}
        row['title']       = response['Title']
        row['type']        = response['Type']
        row['actors']      = response['Actors']
        row['box_office']  = response.get('BoxOffice')
        row['director']    = response['Director']
        row['genres']      = response['Genre']
        row['plot']        = response['Plot']
        row['production']  = response['Production']
        row['rated']       = response['Rated']
        ratings = {}
        for rating in response['Ratings']:
            ratings[rating['Source']] = rating['Value']
        row['rating_imdb'] = ratings.get('Internet Movie Database')
        row['rating_rt']   = ratings.get('Rotten Tomatoes')
        row['rating_mc']   = ratings.get('Metacritic')
        row['released']    = response['Released']
        row['runtime']     = response['Runtime']
        row['writers']     = response['Writer']        
        row['year']        = response['Year']
        row['imdb_votes']  = response['imdbVotes']
        row['imdb_id']     = response['imdbID']
        row = pandas.Series(row)
        api_data = api_data.append(row, ignore_index=True)
        print()
    time.sleep(2)

['Small Soldiers', 1998]
['Fight Club', 1999]
['October Sky', 1999]
['Star Wars: Episode I Ð The Phantom Menace', 1999] | Error
['The Matrix', 1999]
['The World Is Not Enough', 1999]
['Toy Story 2', 1999]
['American Psycho', 2000]
['Cast Away', 2000]
['Gladiator', 2000]
['Mission: Impossible II', 2000]
['O Brother, Where Art Thou?', 2000]
['Remember the Titans', 2000]
['Shanghai Noon', 2000]
['X-Men', 2000]
['A Beautiful Mind', 2001]
['Ali', 2001]
['Donnie Darko', 2001]
["Harry Potter and the Sorcerer's Stone", 2001]
['Jurassic Park III', 2001]
['Legally Blonde', 2001]
["Ocean's Eleven", 2001]
['Planet of the Apes', 2001]
['Shrek', 2001]
['Spirited Away', 2001]
['Spy Kids', 2001]
['The Fast and the Furious', 2001]
['Training Day', 2001]
['Zoolander', 2001]
['Blade II', 2002]
['Day Another Day', 2002] | Error
['Harry Potter and the Chamber of Secrets', 2002]
['Ice Age', 2002]
['Men in Black II', 2002]
['Minority Report', 2002]
['Scooby-Doo', 2002]
['Signs', 2002]
['Spider-Man', 2002]
['

['Mission: Impossible Ð Rogue Nation', 2015] | Error
['Scent of a Woman', 1992]
['Indiana Jones and Raiders of the Lost Ark', 1981] | Error
['The Martian', 2015]
['Back to the Future', 1985]
['Billy Madison', 1995]
['Indiana Jones and the Temple of Doom', 1984]
['Indiana Jones and the Last Crusade', 1989]
['Back to the Future Part II', 1989]
['Ant-Man', 2015]
['Rain Man', 1988]
['Kill Bill: Volume 1', 2003] | Error
['Kill Bill: Volume 2', 2004]
['Zero Dark Thirty', 2012]
['The Fugitive', 1993]
['Spectre', 2015]
['Star Wars: The Force Awakens', 2015]
['Creed', 2015]
['Fargo', 1996]
['The Notebook', 2004]
['500 Days of Summer', 2009]
['Deadpool', 2016]
['Whiplash', 2014]
['Pi', 1998]
['Groundhog Day', 1993]
['Dallas Buyers Club', 2013]
['Unbreakable', 2000]
['Say AnythingÉ', 1989] | Error
['Ghostbusters', 1984]
['No Strings Attached', 2011]
['Batman v Superman: Dawn of Justice', 2016]
['Ratatouille', 2007]
['Moon', 2009]
['The Raid: Redemption', 2011]
['Dope', 2015]
['Captain America: Ci

### Write

In [27]:
# api_data.to_csv('api_data_raw.csv', header=True, index=False)
api_data.to_csv('api_data_raw.csv', header=False, index=False, mode='a')

In [28]:
error_titles = pandas.Series(error_titles)
error_titles = pandas.DataFrame(error_titles)

In [29]:
# error_titles.to_csv('error_titles.csv', header=True, index=False)
error_titles.to_csv('error_titles.csv', header=False, index=False, mode='a')

### Later

In [None]:
def convert_to_datetime(x):
    date = None
    try: date = pandas.to_datetime(x)
    except: pass
    return date
data['date_added']   = data['date_added'].apply(lambda x: convert_to_datetime(x))
data['date_watched'] = data['date_watched'].apply(lambda x: convert_to_datetime(x))