In [91]:
# File for all valid movie ids with titles
# http://files.tmdb.org/p/exports/movie_ids_MM_DD_YYYY.json.gz
import numpy as np

In [92]:
import pandas as pd

In [93]:
import requests
import time

In [94]:
movie_titles = ['Avatar', 'Avengers: Endgame', 'Pulp Fiction', 'Superbad', 'Avengers: Age of Ultron', 'The Avengers', 'Joker', 'The Dark Knight', 'Scarface', 'The Dark Knight Rises', 'Iron Man', 'Iron Man 2', 'Thor', 'Get Out', 'It', 'Spider-Man Homecoming', 'Cars', 'Toy Story']

In [95]:
api_key = '04cf8c70677690f36ab3e4b1b0464548'

In [96]:
def get_movie(title):
    resp = requests.get('https://api.themoviedb.org/3/search/movie?api_key=' + api_key + '&language=en-US&query=' + title + '&page=1&include_adult=false&region=US') #get movies by title from search
    result = resp.json()['results'][0]
    
    movie = {
        'title': result['title'],
        'tmdb_id': result['id']
    }
    return movie

In [97]:
movies = []
for title in movie_titles:
    movies.append(get_movie(title))

In [98]:
movies

[{'title': 'Avatar', 'tmdb_id': 19995},
 {'title': 'Avengers: Endgame', 'tmdb_id': 299534},
 {'title': 'Pulp Fiction', 'tmdb_id': 680},
 {'title': 'Superbad', 'tmdb_id': 8363},
 {'title': 'Avengers: Age of Ultron', 'tmdb_id': 99861},
 {'title': 'The Avengers', 'tmdb_id': 24428},
 {'title': 'Joker', 'tmdb_id': 475557},
 {'title': 'The Dark Knight', 'tmdb_id': 155},
 {'title': 'Scarface', 'tmdb_id': 111},
 {'title': 'The Dark Knight Rises', 'tmdb_id': 49026},
 {'title': 'Iron Man', 'tmdb_id': 1726},
 {'title': 'Iron Man 2', 'tmdb_id': 10138},
 {'title': 'Thor', 'tmdb_id': 10195},
 {'title': 'Get Out', 'tmdb_id': 419430},
 {'title': 'It Chapter Two', 'tmdb_id': 474350},
 {'title': 'Spider-Man: Homecoming', 'tmdb_id': 315635},
 {'title': 'Cars', 'tmdb_id': 920},
 {'title': 'Toy Story 4', 'tmdb_id': 301528}]

In [99]:
def get_movie_info(id):
    resp = requests.get('https://api.themoviedb.org/3/movie/' + str(id) + '?api_key=' + api_key + '&language=en-US') #get movie by id
    result = resp.json()
    resp = requests.get('https://api.themoviedb.org/3/movie/' + str(id) + '/credits?api_key=' + api_key + '&language=en-US')
    credits = resp.json()
    resp = requests.get('https://api.themoviedb.org/3/movie/' + str(id) + '/keywords?api_key=' + api_key)
    keywords = resp.json()
    resp = requests.get('https://api.themoviedb.org/3/movie/' + str(id) + '/release_dates?api_key=' + api_key)
    release_dates = resp.json()
    rating = next(certification for certification in release_dates['results'] if certification['iso_3166_1'] == 'US')
    release = next(certification for certification in rating['release_dates'] if certification['certification'] != '')
    info = {
        'budget': result['budget'],
        'in_collection': 1 if result['belongs_to_collection'] else 0,
        'genres': [genre['name'] for genre in result['genres']],
        'imdb_id': result['imdb_id'],
        'overview': result['overview'],
        'production_companies': [producer['name'] for producer in result['production_companies']],
        'release_date': result['release_date'],
        'total_revenue': result['revenue'],
        'runtime': result['runtime'],
        'cast': [credit['name'] for credit in credits['cast']],
        'crew': [{credit['job']: credit['name']} for credit in credits['crew']],
        'keywords': [word['name'] for word in keywords['keywords']],
        'rating': release['certification']
    }
    time.sleep(0.1)
    return info

In [100]:
for movie in movies:
    movie.update(get_movie_info(movie['tmdb_id']))

In [101]:
movies

[{'title': 'Avatar',
  'tmdb_id': 19995,
  'budget': 237000000,
  'in_collection': 1,
  'genres': ['Action', 'Adventure', 'Fantasy', 'Science Fiction'],
  'imdb_id': 'tt0499549',
  'overview': 'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization.',
  'production_companies': ['Dune Entertainment',
   'Lightstorm Entertainment',
   '20th Century Fox',
   'Ingenious Media'],
  'release_date': '2009-12-10',
  'total_revenue': 2787965087,
  'runtime': 162,
  'cast': ['Sam Worthington',
   'Zoe Saldana',
   'Stephen Lang',
   'Sigourney Weaver',
   'Joel David Moore',
   'Giovanni Ribisi',
   'Michelle Rodriguez',
   'Dileep Rao',
   'Laz Alonso',
   'Wes Studi',
   'CCH Pounder',
   'Matt Gerald',
   'Sean Anthony Moran',
   'Jason Whyte',
   'Scott Lawrence',
   'Kelly Kilgour',
   'James Patrick Pitt',
   'Sean Patrick Murphy',
   'Peter Dillon',
   'Kevin Dorman',
  

In [102]:
df = pd.DataFrame(movies)

In [103]:
df

Unnamed: 0,title,tmdb_id,budget,in_collection,genres,imdb_id,overview,production_companies,release_date,total_revenue,runtime,cast,crew,keywords,rating
0,Avatar,19995,237000000,1,"[Action, Adventure, Fantasy, Science Fiction]",tt0499549,"In the 22nd century, a paraplegic Marine is di...","[Dune Entertainment, Lightstorm Entertainment,...",2009-12-10,2787965087,162,"[Sam Worthington, Zoe Saldana, Stephen Lang, S...","[{'Director': 'James Cameron'}, {'Writer': 'Ja...","[culture clash, future, space war, space colon...",PG-13
1,Avengers: Endgame,299534,356000000,1,"[Adventure, Science Fiction, Action]",tt4154796,After the devastating events of Avengers: Infi...,[Marvel Studios],2019-04-24,2796274401,181,"[Robert Downey Jr., Chris Evans, Mark Ruffalo,...",[{'Original Music Composer': 'Alan Silvestri'}...,"[space travel, time travel, time machine, sequ...",PG-13
2,Pulp Fiction,680,8000000,0,"[Thriller, Crime]",tt0110912,"A burger-loving hit man, his philosophical par...","[Miramax, A Band Apart, Jersey Films]",1994-09-10,213928762,154,"[John Travolta, Samuel L. Jackson, Uma Thurman...","[{'Screenplay': 'Quentin Tarantino'}, {'Produc...","[transporter, drug dealer, boxer, massage, sto...",R
3,Superbad,8363,20000000,0,[Comedy],tt0829482,Two co-dependent high school seniors are force...,"[Columbia Pictures, Apatow Productions]",2007-08-17,121463226,113,"[Jonah Hill, Michael Cera, Christopher Mintz-P...","[{'Director': 'Greg Mottola'}, {'Producer': 'J...","[alcohol, chaos, nerd, police, high school, yo...",R
4,Avengers: Age of Ultron,99861,280000000,1,"[Action, Adventure, Science Fiction]",tt2395427,When Tony Stark tries to jumpstart a dormant p...,[Marvel Studios],2015-04-22,1405403694,141,"[Robert Downey Jr., Chris Evans, Chris Hemswor...","[{'Director': 'Joss Whedon'}, {'Writer': 'Joss...","[seoul, sequel, superhero, based on comic, goo...",PG-13
5,The Avengers,24428,220000000,1,"[Science Fiction, Action, Adventure]",tt0848228,When an unexpected enemy emerges and threatens...,[Marvel Studios],2012-04-25,1519557910,143,"[Robert Downey Jr., Chris Evans, Chris Hemswor...","[{'Director': 'Joss Whedon'}, {'Screenplay': '...","[new york, shield, superhero, based on comic, ...",PG-13
6,Joker,475557,55000000,0,"[Crime, Thriller, Drama]",tt7286456,"During the 1980s, a failed stand-up comedian i...","[DC Entertainment, Warner Bros. Pictures, DC C...",2019-10-02,234000000,122,"[Joaquin Phoenix, Robert De Niro, Zazie Beetz,...","[{'Producer': 'Todd Phillips'}, {'Producer': '...","[street gang, dc comics, clown, villain, joker...",R
7,The Dark Knight,155,185000000,1,"[Drama, Action, Crime, Thriller]",tt0468569,Batman raises the stakes in his war on crime. ...,"[DC Comics, Legendary Entertainment, Syncopy, ...",2008-07-16,1004558444,152,"[Christian Bale, Michael Caine, Heath Ledger, ...","[{'Screenplay': 'Jonathan Nolan'}, {'Original ...","[dc comics, crime fighter, secret identity, sc...",PG-13
8,Scarface,111,25000000,0,"[Action, Crime, Drama, Thriller]",tt0086250,After getting a green card in exchange for ass...,[Universal Pictures],1983-12-08,65884703,170,"[Al Pacino, Steven Bauer, Michelle Pfeiffer, M...","[{'Director': 'Brian De Palma'}, {'Producer': ...","[sibling relationship, miami, corruption, loss...",R
9,The Dark Knight Rises,49026,250000000,1,"[Action, Crime, Drama, Thriller]",tt1345836,Following the death of District Attorney Harve...,"[Syncopy, Legendary Entertainment, DC Entertai...",2012-07-16,1084939099,165,"[Christian Bale, Michael Caine, Gary Oldman, A...","[{'Director': 'Christopher Nolan'}, {'Producer...","[dc comics, crime fighter, terrorist, secret i...",PG-13


In [104]:
df.to_csv('test.csv')