# 7. What is an API - The Movie Database API

## Web APIs are the most common way to get data from a web application without knowing the details of its database

## url structure for movie module:
## "https://api.themoviedb.org/3/movie/insert_movie_id?api_key=insert_api_key" (replace "insert_movie_id" with movie id and "insert_api_key" with your personal api-key)
## url structure for discover module:
## "https://api.themoviedb.org/3/discover/movie?api_key=insert_api_key&query1&query2..." (replace "insert_api_key" with your personal api-key and append the appropriate queries as a query string)

# 8. Working with APIs and JSON (Part 1)

In [38]:
import json
import os

import pandas as pd
import requests
# Show at most 10 columns when a DataFrame is displayed.
pd.options.display.max_columns = 10
# TMDB API key, stored as a ready-to-append query-string fragment ("api_key=<key>").
# Set the TMDB_API_KEY environment variable to use your own key; when it is not
# set, the notebook falls back to the demo key that the author marked as free to use.
api_key = "api_key=" + os.environ.get("TMDB_API_KEY", "93d535406a3ad38890f9d5de388e32a1")

In [3]:
# TMDB id of the single movie requested in the cells below.
movie_id = 140607

In [4]:
# URL template for the movie endpoint; "{}" is the placeholder for the movie id.
movie_api = "https://api.themoviedb.org/3/movie/{}?" # "{}" is filled in with str.format below
movie_api

'https://api.themoviedb.org/3/movie/{}?'

In [5]:
# Fill the movie id into the template and append the "api_key=..." fragment.
url = movie_api.format(movie_id) + api_key
url

'https://api.themoviedb.org/3/movie/140607?api_key=93d535406a3ad38890f9d5de388e32a1'

In [6]:
# Send the GET request; displaying the Response object shows its status code.
r = requests.get(url)
r # <Response [200]> means the request succeeded

<Response [200]>

In [7]:
# Parse the JSON body of the response into Python objects.
data = r.json()

In [8]:
# Display the parsed payload to inspect its keys and nested values.
data

{'adult': False,
 'backdrop_path': '/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg',
 'belongs_to_collection': {'id': 10,
  'name': 'Star Wars Collection',
  'poster_path': '/iTQHKziZy9pAAY4hHEDCGPaOvFC.jpg',
  'backdrop_path': '/d8duYyyC9J5T825Hg7grmaabfxQ.jpg'},
 'budget': 245000000,
 'genres': [{'id': 28, 'name': 'Action'},
  {'id': 12, 'name': 'Adventure'},
  {'id': 878, 'name': 'Science Fiction'},
  {'id': 14, 'name': 'Fantasy'}],
 'homepage': 'http://www.starwars.com/films/star-wars-episode-vii',
 'id': 140607,
 'imdb_id': 'tt2488496',
 'original_language': 'en',
 'original_title': 'Star Wars: The Force Awakens',
 'overview': 'Thirty years after defeating the Galactic Empire, Han Solo and his allies face a new threat from the evil Kylo Ren and his army of Stormtroopers.',
 'popularity': 50.389,
 'poster_path': '/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg',
 'production_companies': [{'id': 1,
   'logo_path': '/o86DbpburjxrqAzEDhXZcyE8pDb.png',
   'name': 'Lucasfilm Ltd.',
   'origin_country': 'US'},
  {'i

In [9]:
# Check which Python type the parsed JSON payload has.
type(data)

dict

In [10]:
#pd.DataFrame(data) # fails here: the dict mixes scalar values with nested lists/dicts, so it cannot be turned into columns directly

In [11]:
# A Series uses each top-level key as an index label and keeps nested values as Python objects.
pd.Series(data)

adult                                                                False
backdrop_path                             /k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg
belongs_to_collection    {'id': 10, 'name': 'Star Wars Collection', 'po...
budget                                                           245000000
genres                   [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
homepage                 http://www.starwars.com/films/star-wars-episod...
id                                                                  140607
imdb_id                                                          tt2488496
original_language                                                       en
original_title                                Star Wars: The Force Awakens
overview                 Thirty years after defeating the Galactic Empi...
popularity                                                          50.389
poster_path                               /wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg
production_companies     

In [12]:
# Turn the Series into a one-column frame, then transpose it so that the movie
# becomes a single row with one column per key.
df = pd.Series(data).to_frame().T # .T = transpose
df

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,...,tagline,title,video,vote_average,vote_count
0,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",...,Every generation has a story.,Star Wars: The Force Awakens,False,7.4,15697


In [13]:
# Flatten nested dicts into separate columns joined with "_" (e.g. belongs_to_collection_id);
# list-valued fields such as genres are left as they are.
pd.json_normalize(data, sep = "_")

Unnamed: 0,adult,backdrop_path,budget,genres,homepage,...,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path
0,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,...,15697,10,Star Wars Collection,/iTQHKziZy9pAAY4hHEDCGPaOvFC.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg


In [14]:
# One row per entry of the "genres" list, with the movie title repeated on every row.
pd.json_normalize(data = data, record_path = "genres", meta = "title")

Unnamed: 0,id,name,title
0,28,Action,Star Wars: The Force Awakens
1,12,Adventure,Star Wars: The Force Awakens
2,878,Science Fiction,Star Wars: The Force Awakens
3,14,Fantasy,Star Wars: The Force Awakens


In [15]:
# One row per entry of the "production_companies" list, with the movie title repeated on every row.
pd.json_normalize(data = data, record_path = "production_companies", meta = "title")

Unnamed: 0,id,logo_path,name,origin_country,title
0,1,/o86DbpburjxrqAzEDhXZcyE8pDb.png,Lucasfilm Ltd.,US,Star Wars: The Force Awakens
1,11461,/p9FoEt5shEKRWRKVIlvFaEmRnun.png,Bad Robot,US,Star Wars: The Force Awakens


# 9. How to work with your own API-KEY

# 10. Working with APIs and JSON (Part 2)

In [16]:
# Base URL of the discover endpoint; the api key and the filters are appended as a query string.
discover_api = "https://api.themoviedb.org/3/discover/movie?"

In [17]:
# Filters: primary release date between 2020-01-01 and 2020-02-29 (inclusive bounds via .gte / .lte).
query = "&primary_release_date.gte=2020-01-01&primary_release_date.lte=2020-02-29" # a page parameter can be appended as well (e.g. &page=2)

In [18]:
# Assemble the full request URL: base + "api_key=..." + "&filter=..." parts.
url = discover_api+api_key+query

In [19]:
# Query the discover endpoint and parse the JSON body.
# The timeout keeps an unresponsive server from hanging the cell, and
# raise_for_status() reports an HTTP error here instead of letting an error
# payload fail later when data["results"] is accessed.
discover_response = requests.get(url, timeout=10)
discover_response.raise_for_status()
data = discover_response.json()

In [20]:
# Display the parsed discover payload: paging information plus a "results" list.
data

{'page': 1,
 'results': [{'adult': False,
   'backdrop_path': '/jiqD14fg7UTZOT6qgvzTmfRYpWI.jpg',
   'genre_ids': [28, 80],
   'id': 495764,
   'original_language': 'en',
   'original_title': 'Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)',
   'overview': 'Harley Quinn joins forces with a singer, an assassin and a police detective to help a young girl who had a hit placed on her after she stole a rare diamond from a crime lord.',
   'popularity': 488.493,
   'poster_path': '/h4VB6m0RwcicVEZvzftYZyKXs6K.jpg',
   'release_date': '2020-02-05',
   'title': 'Birds of Prey (and the Fantabulous Emancipation of One Harley Quinn)',
   'video': False,
   'vote_average': 7.1,
   'vote_count': 7353},
  {'adult': False,
   'backdrop_path': '/3N316jUSdhvPyYTW29G4v9ebbcS.jpg',
   'genre_ids': [53, 28, 80],
   'id': 38700,
   'original_language': 'en',
   'original_title': 'Bad Boys for Life',
   'overview': 'Marcus and Mike are forced to confront new threats, career changes, an

In [21]:
# Passing the whole payload keeps every movie dict packed inside a single "results" column.
pd.DataFrame(data)

Unnamed: 0,page,results,total_pages,total_results
0,1,"{'adult': False, 'backdrop_path': '/jiqD14fg7U...",194,3876
1,1,"{'adult': False, 'backdrop_path': '/3N316jUSdh...",194,3876
2,1,"{'adult': False, 'backdrop_path': '/1umKVgbjFG...",194,3876
3,1,"{'adult': False, 'backdrop_path': '/6mKAKhj8PO...",194,3876
4,1,"{'adult': False, 'backdrop_path': '/5VKquU8PNu...",194,3876
5,1,"{'adult': False, 'backdrop_path': '/gGwA6YErMj...",194,3876
6,1,"{'adult': False, 'backdrop_path': '/dT05ycGuf4...",194,3876
7,1,"{'adult': False, 'backdrop_path': '/ww7eC3BqSb...",194,3876
8,1,"{'adult': False, 'backdrop_path': '/4br4B8C0SR...",194,3876
9,1,"{'adult': False, 'backdrop_path': '/lsgYcIbcoQ...",194,3876


In [22]:
# Building the frame from the "results" list gives one row per movie and one column per key.
pd.DataFrame(data["results"])

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,...,release_date,title,video,vote_average,vote_count
0,False,/jiqD14fg7UTZOT6qgvzTmfRYpWI.jpg,"[28, 80]",495764,en,...,2020-02-05,Birds of Prey (and the Fantabulous Emancipatio...,False,7.1,7353
1,False,/3N316jUSdhvPyYTW29G4v9ebbcS.jpg,"[53, 28, 80]",38700,en,...,2020-01-15,Bad Boys for Life,False,7.2,6180
2,False,/1umKVgbjFG5Cho5ZKTpcvRFJjuJ.jpg,"[35, 53, 80]",609242,es,...,2020-01-16,The Heist of the Century,False,8.0,481
3,False,/6mKAKhj8POVGqV1GsroS5mGIUe9.jpg,"[14, 28, 12]",666750,en,...,2020-02-04,Dragonheart: Vengeance,False,6.9,213
4,False,/5VKquU8PNujrxLmsYGHf2TCRNFQ.jpg,"[878, 28, 12, 9648, 36, 14]",582306,en,...,2020-01-24,Assassin 33 A.D.,False,5.2,58
5,False,/gGwA6YErMjiROavfGyxdciQnlTA.jpg,"[18, 53]",596247,es,...,2020-01-23,Pacto de Fuga,False,7.8,55
6,False,/dT05ycGuf4h1uYYAJttxTFKkfBQ.jpg,"[10752, 18]",662334,es,...,2020-01-28,Chaco,False,7.9,40
7,False,/ww7eC3BqSbFsyE5H5qMde8WkxJ2.jpg,"[28, 27, 878, 53]",443791,en,...,2020-01-08,Underwater,False,6.3,2017
8,False,/4br4B8C0SRIYcKHUgoaOlGo50MU.jpg,[27],575088,ru,...,2020-02-27,Baba Yaga: Terror of the Dark Forest,False,6.2,119
9,False,/lsgYcIbcoQeDZXsHYMOnkvk3sn0.jpg,"[18, 53]",505225,en,...,2020-02-14,The Last Thing He Wanted,False,5.0,324


# 11. Importing and Storing the Movies Dataset (Best Practice)

In [23]:
# TMDB movie ids to download. The leading 0 is not a valid id: the status-code
# check further down returns 404 for it, so the download loop skips it.
# NOTE(review): 1353977 also appears to be skipped (in the stored output the row
# after Titanic is The Lion King) — possibly a typo for 135397; TODO confirm.
movie_id = [0,299534, 19995,140607, 299536, 597, 1353977,
           420818, 24428, 168259, 99861, 284054, 12445,
           181808, 330457, 351286, 109445, 321612, 260513]

In [24]:
# URL template with two placeholders: the movie id and the "api_key=..." fragment.
basic_url = 'https://api.themoviedb.org/3/movie/{}?{}'  # filled in with str.format(movie, api_key)

In [29]:
# Download the details of every id in movie_id and collect the JSON payloads.
# A single Session reuses the HTTP connection across requests, and the timeout
# keeps one unresponsive request from hanging the whole cell.
json_list = []
failed_ids = []  # ids whose request did not come back with HTTP 200
with requests.Session() as session:
    for movie in movie_id:
        url = basic_url.format(movie, api_key)
        r = session.get(url, timeout=10)
        if r.status_code != 200:
            # Skip this id, but remember it so the gap in the data is visible.
            failed_ids.append(movie)
            continue
        data = r.json()
        json_list.append(data)
if failed_ids:
    print("Skipped movie ids (non-200 response):", failed_ids)
# Build the frame once from the list of dicts: one row per downloaded movie.
df = pd.DataFrame(json_list)

In [30]:
# Request movie id 0 to show the status code returned for an id that does not exist.
requests.get(basic_url.format(0, api_key)).status_code

404

In [31]:
# Display the downloaded movies (one row per successful request).
df

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,...,tagline,title,video,vote_average,vote_count
0,False,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",356000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",...,Part of the journey is the end.,Avengers: Endgame,False,8.3,17858
1,False,/AmHOQ7rpHwiaUMRjKXztnauSJb7.jpg,"{'id': 87096, 'name': 'Avatar Collection', 'po...",237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",...,Enter the World of Pandora.,Avatar,False,7.5,23215
2,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",...,Every generation has a story.,Star Wars: The Force Awakens,False,7.4,15697
3,False,/lmZFxXgJE3vgrciwuDib0N8CfQo.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",300000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",...,An entire universe. Once and for all.,Avengers: Infinity War,False,8.3,21542
4,False,/6VmFqApQRyZZzmiGOQq2C92jyvH.jpg,,200000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",...,Nothing on Earth could come between them.,Titanic,False,7.9,19023
5,False,/nRXO2SnOA75OsWhNhXstHB8ZmI3.jpg,"{'id': 762512, 'name': 'The Lion King (Reboot)...",260000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",...,The King has Returned.,The Lion King,False,7.2,7596
6,False,/nNmJRkg8wWnRmzQDe2FwKbPIsJV.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",220000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",...,Some assembly required.,The Avengers,False,7.7,24471
7,False,/vkQvqH8WQkUiNklDcbUtHEUgSNQ.jpg,"{'id': 9485, 'name': 'The Fast and the Furious...",190000000,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",...,Vengeance Hits Home,Furious 7,False,7.3,8259
8,False,/xnqust9Li4oxfhXD5kcPi3UC8i4.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",250000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",...,A New Age Has Come.,Avengers: Age of Ultron,False,7.3,17717
9,False,/AlFqBwJnokrp9zWTXOUv7uhkaeq.jpg,"{'id': 529892, 'name': 'Black Panther Collecti...",200000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",...,Long live the king.,Black Panther,False,7.4,17174


In [33]:
# Keep only the columns used from here on, ordered from highest to lowest revenue.
selected_columns = ["title", "id", "revenue", "genres", "belongs_to_collection", "runtime"]
df = df[selected_columns].sort_values("revenue", ascending=False)

In [34]:
# Display the reduced frame; the index labels still reflect the original download order.
df

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
4,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
5,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...","{'id': 762512, 'name': 'The Lion King (Reboot)...",118
6,The Avengers,24428,1518815515,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...","{'id': 86311, 'name': 'The Avengers Collection...",143
7,Furious 7,168259,1515047671,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...","{'id': 9485, 'name': 'The Fast and the Furious...",137
12,Frozen II,330457,1450026933,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...","{'id': 386382, 'name': 'Frozen Collection', 'p...",103
8,Avengers: Age of Ultron,99861,1405403694,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 86311, 'name': 'The Avengers Collection...",141


In [43]:
# Store the frame as JSON. orient="records" writes a list with one object per row,
# which is the layout pd.json_normalize reads back in the cells below.
df.to_json("movies.json", orient = "records")

In [44]:
# Read the stored file back and decode it into plain Python objects.
with open("movies.json") as movies_file:
    data = json.loads(movies_file.read())

In [45]:
# Flatten nested dicts into dotted columns (e.g. belongs_to_collection.id);
# list-valued fields such as genres stay as lists.
pd.json_normalize(data)

Unnamed: 0,title,id,revenue,genres,runtime,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path,belongs_to_collection
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",181,86311.0,The Avengers Collection,/yQpAleQ1KHebVem2vwWL6VPqILT.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",162,87096.0,Avatar Collection,/gC3tW9a45RGOzzSh6wv91pFnmFr.jpg,/syGPZuzcHBBHMLiNDN0x0Tms4Fk.jpg,
2,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",194,,,,,
3,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",136,10.0,Star Wars Collection,/iTQHKziZy9pAAY4hHEDCGPaOvFC.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg,
4,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",149,86311.0,The Avengers Collection,/yQpAleQ1KHebVem2vwWL6VPqILT.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
5,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",118,762512.0,The Lion King (Reboot) Collection,,,
6,The Avengers,24428,1518815515,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",143,86311.0,The Avengers Collection,/yQpAleQ1KHebVem2vwWL6VPqILT.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
7,Furious 7,168259,1515047671,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",137,9485.0,The Fast and the Furious Collection,/zQdytnqfsWKJlqazqfMBL2L7aql.jpg,/gC9BUFiROWtaMsluGYziZ6lR4OJ.jpg,
8,Frozen II,330457,1450026933,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...",103,386382.0,Frozen Collection,/dwdyvzFX9NEI7oFlRnZurRmlswQ.jpg,/6QonAoIN0jhWZZWZGJswSxHzUnU.jpg,
9,Avengers: Age of Ultron,99861,1405403694,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",141,86311.0,The Avengers Collection,/yQpAleQ1KHebVem2vwWL6VPqILT.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,


In [46]:
# One row per genre entry across all movies, each row tagged with its movie title.
pd.json_normalize(data=data, record_path="genres", meta="title")

Unnamed: 0,id,name,title
0,12,Adventure,Avengers: Endgame
1,878,Science Fiction,Avengers: Endgame
2,28,Action,Avengers: Endgame
3,28,Action,Avatar
4,12,Adventure,Avatar
5,14,Fantasy,Avatar
6,878,Science Fiction,Avatar
7,18,Drama,Titanic
8,10749,Romance,Titanic
9,28,Action,Star Wars: The Force Awakens


# 12. Importing and Storing the Movies Dataset (Real World Scenario)

In [48]:
df # CSV files handle flat, tabular data well,
# but they are quite limited when it comes to nested data (see the genres column)

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
4,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
5,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...","{'id': 762512, 'name': 'The Lion King (Reboot)...",118
6,The Avengers,24428,1518815515,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...","{'id': 86311, 'name': 'The Avengers Collection...",143
7,Furious 7,168259,1515047671,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...","{'id': 9485, 'name': 'The Fast and the Furious...",137
12,Frozen II,330457,1450026933,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...","{'id': 386382, 'name': 'Frozen Collection', 'p...",103
8,Avengers: Age of Ultron,99861,1405403694,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 86311, 'name': 'The Avengers Collection...",141


In [49]:
# Write the frame to CSV; index = False leaves the index labels out of the file.
df.to_csv("movies_raw.csv", index = False)

In [50]:
# Read the CSV back; note that this replaces df with the re-imported version.
df = pd.read_csv("movies_raw.csv")

In [51]:
# Display the re-imported frame; it now carries a fresh 0..n-1 index.
df

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
2,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
3,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
4,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
5,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...","{'id': 762512, 'name': 'The Lion King (Reboot)...",118
6,The Avengers,24428,1518815515,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...","{'id': 86311, 'name': 'The Avengers Collection...",143
7,Furious 7,168259,1515047671,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...","{'id': 9485, 'name': 'The Fast and the Furious...",137
8,Frozen II,330457,1450026933,"[{'id': 10751, 'name': 'Family'}, {'id': 16, '...","{'id': 386382, 'name': 'Frozen Collection', 'p...",103
9,Avengers: Age of Ultron,99861,1405403694,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 86311, 'name': 'The Avengers Collection...",141


In [52]:
# After the CSV round trip this value is a plain string, not a list of dicts,
# which is why the nested columns have to be cleaned before they can be used.
df.loc[0, "genres"]

"[{'id': 12, 'name': 'Adventure'}, {'id': 878, 'name': 'Science Fiction'}, {'id': 28, 'name': 'Action'}]"