# Part 2: Data Import - Working with Web APIs and JSON

## Importing Data from JSON files 

In [1]:
import pandas as pd
import json

In [2]:
with open("blockbusters.json") as f:
    data = json.load(f)

In [4]:
type(data)

list

In [5]:
len(data)

18

In [6]:
df = pd.DataFrame(data)
df.head()

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
4,Titanic,597,1845034188,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194


In [10]:
pd.json_normalize(data = data, sep = "_").head()

Unnamed: 0,title,id,revenue,genres,runtime,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,belongs_to_collection
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",181,86311.0,The Avengers Collection,/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",162,87096.0,Avatar Collection,/nslJVsO58Etqkk17oXMuVK4gNOF.jpg,/8nCr9W7sKus2q9PLbYsnT7iCkuT.jpg,
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",136,10.0,Star Wars Collection,/iTQHKziZy9pAAY4hHEDCGPaOvFC.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg,
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",149,86311.0,The Avengers Collection,/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
4,Titanic,597,1845034188,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",194,,,,,


In [12]:
pd.json_normalize(data = data, record_path = "genres", meta = ["title", "id"],
                  record_prefix = "genre_").head()

Unnamed: 0,genre_id,genre_name,title,id
0,12,Adventure,Avengers: Endgame,299534
1,878,Science Fiction,Avengers: Endgame,299534
2,28,Action,Avengers: Endgame,299534
3,28,Action,Avatar,19995
4,12,Adventure,Avatar,19995


## Getting a specific movie's details (Using APIs to generate JSON)

In [13]:
api_key  = "api_key=27b741b9b8013ce85b41e886fee44ed6"

In [14]:
import requests
pd.options.display.max_columns = 30

In [15]:
movie_id = 140607

In [16]:
movie_api = "https://api.themoviedb.org/3/movie/{}?"
movie_api

'https://api.themoviedb.org/3/movie/{}?'

In [17]:
url = movie_api.format(movie_id) + api_key
url

'https://api.themoviedb.org/3/movie/140607?api_key=27b741b9b8013ce85b41e886fee44ed6'

In [18]:
r = requests.get(url)
r

<Response [200]>

In [19]:
data = r.json()

In [21]:
type(data)

dict

In [22]:
pd.Series(data)

adult                                                                False
backdrop_path                             /8BTsTfln4jlQrLXUBquXJ0ASQy9.jpg
belongs_to_collection    {'id': 10, 'name': 'Star Wars Collection', 'po...
budget                                                           245000000
genres                   [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
homepage                 http://www.starwars.com/films/star-wars-episod...
id                                                                  140607
imdb_id                                                          tt2488496
original_language                                                       en
original_title                                Star Wars: The Force Awakens
overview                 Thirty years after defeating the Galactic Empi...
popularity                                                          83.803
poster_path                               /wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg
production_companies     

In [23]:
df = pd.Series(data).to_frame().T
df

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/8BTsTfln4jlQrLXUBquXJ0ASQy9.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,83.803,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1, 'logo_path': '/o86DbpburjxrqAzEDhXZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.3,16962


In [24]:
pd.json_normalize(data, sep = "_")

Unnamed: 0,adult,backdrop_path,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path
0,False,/8BTsTfln4jlQrLXUBquXJ0ASQy9.jpg,245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,83.803,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1, 'logo_path': '/o86DbpburjxrqAzEDhXZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.3,16962,10,Star Wars Collection,/tdQzRSk4PXX6hzjLcQWHafYtZTI.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg


In [25]:
pd.json_normalize(data = data, record_path = "genres", meta = "title")

Unnamed: 0,id,name,title
0,28,Action,Star Wars: The Force Awakens
1,12,Adventure,Star Wars: The Force Awakens
2,878,Science Fiction,Star Wars: The Force Awakens
3,14,Fantasy,Star Wars: The Force Awakens


In [26]:
pd.json_normalize(data = data, record_path = "production_companies", meta = "title")

Unnamed: 0,id,logo_path,name,origin_country,title
0,1,/o86DbpburjxrqAzEDhXZcyE8pDb.png,Lucasfilm,US,Star Wars: The Force Awakens
1,11461,/p9FoEt5shEKRWRKVIlvFaEmRnun.png,Bad Robot,US,Star Wars: The Force Awakens


## Getting movies details within a date range (Using APIs to generate JSON)

In [27]:
discover_api = "https://api.themoviedb.org/3/discover/movie?"

In [28]:
query = "&primary_release_date.gte=2020-01-01&primary_release_date.lte=2020-02-29"

In [29]:
url = discover_api+api_key+query

In [30]:
data = requests.get(url).json()

In [32]:
pd.DataFrame(data).head()

Unnamed: 0,page,results,total_pages,total_results
0,1,"{'adult': False, 'backdrop_path': '/stmYfCUGd8...",240,4795
1,1,"{'adult': False, 'backdrop_path': '/5DNROrQwVr...",240,4795
2,1,"{'adult': False, 'backdrop_path': '/ySHlkHDbXz...",240,4795
3,1,"{'adult': False, 'backdrop_path': '/isBjNHBblz...",240,4795
4,1,"{'adult': False, 'backdrop_path': '/aiQICxiWNc...",240,4795


In [33]:
pd.DataFrame(data["results"]).head()

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,/stmYfCUGd8Iy6kAMBr6AmWqx8Bq.jpg,"[28, 878, 35, 10751]",454626,en,Sonic the Hedgehog,"Powered with incredible speed, Sonic The Hedge...",988.72,/aQvJ5WPzZgYVDrxLX4R6cLJCEaQ.jpg,2020-02-12,Sonic the Hedgehog,False,7.4,7928
1,False,/5DNROrQwVrnFcaNN4GSKd5oLXW0.jpg,"[16, 28, 53, 12, 14]",571265,ja,デジモンアドベンチャー LAST EVOLUTION 絆,"Tai is now a university student, living alone,...",201.045,/z3l5iwWQLxcInVnNkC8k8hxqZ60.jpg,2020-02-21,Digimon Adventure: Last Evolution Kizuna,False,7.8,113
2,False,/ySHlkHDbXztjdjRJ0pZN7FXRnaW.jpg,"[16, 12, 28, 14]",631132,ja,ゴブリンスレイヤー -GOBLIN'S CROWN-,Goblin Slayer and his party head up to the sno...,173.348,/oyDaF4za9C9GxfYSyZUmj2aZGJT.jpg,2020-02-01,Goblin Slayer: Goblin's Crown,False,7.6,163
3,False,/isBjNHBblzxrzHrAtRbjgkYzAut.jpg,"[10749, 18]",664413,pl,365 dni,"A woman falls victim to a dominant mafia boss,...",130.341,/6KwrHucIE3CvNT7kTm2MAlZ4fYF.jpg,2020-02-07,365 Days,False,7.0,6809
4,False,/aiQICxiWNcOsJruYxdPuhb6WtWu.jpg,"[9648, 18, 27, 12, 16]",658558,ja,劇場版 巨蟲列島,"After an airplane crash during a school trip, ...",114.197,/uJFki1BLHvEi4gaPCJvy9SZQZpx.jpg,2020-01-10,The Island of Giant Insects,False,4.5,14


##  Importing and Saving the Movies Dataset

In [35]:
movie_id = [0, 299534, 19995, 140607, 299536, 597, 135397,
            420818, 24428, 168259, 99861, 284054, 12445,
            181808, 330457, 351286, 109445, 321612, 260513]

In [36]:
basic_url = 'https://api.themoviedb.org/3/movie/{}?{}' 

In [37]:
json_list = []
for movie in movie_id:
    url = basic_url.format(movie, api_key)
    r = requests.get(url)
    if r.status_code != 200:
        continue
    else:
        data = r.json()
        json_list.append(data) 
df = pd.DataFrame(json_list)

In [39]:
df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",356000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",https://www.marvel.com/movies/avengers-endgame,299534,tt4154796,en,Avengers: Endgame,After the devastating events of Avengers: Infi...,296.707,/or06FN3Dka5tukK1e9sl16pB3iy.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2019-04-24,2797800564,181,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Part of the journey is the end.,Avengers: Endgame,False,8.3,20953
1,False,/jlQJDD0L5ZojjlS0KYnApdO0n19.jpg,"{'id': 87096, 'name': 'Avatar Collection', 'po...",237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.avatar.com/movies/avatar,19995,tt0499549,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",440.605,/jRXYjXNq0Cs2TcJjLkki24MLp7u.jpg,"[{'id': 444, 'logo_path': '/42UPdZl6B2cFXgNUAS...","[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-10,2847246203,162,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Enter the world of Pandora.,Avatar,False,7.5,25339
2,False,/8BTsTfln4jlQrLXUBquXJ0ASQy9.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,83.803,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1, 'logo_path': '/o86DbpburjxrqAzEDhXZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.3,16962
3,False,/lmZFxXgJE3vgrciwuDib0N8CfQo.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",300000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",https://www.marvel.com/movies/avengers-infinit...,299536,tt4154756,en,Avengers: Infinity War,As the Avengers and their allies have continue...,440.938,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2018-04-25,2046239637,149,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,An entire universe. Once and for all.,Avengers: Infinity War,False,8.3,24514
4,False,/yDI6D5ZQh67YU4r2ms8qcSbAviZ.jpg,,200000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,597,tt0120338,en,Titanic,101-year-old Rose DeWitt Bukater tells the sto...,135.744,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,"[{'id': 4, 'logo_path': '/gz66EfNoYPqHTYI4q9UE...","[{'iso_3166_1': 'US', 'name': 'United States o...",1997-11-18,2187463944,194,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Nothing on Earth could come between them.,Titanic,False,7.9,20955


In [40]:
df = df.loc[:, ["title", "id", "revenue", "genres", "belongs_to_collection", "runtime"]].sort_values(by = "revenue",
                                                                                                ascending = False)

In [41]:
df.head()

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
1,Avatar,19995,2847246203,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
4,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149


In [47]:
df.to_json("movies.json", orient = "records")

In [48]:
with open("movies.json") as f:
    data = json.load(f)

In [51]:
pd.json_normalize(data).head()

Unnamed: 0,title,id,revenue,genres,runtime,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path,belongs_to_collection
0,Avatar,19995,2847246203,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",162,87096.0,Avatar Collection,/gC3tW9a45RGOzzSh6wv91pFnmFr.jpg,/syGPZuzcHBBHMLiNDN0x0Tms4Fk.jpg,
1,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",181,86311.0,The Avengers Collection,/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
2,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",194,,,,,
3,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",136,10.0,Star Wars Collection,/tdQzRSk4PXX6hzjLcQWHafYtZTI.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg,
4,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",149,86311.0,The Avengers Collection,/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,


In [52]:
pd.json_normalize(data, "genres", "title").head()

Unnamed: 0,id,name,title
0,28,Action,Avatar
1,12,Adventure,Avatar
2,14,Fantasy,Avatar
3,878,Science Fiction,Avatar
4,12,Adventure,Avengers: Endgame


In [53]:
df.to_csv("movies_raw.csv", index = False)

In [54]:
df = pd.read_csv("movies_raw.csv")
df.head()

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avatar,19995,2847246203,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
1,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
2,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
3,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
4,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
