# Importing and saving the movies dataset (Best Practice)

In [1]:
import json
import os

import pandas as pd
import requests
# show up to 30 columns when a DataFrame is displayed
pd.set_option("display.max_columns", 30)

### Import dataset

In [2]:
# ids to request from the API: the id 0, which is not available (it is used
# below to show what an unknown id returns), followed by the 18 movies
# that we are interested in
movie_id = [
    0,
    299534, 19995, 140607, 299536, 597, 135397,
    420818, 24428, 168259, 99861, 284054, 12445,
    181808, 330457, 351286, 109445, 321612, 260513,
]

In [4]:
# URL template used for every API request.
# It has two positional placeholders, {}?{} : the first takes the movie id,
# the second takes the query string that carries the api key.
basic_url = "https://api.themoviedb.org/3/movie/{}?{}"

In [5]:
# api key :
# The key is read from the TMDB_API_KEY environment variable instead of being
# written in the notebook, so the secret is not published together with the
# code. Set it before starting Jupyter, e.g.  export TMDB_API_KEY=<your key>
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked and replaced.
_tmdb_key = os.environ.get("TMDB_API_KEY", "").strip()
if not _tmdb_key:
    # make the problem visible right here instead of failing in a later cell
    print("WARNING: TMDB_API_KEY is not set - the API requests below will be rejected")
# query-string fragment that fills the second placeholder of basic_url
api_key = "api_key=" + _tmdb_key

In [6]:
# The status code of the response to our http request tells us whether a
# movie id is available or not.
# First probe: an unavailable id (0), to see what the API answers.
unavailable_url = basic_url.format(0, api_key)
requests.get(unavailable_url).status_code

404

In [7]:
# Second probe: the same request with a correct id.
available_url = basic_url.format(299534, api_key)
requests.get(available_url).status_code

200

In [8]:
# Download the details of every id in movie_id, collect the JSON documents
# in json_list (one dict per movie) and build the DataFrame df from them.
# Only ids the API does not know (HTTP 404) are skipped. Any other error
# status (rejected api key, rate limit, server error, ...) raises
# requests.HTTPError, so a failed download cannot silently produce an
# incomplete dataset.
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection blocks the cell forever
json_list = []
with requests.Session() as session:  # one session re-uses the connection for all requests
    for movie in movie_id:
        response = session.get(basic_url.format(movie, api_key),
                               timeout=REQUEST_TIMEOUT)
        if response.status_code == 404:
            # unknown movie id: nothing to store for this one
            continue
        response.raise_for_status()
        json_list.append(response.json())
df = pd.DataFrame(json_list)

In [10]:
# let's have a look at the first rows of our df :
df.head(5)

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",356000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",https://www.marvel.com/movies/avengers-endgame,299534,tt4154796,en,Avengers: Endgame,After the devastating events of Avengers: Infi...,260.137,/or06FN3Dka5tukK1e9sl16pB3iy.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2019-04-24,2797800564,181,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Part of the journey is the end.,Avengers: Endgame,False,8.3,21020
1,False,/jlQJDD0L5ZojjlS0KYnApdO0n19.jpg,"{'id': 87096, 'name': 'Avatar Collection', 'po...",237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.avatar.com/movies/avatar,19995,tt0499549,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",560.57,/jRXYjXNq0Cs2TcJjLkki24MLp7u.jpg,"[{'id': 444, 'logo_path': '/42UPdZl6B2cFXgNUAS...","[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-10,2847246203,162,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Enter the world of Pandora.,Avatar,False,7.5,25413
2,False,/8BTsTfln4jlQrLXUBquXJ0ASQy9.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,69.52,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1, 'logo_path': '/o86DbpburjxrqAzEDhXZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.3,16985
3,False,/lmZFxXgJE3vgrciwuDib0N8CfQo.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",300000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",https://www.marvel.com/movies/avengers-infinit...,299536,tt4154756,en,Avengers: Infinity War,As the Avengers and their allies have continue...,388.151,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2018-04-25,2046239637,149,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,An entire universe. Once and for all.,Avengers: Infinity War,False,8.3,24575
4,False,/yDI6D5ZQh67YU4r2ms8qcSbAviZ.jpg,,200000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,597,tt0120338,en,Titanic,101-year-old Rose DeWitt Bukater tells the sto...,191.95,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,"[{'id': 4, 'logo_path': '/gz66EfNoYPqHTYI4q9UE...","[{'iso_3166_1': 'US', 'name': 'United States o...",1997-11-18,2187463944,194,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Nothing on Earth could come between them.,Titanic,False,7.9,20997


In [11]:
# Build a more useful df: keep only the columns of interest and order the
# rows from the highest to the lowest revenue.
wanted_columns = ["title", "id", "revenue", "genres",
                  "belongs_to_collection", "runtime"]
df = df[wanted_columns].sort_values("revenue", ascending=False)

In [13]:
# first rows of the reduced df, highest revenue first :
df.head(5)

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
1,Avatar,19995,2847246203,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
4,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149


### Store and save dataset

In [14]:
# Save the df as a json file.
# to_json takes the name of the output file and the json orientation;
# orient="records" writes a list with one {column: value} object per row.
df.to_json(path_or_buf="movies.json", orient="records")