In [1]:
import pandas as pd
import json
import requests
import time

Gather Movie Data via TMDB API  
    

a. Set up the API    
    * Create a free [TMDB account](https://developer.themoviedb.org/docs/getting-started)  
    * Generate an API key are review their documentation, especially:  
        * /discover/movie  
        * /movie/{movie_id}  
        * /search/movie  
    

b. Collect top movies (2015-2024)  
    For each year from 2015 to 2024:  
        * Query TMDB for the top 100 movies (by vote count).  
        * For each movie, gather:  
            * Title  
            * Release Year  
            * Genre(s)  
            * Vote Average  
            * Vote Count  
            * Budget  
            * Revenue  
            * TMDB ID  
        

* Store all results in a single DataFrame and export to movies_2015_2024.csv.
* Hint: TMDB rate limits are generous for free accounts, but you should pause between requests (eg. time.sleep(0.25)). 
* Some Oscar films may not appear in the top 100 by vote count. For any missing, use the /search/movie endpoint to add it.  

In [2]:
with open('keys_api_movie.json') as fi:
    credentials = json.load(fi)
api_key = credentials['api_key']

In [3]:
endpoint = 'https://api.themoviedb.org/3/discover/movie'
movie_data = pd.DataFrame()
pd.set_option('display.max_colwidth', None)
for release_year in range(2015,2025):
    yearly_movies = pd. DataFrame()
    for page in range(1,6):
        params = {
        'page' :page,
        'api_key' : api_key,
        'sort_by' : 'vote_count.desc',
        'release_date.gte' : f"{release_year}-01-01",
        'release_date.lte' : f"{release_year}-12-31"
        }
        api_response = requests.get(endpoint, params=params)

        data = api_response.json()
        data
        
        page_data= pd.json_normalize(data['results'])
        yearly_movies = pd.concat([page_data,yearly_movies], ignore_index=True)
        
        yearly_movies["release_year"] = pd.to_datetime(yearly_movies["release_date"], errors="coerce").dt.year
        yearly_movies["release_year"] = release_year
        movie_data= pd.concat([yearly_movies,movie_data], ignore_index=True)
        
        #print(movie_data)
movie_data = movie_data[['id','title','genre_ids','vote_average','vote_count','release_year']]
#movie_data.columns
movie_data.head()

Unnamed: 0,id,title,genre_ids,vote_average,vote_count,release_year
0,119450,Dawn of the Planet of the Apes,"[878, 28, 18, 53]",7.335,11898,2024
1,49530,In Time,"[28, 53, 878]",6.971,11837,2024
2,1359,American Psycho,"[53, 18, 80]",7.409,11717,2024
3,93456,Despicable Me 2,"[16, 35, 10751]",6.937,11705,2024
4,812,Aladdin,"[16, 10751, 12, 14, 10749]",7.654,11679,2024


In [4]:

movie_id = 1198426

endpoint1 = f'https://api.themoviedb.org/3/movie/{movie_id}'
params = {
        'page' :page,
        'api_key' : api_key,
        
        }
movie_response = requests.get(endpoint1,params)
movie_response.json()

{'adult': False,
 'backdrop_path': '/4damV6u8Za9p03SH9jvKr3TwHQC.jpg',
 'belongs_to_collection': None,
 'budget': 0,
 'genres': [{'id': 16, 'name': 'Animation'}, {'id': 10751, 'name': 'Family'}],
 'homepage': '',
 'id': 1198426,
 'imdb_id': 'tt17423376',
 'origin_country': ['DO'],
 'original_language': 'es',
 'original_title': 'Capitán Avispa',
 'overview': "In Avispatropolis, the fearless Captain Avispa emerges as an exemplary protector of the city, whose courage and convictions always prevail over the forces of evil. His unbreakable power is nourished by absolute sincerity, as he would lose his strength if he dared to weave deceit. Only in pursuit of noble causes does he allow himself to deviate from the truth. As is common in stories of this kind, Captain Wasp is surrounded by a constellation of close friends and arch-enemies, whose stories have their origins in the artist's songs.",
 'popularity': 27.5096,
 'poster_path': '/zmthz3CuFljmBQcfuaz4hBNwbQ0.jpg',
 'production_companies':

In [None]:
all_movie_data = pd.DataFrame()
for movie_id in movie_data['id'].head():
    tmdb_endpoint = f'https://api.themoviedb.org/3/movie/{movie_id}'
    params = {
    'api_key' : api_key,
    
     }
  

    tmdb_response = requests.get(tmdb_endpoint,params)
    tmdb_data  = tmdb_response.json()
    tmdb_df = pd.json_normalize(tmdb_data)
    all_movie_data = pd.concat([all_movie_data,tmdb_df], ignore_index=True)
    time.sleep(0.25)
all_movie_data=all_movie_data[['title','vote_average','vote_count','budget','revenue','imdb_id']]
print(all_movie_data.head())   