# Step 1: Fetch Movie Data from API

In [11]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# API bearer token, read from the environment so it is never stored in the notebook.
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")

# Fail fast: os.getenv returns None when the variable is missing, and the
# Authorization header would then be sent as the literal string "Bearer None".
if not ACCESS_TOKEN:
    raise RuntimeError(
        "ACCESS_TOKEN is not set; define it in the environment or in a .env file."
    )

In [None]:
# Endpoint prefix for movie-detail lookups; the movie ID is appended per request.
base_url = "https://api.themoviedb.org/3/movie/"

# Bearer-token auth; ACCESS_TOKEN is read from the environment in the setup cell.
headers = {"accept": "application/json", "Authorization": f"Bearer {ACCESS_TOKEN}"}

# Seconds to wait on a single request before giving up. Without a timeout,
# requests can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

movie_ids = [
    0,  # the stored output shows this ID returning 404; it exercises the failure branch
    299534,
    19995,
    140607,
    299536,
    597,
    135397,
    420818,
    24428,
    168259,
    99861,
    284054,
    12445,
    181808,
    330457,
    351286,
    109445,
    321612,
    260513,
]

movies = []

# Fetch movie data for each ID. A Session reuses the underlying connection
# across requests instead of opening a new one per movie.
with requests.Session() as session:
    session.headers.update(headers)

    for movie_id in movie_ids:
        url = f"{base_url}{movie_id}"

        try:
            response = session.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Network-level failure (timeout, DNS, connection reset): report it
            # and keep going, matching the best-effort handling of non-200 replies.
            print(f"Failed to fetch data for movie ID {movie_id}: {exc}")
            continue

        if response.status_code == 200:
            movies.append(response.json())
        else:
            print(f"Failed to fetch data for movie ID {movie_id}: {response.status_code}")

# Create dataframe (one row per successfully fetched movie)
movies_df = pd.DataFrame(movies)
print(f"Fetched {len(movies_df)} movies successfully.")
# info() writes its summary to stdout and returns None, so it is called directly;
# wrapping it in print() would emit an extra "None" line.
movies_df.info()

Failed to fetch data for movie ID 0: 404
Fetched 18 movies successfully.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  18 non-null     bool   
 1   backdrop_path          18 non-null     object 
 2   belongs_to_collection  16 non-null     object 
 3   budget                 18 non-null     int64  
 4   genres                 18 non-null     object 
 5   homepage               18 non-null     object 
 6   id                     18 non-null     int64  
 7   imdb_id                18 non-null     object 
 8   origin_country         18 non-null     object 
 9   original_language      18 non-null     object 
 10  original_title         18 non-null     object 
 11  overview               18 non-null     object 
 12  popularity             18 non-null     float64
 13  poster_path            18 non-null     

# Step 2: Data Cleaning and Preprocessing

In [None]:
# Columns removed in this cleaning step.
COLUMNS_TO_DROP = ['adult', 'imdb_id', 'original_title', 'video', 'homepage', 'backdrop_path']

# Rebind rather than drop in place, and ignore labels that are already absent,
# so re-running this cell does not raise KeyError.
movies_df = movies_df.drop(columns=COLUMNS_TO_DROP, errors="ignore")

# info() prints its own report and returns None; calling it directly avoids a stray "None".
movies_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   belongs_to_collection  16 non-null     object 
 1   budget                 18 non-null     int64  
 2   genres                 18 non-null     object 
 3   id                     18 non-null     int64  
 4   origin_country         18 non-null     object 
 5   original_language      18 non-null     object 
 6   overview               18 non-null     object 
 7   popularity             18 non-null     float64
 8   poster_path            18 non-null     object 
 9   production_companies   18 non-null     object 
 10  production_countries   18 non-null     object 
 11  release_date           18 non-null     object 
 12  revenue                18 non-null     int64  
 13  runtime                18 non-null     int64  
 14  spoken_languages       18 non-null     object 
 15  status  