In [1]:
import requests
import pandas as pd
from time import sleep
import os
import json

# Import API key from config file
from config import TMDB_API_KEY

# Get API key: from environment variable or config file
api_key = os.getenv("TMDB_API_KEY") or TMDB_API_KEY

# TMDB base URL
base_url = "https://api.themoviedb.org/3"

def fetch_movie_data(movie_id):
    """Fetch movie details from TMDB API."""
    url = f"{base_url}/movie/{movie_id}"
    params = {
        "api_key": api_key,
        "language": "en-US"
    }
    response = requests.get(url, params=params)
    
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching data for movie ID {movie_id}: {response.status_code}")
        try:
            print(response.json())  # Print error details from API
        except:
            pass
        return None

def main():
    """Main pipeline to fetch movies and build a DataFrame."""
    # Example movie IDs (test small batch first)
    movie_ids = [550, 551, 552, 553, 554]
    
    movie_data_list = []

    for movie_id in movie_ids:
        movie_data = fetch_movie_data(movie_id)
        if movie_data:
            print("\n🔍 API Response for Movie ID:", movie_id)
            print(json.dumps(movie_data, indent=4))  # Pretty print JSON for clarity
            
            movie_data_list.append(movie_data)
        sleep(1)  # Be respectful of rate limits

    # Convert to DataFrame
    if movie_data_list:
        df = pd.DataFrame(movie_data_list)

        print("\n Fetched Movie DataFrame:")
        print(df.head())

        print("\n DataFrame Columns:")
        print(df.columns)

        # Save DataFrame (optional)
        # df.to_csv("tmdb_movies_raw.csv", index=False)

    else:
        print("No movie data fetched.")

if __name__ == "__main__":
    main()




🔍 API Response for Movie ID: 550
{
    "adult": false,
    "backdrop_path": "/xRyINp9KfMLVjRiO5nCsoRDdvvF.jpg",
    "belongs_to_collection": null,
    "budget": 63000000,
    "genres": [
        {
            "id": 18,
            "name": "Drama"
        }
    ],
    "homepage": "http://www.foxmovies.com/movies/fight-club",
    "id": 550,
    "imdb_id": "tt0137523",
    "origin_country": [
        "US"
    ],
    "original_language": "en",
    "original_title": "Fight Club",
    "overview": "A ticking-time-bomb insomniac and a slippery soap salesman channel primal male aggression into a shocking new form of therapy. Their concept catches on, with underground \"fight clubs\" forming in every town, until an eccentric gets in the way and ignites an out-of-control spiral toward oblivion.",
    "popularity": 27.2832,
    "poster_path": "/pB8BM7pdSp6B6Ih7QZ4DrQ3PmJK.jpg",
    "production_companies": [
        {
            "id": 711,
            "logo_path": "/tEiIH5QesdheJmDAqQwvtN60727.pn

In [2]:
url = f"https://api.themoviedb.org/3/discover/movie"
params = {
    "api_key": api_key,
    "language": "en-US",
    "primary_release_date.gte": "2000-01-01",
    "primary_release_date.lte": "2024-12-31",
    "page": 1
}

response = requests.get(url, params=params)
data = response.json()

print(f"Total movies matching query: {data['total_results']}")
print(f"Total pages: {data['total_pages']}")


Total movies matching query: 578444
Total pages: 28923


In [3]:
#  Storage for results
movie_data = []

# Loop through TMDB's /discover/movie endpoint
for page in range(1, 400):  # Start with 2 pages for quick testing; increase to 201 later
    discover_url = f"{base_url}/discover/movie"
    params = {
        "api_key": api_key,
        "language": "en-US",
        "sort_by": "popularity.desc",
        "include_adult": False,
        "include_video": False,
        "page": page,
        "primary_release_date.gte": "2000-01-01",
        "primary_release_date.lte": "2024-12-31"
    }

    response = requests.get(discover_url, params=params)
    if response.status_code != 200:
        print(f"Failed on page {page} — Status: {response.status_code}")
        print(response.json())  # Show error
        break

    movies = response.json().get("results", [])

    for movie in movies:
        movie_id = movie["id"]

        # Fetch movie details
        movie_details = requests.get(f"{base_url}/movie/{movie_id}", params={"api_key": api_key}).json()
        credits = requests.get(f"{base_url}/movie/{movie_id}/credits", params={"api_key": api_key}).json()

        # Get director and top actor
        director = next((c["name"] for c in credits.get("crew", []) if c["job"] == "Director"), None)
        cast = [c["name"] for c in credits.get("cast", [])][:1]

        # Collect relevant fields
        movie_data.append({
            "title": movie_details.get("title"),
            "release_date": movie_details.get("release_date"),
            "budget": movie_details.get("budget"),
            "revenue": movie_details.get("revenue"),
            "runtime": movie_details.get("runtime"),
            "genres": [g["name"] for g in movie_details.get("genres", [])],
            "popularity": movie_details.get("popularity"),
            "vote_average": movie_details.get("vote_average"),
            "vote_count": movie_details.get("vote_count"),
            "original_language": movie_details.get("original_language"),
            "production_companies": [pc["name"] for pc in movie_details.get("production_companies", [])],
            "belongs_to_collection": movie_details.get("belongs_to_collection") is not None,
            "director": director,
            "lead_actor": cast[0] if cast else None
        })

    print(f"Page {page} complete — movies collected: {len(movie_data)}")
    sleep(0.3)

# Convert to DataFrame
df_movies = pd.DataFrame(movie_data)

# Print the DataFrame
df_movies # Preview first few rows


Page 1 complete — movies collected: 20
Page 2 complete — movies collected: 40
Page 3 complete — movies collected: 60
Page 4 complete — movies collected: 80
Page 5 complete — movies collected: 100
Page 6 complete — movies collected: 120
Page 7 complete — movies collected: 140
Page 8 complete — movies collected: 160
Page 9 complete — movies collected: 180
Page 10 complete — movies collected: 200
Page 11 complete — movies collected: 220
Page 12 complete — movies collected: 240
Page 13 complete — movies collected: 260
Page 14 complete — movies collected: 280
Page 15 complete — movies collected: 300
Page 16 complete — movies collected: 320
Page 17 complete — movies collected: 340
Page 18 complete — movies collected: 360
Page 19 complete — movies collected: 380
Page 20 complete — movies collected: 400
Page 21 complete — movies collected: 420
Page 22 complete — movies collected: 440
Page 23 complete — movies collected: 460
Page 24 complete — movies collected: 480
Page 25 complete — movies col

Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor
0,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",171.5098,7.500,6540,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Dean DeBlois,Daveigh Chase
1,The Great Escape,2023-05-26,200000,0,90,"[Action, Thriller]",142.7939,5.700,3,pt,[],False,,
2,Brave Citizen,2023-10-25,0,2116112,112,"[Action, Drama, Comedy]",128.8376,6.800,43,ko,"[Studio N, Oscar 10 Studio, Vol Media]",False,Park Jin-pyo,Shin Hye-sun
3,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",114.6447,7.081,2411,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho
4,Bambi: A Life in the Woods,2024-10-16,0,1419154,77,"[Adventure, Family, Documentary]",113.2598,5.900,23,fr,"[MC4, Gébéka Films, Kinology]",False,Michel Fessler,Mylène Farmer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7975,Chasing Liberty,2004-01-09,23000000,12195626,111,"[Comedy, Romance]",1.8902,6.243,622,en,"[Micro Fusion 2003-2, Alcon Entertainment, Tra...",False,Andy Cadiff,Mandy Moore
7976,Escape from Germany,2024-04-11,1000000,2601719,97,"[History, Drama, War]",1.8899,5.250,14,en,[],False,T.C. Christensen,Paul Wuthrich
7977,Devil's Due,2014-01-08,7000000,36433975,89,[Horror],1.8899,4.625,534,en,"[TSG Entertainment, Davis Entertainment, 20th ...",False,Tyler Gillett,Zach Gilford
7978,Girls State,2024-01-18,0,0,95,[Documentary],1.8897,7.143,14,en,"[Concordia Studio, Mile End Films]",True,Amanda McBaine,Faith Glasgow


In [4]:
df_movies = df_movies[(df_movies['budget'] != 0) & (df_movies['revenue'] != 0)]

df_movies['profit'] = df_movies['revenue'] - df_movies['budget']
df_movies['profit_margin'] = df_movies['profit'] / df_movies['revenue']
print("\nFinal DataFrame with Profit and Profit Margin:")
print(df_movies[['title', 'release_date', 'budget', 'revenue', 'profit', 'profit_margin']].head())

df_movies


Final DataFrame with Profit and Profit Margin:
                                           title release_date     budget  \
0                                  Lilo & Stitch   2002-06-21   80000000   
3                                        Moana 2   2024-11-21  150000000   
5                           Panda Bear in Africa   2024-03-15   10000000   
7                          Mufasa: The Lion King   2024-12-18  200000000   
8  Mission: Impossible - Dead Reckoning Part One   2023-07-08  291000000   

      revenue     profit  profit_margin  
0   273144151  193144151       0.707114  
3  1059544057  909544057       0.858430  
5     5514627   -4485373      -0.813359  
7   721046090  521046090       0.722625  
8   571125435  280125435       0.490480  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies['profit'] = df_movies['revenue'] - df_movies['budget']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies['profit_margin'] = df_movies['profit'] / df_movies['revenue']


Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor,profit,profit_margin
0,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",171.5098,7.500,6540,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Dean DeBlois,Daveigh Chase,193144151,0.707114
3,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",114.6447,7.081,2411,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.858430
5,Panda Bear in Africa,2024-03-15,10000000,5514627,94,"[Animation, Adventure, Comedy, Family]",111.1393,6.300,29,nl,"[Cool Beans, A. Film Production, Comet Films, ...",False,Karsten Kiilerich,Yootha Wong-Loi-Sing,-4485373,-0.813359
7,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",82.3976,7.400,2156,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre,521046090,0.722625
8,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",83.0206,7.500,4294,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise,280125435,0.490480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7964,The Great Beauty,2013-05-21,9200000,24930592,142,[Drama],1.8916,7.509,3154,it,"[France 2 Cinéma, Indigo Film, Babe Films, Pathé]",False,Paolo Sorrentino,Toni Servillo,15730592,0.630975
7968,Fred Claus,2007-11-09,100000000,97800000,115,"[Action, Comedy, Drama]",1.8910,5.835,868,en,"[Jessie Nelson Productions, Silver Pictures, D...",False,David Dobkin,Vince Vaughn,-2200000,-0.022495
7975,Chasing Liberty,2004-01-09,23000000,12195626,111,"[Comedy, Romance]",1.8902,6.243,622,en,"[Micro Fusion 2003-2, Alcon Entertainment, Tra...",False,Andy Cadiff,Mandy Moore,-10804374,-0.885922
7976,Escape from Germany,2024-04-11,1000000,2601719,97,"[History, Drama, War]",1.8899,5.250,14,en,[],False,T.C. Christensen,Paul Wuthrich,1601719,0.615639


In [5]:
df_movies["release__year"] = pd.to_datetime(df_movies["release_date"], errors='coerce').dt.year

df_movies

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies["release__year"] = pd.to_datetime(df_movies["release_date"], errors='coerce').dt.year


Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor,profit,profit_margin,release__year
0,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",171.5098,7.500,6540,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Dean DeBlois,Daveigh Chase,193144151,0.707114,2002
3,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",114.6447,7.081,2411,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.858430,2024
5,Panda Bear in Africa,2024-03-15,10000000,5514627,94,"[Animation, Adventure, Comedy, Family]",111.1393,6.300,29,nl,"[Cool Beans, A. Film Production, Comet Films, ...",False,Karsten Kiilerich,Yootha Wong-Loi-Sing,-4485373,-0.813359,2024
7,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",82.3976,7.400,2156,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024
8,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",83.0206,7.500,4294,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise,280125435,0.490480,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7964,The Great Beauty,2013-05-21,9200000,24930592,142,[Drama],1.8916,7.509,3154,it,"[France 2 Cinéma, Indigo Film, Babe Films, Pathé]",False,Paolo Sorrentino,Toni Servillo,15730592,0.630975,2013
7968,Fred Claus,2007-11-09,100000000,97800000,115,"[Action, Comedy, Drama]",1.8910,5.835,868,en,"[Jessie Nelson Productions, Silver Pictures, D...",False,David Dobkin,Vince Vaughn,-2200000,-0.022495,2007
7975,Chasing Liberty,2004-01-09,23000000,12195626,111,"[Comedy, Romance]",1.8902,6.243,622,en,"[Micro Fusion 2003-2, Alcon Entertainment, Tra...",False,Andy Cadiff,Mandy Moore,-10804374,-0.885922,2004
7976,Escape from Germany,2024-04-11,1000000,2601719,97,"[History, Drama, War]",1.8899,5.250,14,en,[],False,T.C. Christensen,Paul Wuthrich,1601719,0.615639,2024


In [6]:
df_movies = df_movies[df_movies["original_language"] == "en"]

df_movies

Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor,profit,profit_margin,release__year
0,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",171.5098,7.500,6540,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Dean DeBlois,Daveigh Chase,193144151,0.707114,2002
3,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",114.6447,7.081,2411,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.858430,2024
7,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",82.3976,7.400,2156,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024
8,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",83.0206,7.500,4294,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise,280125435,0.490480,2023
11,Sonic the Hedgehog 3,2024-12-19,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",77.7636,7.700,2629,en,"[Paramount Pictures, Original Film, Marza Anim...",True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7952,Baby Mama,2008-04-25,30000000,64400000,99,"[Comedy, Romance]",1.8941,5.956,661,en,"[Relativity Media, Michaels-Goldwyn, Universal...",False,Michael McCullers,Amy Poehler,34400000,0.534161,2008
7968,Fred Claus,2007-11-09,100000000,97800000,115,"[Action, Comedy, Drama]",1.8910,5.835,868,en,"[Jessie Nelson Productions, Silver Pictures, D...",False,David Dobkin,Vince Vaughn,-2200000,-0.022495,2007
7975,Chasing Liberty,2004-01-09,23000000,12195626,111,"[Comedy, Romance]",1.8902,6.243,622,en,"[Micro Fusion 2003-2, Alcon Entertainment, Tra...",False,Andy Cadiff,Mandy Moore,-10804374,-0.885922,2004
7976,Escape from Germany,2024-04-11,1000000,2601719,97,"[History, Drama, War]",1.8899,5.250,14,en,[],False,T.C. Christensen,Paul Wuthrich,1601719,0.615639,2024


In [7]:
df_movies['production_company'] = df_movies['production_companies'].apply(
    lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None
)

df_movies

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies['production_company'] = df_movies['production_companies'].apply(


Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor,profit,profit_margin,release__year,production_company
0,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",171.5098,7.500,6540,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Dean DeBlois,Daveigh Chase,193144151,0.707114,2002,Walt Disney Pictures
3,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",114.6447,7.081,2411,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.858430,2024,Walt Disney Pictures
7,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",82.3976,7.400,2156,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,Walt Disney Pictures
8,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",83.0206,7.500,4294,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise,280125435,0.490480,2023,Paramount Pictures
11,Sonic the Hedgehog 3,2024-12-19,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",77.7636,7.700,2629,en,"[Paramount Pictures, Original Film, Marza Anim...",True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024,Paramount Pictures
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7952,Baby Mama,2008-04-25,30000000,64400000,99,"[Comedy, Romance]",1.8941,5.956,661,en,"[Relativity Media, Michaels-Goldwyn, Universal...",False,Michael McCullers,Amy Poehler,34400000,0.534161,2008,Relativity Media
7968,Fred Claus,2007-11-09,100000000,97800000,115,"[Action, Comedy, Drama]",1.8910,5.835,868,en,"[Jessie Nelson Productions, Silver Pictures, D...",False,David Dobkin,Vince Vaughn,-2200000,-0.022495,2007,Jessie Nelson Productions
7975,Chasing Liberty,2004-01-09,23000000,12195626,111,"[Comedy, Romance]",1.8902,6.243,622,en,"[Micro Fusion 2003-2, Alcon Entertainment, Tra...",False,Andy Cadiff,Mandy Moore,-10804374,-0.885922,2004,Micro Fusion 2003-2
7976,Escape from Germany,2024-04-11,1000000,2601719,97,"[History, Drama, War]",1.8899,5.250,14,en,[],False,T.C. Christensen,Paul Wuthrich,1601719,0.615639,2024,


In [8]:
df_movies.drop(columns=['release_date', 'original_language', 'production_companies'], inplace=True)

df_movies

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies.drop(columns=['release_date', 'original_language', 'production_companies'], inplace=True)


Unnamed: 0,title,budget,revenue,runtime,genres,popularity,vote_average,vote_count,belongs_to_collection,director,lead_actor,profit,profit_margin,release__year,production_company
0,Lilo & Stitch,80000000,273144151,85,"[Animation, Family, Comedy]",171.5098,7.500,6540,True,Dean DeBlois,Daveigh Chase,193144151,0.707114,2002,Walt Disney Pictures
3,Moana 2,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",114.6447,7.081,2411,True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.858430,2024,Walt Disney Pictures
7,Mufasa: The Lion King,200000000,721046090,118,"[Adventure, Family, Animation]",82.3976,7.400,2156,True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,Walt Disney Pictures
8,Mission: Impossible - Dead Reckoning Part One,291000000,571125435,164,"[Action, Adventure, Thriller]",83.0206,7.500,4294,True,Christopher McQuarrie,Tom Cruise,280125435,0.490480,2023,Paramount Pictures
11,Sonic the Hedgehog 3,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",77.7636,7.700,2629,True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024,Paramount Pictures
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7952,Baby Mama,30000000,64400000,99,"[Comedy, Romance]",1.8941,5.956,661,False,Michael McCullers,Amy Poehler,34400000,0.534161,2008,Relativity Media
7968,Fred Claus,100000000,97800000,115,"[Action, Comedy, Drama]",1.8910,5.835,868,False,David Dobkin,Vince Vaughn,-2200000,-0.022495,2007,Jessie Nelson Productions
7975,Chasing Liberty,23000000,12195626,111,"[Comedy, Romance]",1.8902,6.243,622,False,Andy Cadiff,Mandy Moore,-10804374,-0.885922,2004,Micro Fusion 2003-2
7976,Escape from Germany,1000000,2601719,97,"[History, Drama, War]",1.8899,5.250,14,False,T.C. Christensen,Paul Wuthrich,1601719,0.615639,2024,


In [9]:
df_movies.reset_index(drop=True, inplace=True)

df_movies

Unnamed: 0,title,budget,revenue,runtime,genres,popularity,vote_average,vote_count,belongs_to_collection,director,lead_actor,profit,profit_margin,release__year,production_company
0,Lilo & Stitch,80000000,273144151,85,"[Animation, Family, Comedy]",171.5098,7.500,6540,True,Dean DeBlois,Daveigh Chase,193144151,0.707114,2002,Walt Disney Pictures
1,Moana 2,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",114.6447,7.081,2411,True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.858430,2024,Walt Disney Pictures
2,Mufasa: The Lion King,200000000,721046090,118,"[Adventure, Family, Animation]",82.3976,7.400,2156,True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,Walt Disney Pictures
3,Mission: Impossible - Dead Reckoning Part One,291000000,571125435,164,"[Action, Adventure, Thriller]",83.0206,7.500,4294,True,Christopher McQuarrie,Tom Cruise,280125435,0.490480,2023,Paramount Pictures
4,Sonic the Hedgehog 3,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",77.7636,7.700,2629,True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024,Paramount Pictures
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3214,Baby Mama,30000000,64400000,99,"[Comedy, Romance]",1.8941,5.956,661,False,Michael McCullers,Amy Poehler,34400000,0.534161,2008,Relativity Media
3215,Fred Claus,100000000,97800000,115,"[Action, Comedy, Drama]",1.8910,5.835,868,False,David Dobkin,Vince Vaughn,-2200000,-0.022495,2007,Jessie Nelson Productions
3216,Chasing Liberty,23000000,12195626,111,"[Comedy, Romance]",1.8902,6.243,622,False,Andy Cadiff,Mandy Moore,-10804374,-0.885922,2004,Micro Fusion 2003-2
3217,Escape from Germany,1000000,2601719,97,"[History, Drama, War]",1.8899,5.250,14,False,T.C. Christensen,Paul Wuthrich,1601719,0.615639,2024,


In [10]:
# Function to extract up to 3 genres
def extract_genres(genre_list):
    # Pad with None if fewer than 3 genres
    genres = genre_list[:3] + [None] * (3 - len(genre_list))
    return pd.Series(genres, index=["genre1", "genre2", "genre3"])
# Apply the function
df_movies[["genre1", "genre2", "genre3"]] = df_movies["genres"].apply(extract_genres)
# Optional: drop the original genres column if no longer needed
# df_movies.drop(columns=["genres"], inplace=True)
# Preview the result
df_movies.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies[["genre1", "genre2", "genre3"]] = df_movies["genres"].apply(extract_genres)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies[["genre1", "genre2", "genre3"]] = df_movies["genres"].apply(extract_genres)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies[["genre1", "genre2", "g

Unnamed: 0,title,budget,revenue,runtime,genres,popularity,vote_average,vote_count,belongs_to_collection,director,lead_actor,profit,profit_margin,release__year,production_company,genre1,genre2,genre3
0,Lilo & Stitch,80000000,273144151,85,"[Animation, Family, Comedy]",171.5098,7.5,6540,True,Dean DeBlois,Daveigh Chase,193144151,0.707114,2002,Walt Disney Pictures,Animation,Family,Comedy
1,Moana 2,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",114.6447,7.081,2411,True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.85843,2024,Walt Disney Pictures,Animation,Adventure,Family
2,Mufasa: The Lion King,200000000,721046090,118,"[Adventure, Family, Animation]",82.3976,7.4,2156,True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,Walt Disney Pictures,Adventure,Family,Animation
3,Mission: Impossible - Dead Reckoning Part One,291000000,571125435,164,"[Action, Adventure, Thriller]",83.0206,7.5,4294,True,Christopher McQuarrie,Tom Cruise,280125435,0.49048,2023,Paramount Pictures,Action,Adventure,Thriller
4,Sonic the Hedgehog 3,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",77.7636,7.7,2629,True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024,Paramount Pictures,Action,Science Fiction,Comedy


In [11]:
# Export the final DataFrame to a CSV file
output_file = "tmdb_movies_final.csv"
df_movies.to_csv(output_file, index=False)
print(f"\nData exported to {output_file}")


Data exported to tmdb_movies_final.csv
