In [17]:
import requests
import pandas as pd
from time import sleep
import os
import json

# Import API key from config file
from config import TMDB_API_KEY

# Get API key: from environment variable or config file
api_key = os.getenv("TMDB_API_KEY") or TMDB_API_KEY

# TMDB base URL
base_url = "https://api.themoviedb.org/3"

def fetch_movie_data(movie_id):
    """Fetch movie details from TMDB API."""
    url = f"{base_url}/movie/{movie_id}"
    params = {
        "api_key": api_key,
        "language": "en-US"
    }
    response = requests.get(url, params=params)
    
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching data for movie ID {movie_id}: {response.status_code}")
        try:
            print(response.json())  # Print error details from API
        except:
            pass
        return None

def main():
    """Main pipeline to fetch movies and build a DataFrame."""
    # Example movie IDs (test small batch first)
    movie_ids = [550, 551, 552, 553, 554]
    
    movie_data_list = []

    for movie_id in movie_ids:
        movie_data = fetch_movie_data(movie_id)
        if movie_data:
            print("\n🔍 API Response for Movie ID:", movie_id)
            print(json.dumps(movie_data, indent=4))  # Pretty print JSON for clarity
            
            movie_data_list.append(movie_data)
        sleep(1)  # Be respectful of rate limits

    # Convert to DataFrame
    if movie_data_list:
        df = pd.DataFrame(movie_data_list)

        print("\n Fetched Movie DataFrame:")
        print(df.head())

        print("\n DataFrame Columns:")
        print(df.columns)

        # Save DataFrame (optional)
        # df.to_csv("tmdb_movies_raw.csv", index=False)

    else:
        print("No movie data fetched.")

if __name__ == "__main__":
    main()




🔍 API Response for Movie ID: 550
{
    "adult": false,
    "backdrop_path": "/xRyINp9KfMLVjRiO5nCsoRDdvvF.jpg",
    "belongs_to_collection": null,
    "budget": 63000000,
    "genres": [
        {
            "id": 18,
            "name": "Drama"
        }
    ],
    "homepage": "http://www.foxmovies.com/movies/fight-club",
    "id": 550,
    "imdb_id": "tt0137523",
    "origin_country": [
        "US"
    ],
    "original_language": "en",
    "original_title": "Fight Club",
    "overview": "A ticking-time-bomb insomniac and a slippery soap salesman channel primal male aggression into a shocking new form of therapy. Their concept catches on, with underground \"fight clubs\" forming in every town, until an eccentric gets in the way and ignites an out-of-control spiral toward oblivion.",
    "popularity": 28.1895,
    "poster_path": "/pB8BM7pdSp6B6Ih7QZ4DrQ3PmJK.jpg",
    "production_companies": [
        {
            "id": 711,
            "logo_path": "/tEiIH5QesdheJmDAqQwvtN60727.pn

In [18]:
url = f"https://api.themoviedb.org/3/discover/movie"
params = {
    "api_key": api_key,
    "language": "en-US",
    "primary_release_date.gte": "2000-01-01",
    "primary_release_date.lte": "2024-12-31",
    "page": 1
}

response = requests.get(url, params=params)
data = response.json()

print(f"Total movies matching query: {data['total_results']}")
print(f"Total pages: {data['total_pages']}")


Total movies matching query: 578169
Total pages: 28909


In [19]:
#  Storage for results
movie_data = []

# Loop through TMDB's /discover/movie endpoint
for page in range(1, 3):  # Start with 2 pages for quick testing; increase to 201 later
    discover_url = f"{base_url}/discover/movie"
    params = {
        "api_key": api_key,
        "language": "en-US",
        "sort_by": "popularity.desc",
        "include_adult": False,
        "include_video": False,
        "page": page,
        "primary_release_date.gte": "2000-01-01",
        "primary_release_date.lte": "2024-12-31"
    }

    response = requests.get(discover_url, params=params)
    if response.status_code != 200:
        print(f"Failed on page {page} — Status: {response.status_code}")
        print(response.json())  # Show error
        break

    movies = response.json().get("results", [])

    for movie in movies:
        movie_id = movie["id"]

        # Fetch movie details
        movie_details = requests.get(f"{base_url}/movie/{movie_id}", params={"api_key": api_key}).json()
        credits = requests.get(f"{base_url}/movie/{movie_id}/credits", params={"api_key": api_key}).json()

        # Get director and top actor
        director = next((c["name"] for c in credits.get("crew", []) if c["job"] == "Director"), None)
        cast = [c["name"] for c in credits.get("cast", [])][:1]

        # Collect relevant fields
        movie_data.append({
            "title": movie_details.get("title"),
            "release_date": movie_details.get("release_date"),
            "budget": movie_details.get("budget"),
            "revenue": movie_details.get("revenue"),
            "runtime": movie_details.get("runtime"),
            "genres": [g["name"] for g in movie_details.get("genres", [])],
            "popularity": movie_details.get("popularity"),
            "vote_average": movie_details.get("vote_average"),
            "vote_count": movie_details.get("vote_count"),
            "original_language": movie_details.get("original_language"),
            "production_companies": [pc["name"] for pc in movie_details.get("production_companies", [])],
            "belongs_to_collection": movie_details.get("belongs_to_collection") is not None,
            "director": director,
            "lead_actor": cast[0] if cast else None
        })

    print(f"Page {page} complete — movies collected: {len(movie_data)}")
    sleep(0.3)

# Convert to DataFrame
df_movies = pd.DataFrame(movie_data)

# Print the DataFrame
df_movies # Preview first few rows


Page 1 complete — movies collected: 20
Page 2 complete — movies collected: 40


Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor
0,The Great Escape,2023-05-26,200000,0,90,"[Action, Thriller]",181.5985,6.2,2,pt,[],False,,
1,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Chris Sanders,Daveigh Chase
2,Brave Citizen,2023-10-25,0,2116112,112,"[Action, Drama, Comedy]",149.1418,7.049,41,ko,"[Studio N, Oscar 10 Studio, Vol Media]",False,Park Jin-pyo,Shin Hye-sun
3,Bambi: A Life in the Woods,2024-10-16,0,1419154,77,"[Adventure, Family, Documentary]",146.2852,5.87,23,fr,"[MC4, Gébéka Films, Kinology]",False,Michel Fessler,Mylène Farmer
4,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho
5,The Haunting at Saint Joseph's,2023-02-26,0,0,100,"[Horror, Thriller]",117.7351,4.6,11,en,[],False,Jon Cohen,Tim Spriggs
6,Conjuring the Cult,2024-10-01,0,0,93,"[Horror, Drama]",102.2868,5.423,26,en,[7th Street Productions],False,Calvin Morie McCarthy,Neil Green
7,Detective Chirp & the Golden Beehive,2022-11-17,0,0,85,"[Animation, Comedy, Mystery, Adventure, Family]",96.6719,0.0,0,ru,"[Central Partnership, Okko Studios, Cinema Fou...",False,Григорий Вожакин,Ivan Chaban
8,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre
9,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise


In [20]:
df_movies = df_movies[(df_movies['budget'] != 0) & (df_movies['revenue'] != 0)]

df_movies['profit'] = df_movies['revenue'] - df_movies['budget']
df_movies['profit_margin'] = df_movies['profit'] / df_movies['revenue']
print("\nFinal DataFrame with Profit and Profit Margin:")
print(df_movies[['title', 'release_date', 'budget', 'revenue', 'profit', 'profit_margin']].head())

df_movies


Final DataFrame with Profit and Profit Margin:
                                            title release_date     budget  \
1                                   Lilo & Stitch   2002-06-21   80000000   
4                                         Moana 2   2024-11-21  150000000   
8                           Mufasa: The Lion King   2024-12-18  200000000   
9   Mission: Impossible - Dead Reckoning Part One   2023-07-08  291000000   
10                              Final Destination   2000-03-17   23000000   

       revenue     profit  profit_margin  
1    273144151  193144151       0.707114  
4   1059544057  909544057       0.858430  
8    721046090  521046090       0.722625  
9    571125435  280125435       0.490480  
10   112880294   89880294       0.796244  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies['profit'] = df_movies['revenue'] - df_movies['budget']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies['profit_margin'] = df_movies['profit'] / df_movies['revenue']


Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor,profit,profit_margin
1,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Chris Sanders,Daveigh Chase,193144151,0.707114
4,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.85843
8,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre,521046090,0.722625
9,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise,280125435,0.49048
10,Final Destination,2000-03-17,23000000,112880294,98,[Horror],82.8014,6.618,6001,en,"[Hard Eight Pictures, New Line Cinema, Zide-Pe...",True,James Wong,Devon Sawa,89880294,0.796244
11,Sonic the Hedgehog 3,2024-12-19,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",80.9869,7.719,2613,en,"[Paramount Pictures, Original Film, Marza Anim...",True,Jeff Fowler,Jim Carrey,364018457,0.748981
13,Final Destination 5,2011-08-12,40000000,157887643,91,"[Horror, Mystery]",75.7244,6.123,3634,en,"[Parallel Zide, New Line Cinema, Practical Pic...",True,Steven Quale,Nicholas D'Agosto,117887643,0.746655
16,xXx,2002-08-09,70000000,277448382,124,"[Action, Adventure, Thriller, Crime]",63.9125,5.942,4422,en,"[Columbia Pictures, Original Film, Revolution ...",True,Rob Cohen,Vin Diesel,207448382,0.747701
19,The Wild Robot,2024-09-12,78000000,331982078,102,"[Animation, Science Fiction, Family]",58.1097,8.3,4954,en,[DreamWorks Animation],True,Chris Sanders,Lupita Nyong'o,253982078,0.765048
22,Final Destination 2,2003-01-31,26000000,90941129,90,"[Horror, Mystery]",53.5862,6.276,4325,en,"[New Line Cinema, Zide-Perry Productions]",True,David R. Ellis,Ali Larter,64941129,0.714101


In [26]:
df_movies["release__year"] = pd.to_datetime(df_movies["release_date"], errors='coerce').dt.year

df_movies

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies["release__year"] = pd.to_datetime(df_movies["release_date"], errors='coerce').dt.year


Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor,profit,profit_margin,release_year,release__year
1,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Chris Sanders,Daveigh Chase,193144151,0.707114,2002,2002
4,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.85843,2024,2024
8,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,2024
9,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise,280125435,0.49048,2023,2023
10,Final Destination,2000-03-17,23000000,112880294,98,[Horror],82.8014,6.618,6001,en,"[Hard Eight Pictures, New Line Cinema, Zide-Pe...",True,James Wong,Devon Sawa,89880294,0.796244,2000,2000
11,Sonic the Hedgehog 3,2024-12-19,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",80.9869,7.719,2613,en,"[Paramount Pictures, Original Film, Marza Anim...",True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024,2024
13,Final Destination 5,2011-08-12,40000000,157887643,91,"[Horror, Mystery]",75.7244,6.123,3634,en,"[Parallel Zide, New Line Cinema, Practical Pic...",True,Steven Quale,Nicholas D'Agosto,117887643,0.746655,2011,2011
16,xXx,2002-08-09,70000000,277448382,124,"[Action, Adventure, Thriller, Crime]",63.9125,5.942,4422,en,"[Columbia Pictures, Original Film, Revolution ...",True,Rob Cohen,Vin Diesel,207448382,0.747701,2002,2002
19,The Wild Robot,2024-09-12,78000000,331982078,102,"[Animation, Science Fiction, Family]",58.1097,8.3,4954,en,[DreamWorks Animation],True,Chris Sanders,Lupita Nyong'o,253982078,0.765048,2024,2024
22,Final Destination 2,2003-01-31,26000000,90941129,90,"[Horror, Mystery]",53.5862,6.276,4325,en,"[New Line Cinema, Zide-Perry Productions]",True,David R. Ellis,Ali Larter,64941129,0.714101,2003,2003


In [24]:
df_movies = df_movies[df_movies["original_language"] == "en"]

df_movies

Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor,profit,profit_margin,release_year
1,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Chris Sanders,Daveigh Chase,193144151,0.707114,2002
4,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.85843,2024
8,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024
9,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise,280125435,0.49048,2023
10,Final Destination,2000-03-17,23000000,112880294,98,[Horror],82.8014,6.618,6001,en,"[Hard Eight Pictures, New Line Cinema, Zide-Pe...",True,James Wong,Devon Sawa,89880294,0.796244,2000
11,Sonic the Hedgehog 3,2024-12-19,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",80.9869,7.719,2613,en,"[Paramount Pictures, Original Film, Marza Anim...",True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024
13,Final Destination 5,2011-08-12,40000000,157887643,91,"[Horror, Mystery]",75.7244,6.123,3634,en,"[Parallel Zide, New Line Cinema, Practical Pic...",True,Steven Quale,Nicholas D'Agosto,117887643,0.746655,2011
16,xXx,2002-08-09,70000000,277448382,124,"[Action, Adventure, Thriller, Crime]",63.9125,5.942,4422,en,"[Columbia Pictures, Original Film, Revolution ...",True,Rob Cohen,Vin Diesel,207448382,0.747701,2002
19,The Wild Robot,2024-09-12,78000000,331982078,102,"[Animation, Science Fiction, Family]",58.1097,8.3,4954,en,[DreamWorks Animation],True,Chris Sanders,Lupita Nyong'o,253982078,0.765048,2024
22,Final Destination 2,2003-01-31,26000000,90941129,90,"[Horror, Mystery]",53.5862,6.276,4325,en,"[New Line Cinema, Zide-Perry Productions]",True,David R. Ellis,Ali Larter,64941129,0.714101,2003


In [27]:
df_movies['production_company'] = df_movies['production_companies'].apply(
    lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None
)

df_movies

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies['production_company'] = df_movies['production_companies'].apply(


Unnamed: 0,title,release_date,budget,revenue,runtime,genres,popularity,vote_average,vote_count,original_language,production_companies,belongs_to_collection,director,lead_actor,profit,profit_margin,release_year,release__year,production_company
1,Lilo & Stitch,2002-06-21,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,en,"[Walt Disney Pictures, Walt Disney Feature Ani...",True,Chris Sanders,Daveigh Chase,193144151,0.707114,2002,2002,Walt Disney Pictures
4,Moana 2,2024-11-21,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,en,"[Walt Disney Pictures, Walt Disney Animation S...",True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.85843,2024,2024,Walt Disney Pictures
8,Mufasa: The Lion King,2024-12-18,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,en,[Walt Disney Pictures],True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,2024,Walt Disney Pictures
9,Mission: Impossible - Dead Reckoning Part One,2023-07-08,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,en,"[Paramount Pictures, Skydance Media, TC Produc...",True,Christopher McQuarrie,Tom Cruise,280125435,0.49048,2023,2023,Paramount Pictures
10,Final Destination,2000-03-17,23000000,112880294,98,[Horror],82.8014,6.618,6001,en,"[Hard Eight Pictures, New Line Cinema, Zide-Pe...",True,James Wong,Devon Sawa,89880294,0.796244,2000,2000,Hard Eight Pictures
11,Sonic the Hedgehog 3,2024-12-19,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",80.9869,7.719,2613,en,"[Paramount Pictures, Original Film, Marza Anim...",True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024,2024,Paramount Pictures
13,Final Destination 5,2011-08-12,40000000,157887643,91,"[Horror, Mystery]",75.7244,6.123,3634,en,"[Parallel Zide, New Line Cinema, Practical Pic...",True,Steven Quale,Nicholas D'Agosto,117887643,0.746655,2011,2011,Parallel Zide
16,xXx,2002-08-09,70000000,277448382,124,"[Action, Adventure, Thriller, Crime]",63.9125,5.942,4422,en,"[Columbia Pictures, Original Film, Revolution ...",True,Rob Cohen,Vin Diesel,207448382,0.747701,2002,2002,Columbia Pictures
19,The Wild Robot,2024-09-12,78000000,331982078,102,"[Animation, Science Fiction, Family]",58.1097,8.3,4954,en,[DreamWorks Animation],True,Chris Sanders,Lupita Nyong'o,253982078,0.765048,2024,2024,DreamWorks Animation
22,Final Destination 2,2003-01-31,26000000,90941129,90,"[Horror, Mystery]",53.5862,6.276,4325,en,"[New Line Cinema, Zide-Perry Productions]",True,David R. Ellis,Ali Larter,64941129,0.714101,2003,2003,New Line Cinema


In [28]:
df_movies.drop(columns=['release_date', 'original_language', 'production_companies'], inplace=True)

df_movies

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies.drop(columns=['release_date', 'original_language', 'production_companies'], inplace=True)


Unnamed: 0,title,budget,revenue,runtime,genres,popularity,vote_average,vote_count,belongs_to_collection,director,lead_actor,profit,profit_margin,release_year,release__year,production_company
1,Lilo & Stitch,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,True,Chris Sanders,Daveigh Chase,193144151,0.707114,2002,2002,Walt Disney Pictures
4,Moana 2,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.85843,2024,2024,Walt Disney Pictures
8,Mufasa: The Lion King,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,2024,Walt Disney Pictures
9,Mission: Impossible - Dead Reckoning Part One,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,True,Christopher McQuarrie,Tom Cruise,280125435,0.49048,2023,2023,Paramount Pictures
10,Final Destination,23000000,112880294,98,[Horror],82.8014,6.618,6001,True,James Wong,Devon Sawa,89880294,0.796244,2000,2000,Hard Eight Pictures
11,Sonic the Hedgehog 3,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",80.9869,7.719,2613,True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024,2024,Paramount Pictures
13,Final Destination 5,40000000,157887643,91,"[Horror, Mystery]",75.7244,6.123,3634,True,Steven Quale,Nicholas D'Agosto,117887643,0.746655,2011,2011,Parallel Zide
16,xXx,70000000,277448382,124,"[Action, Adventure, Thriller, Crime]",63.9125,5.942,4422,True,Rob Cohen,Vin Diesel,207448382,0.747701,2002,2002,Columbia Pictures
19,The Wild Robot,78000000,331982078,102,"[Animation, Science Fiction, Family]",58.1097,8.3,4954,True,Chris Sanders,Lupita Nyong'o,253982078,0.765048,2024,2024,DreamWorks Animation
22,Final Destination 2,26000000,90941129,90,"[Horror, Mystery]",53.5862,6.276,4325,True,David R. Ellis,Ali Larter,64941129,0.714101,2003,2003,New Line Cinema


In [29]:
df_movies.reset_index(drop=True, inplace=True)

df_movies

Unnamed: 0,title,budget,revenue,runtime,genres,popularity,vote_average,vote_count,belongs_to_collection,director,lead_actor,profit,profit_margin,release_year,release__year,production_company
0,Lilo & Stitch,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,True,Chris Sanders,Daveigh Chase,193144151,0.707114,2002,2002,Walt Disney Pictures
1,Moana 2,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.85843,2024,2024,Walt Disney Pictures
2,Mufasa: The Lion King,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,2024,Walt Disney Pictures
3,Mission: Impossible - Dead Reckoning Part One,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,True,Christopher McQuarrie,Tom Cruise,280125435,0.49048,2023,2023,Paramount Pictures
4,Final Destination,23000000,112880294,98,[Horror],82.8014,6.618,6001,True,James Wong,Devon Sawa,89880294,0.796244,2000,2000,Hard Eight Pictures
5,Sonic the Hedgehog 3,122000000,486018457,110,"[Action, Science Fiction, Comedy, Family]",80.9869,7.719,2613,True,Jeff Fowler,Jim Carrey,364018457,0.748981,2024,2024,Paramount Pictures
6,Final Destination 5,40000000,157887643,91,"[Horror, Mystery]",75.7244,6.123,3634,True,Steven Quale,Nicholas D'Agosto,117887643,0.746655,2011,2011,Parallel Zide
7,xXx,70000000,277448382,124,"[Action, Adventure, Thriller, Crime]",63.9125,5.942,4422,True,Rob Cohen,Vin Diesel,207448382,0.747701,2002,2002,Columbia Pictures
8,The Wild Robot,78000000,331982078,102,"[Animation, Science Fiction, Family]",58.1097,8.3,4954,True,Chris Sanders,Lupita Nyong'o,253982078,0.765048,2024,2024,DreamWorks Animation
9,Final Destination 2,26000000,90941129,90,"[Horror, Mystery]",53.5862,6.276,4325,True,David R. Ellis,Ali Larter,64941129,0.714101,2003,2003,New Line Cinema


In [30]:
# Function to extract up to 3 genres
def extract_genres(genre_list):
    # Pad with None if fewer than 3 genres
    genres = genre_list[:3] + [None] * (3 - len(genre_list))
    return pd.Series(genres, index=["genre1", "genre2", "genre3"])
# Apply the function
df_movies[["genre1", "genre2", "genre3"]] = df_movies["genres"].apply(extract_genres)
# Optional: drop the original genres column if no longer needed
# df_movies.drop(columns=["genres"], inplace=True)
# Preview the result
df_movies.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies[["genre1", "genre2", "genre3"]] = df_movies["genres"].apply(extract_genres)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies[["genre1", "genre2", "genre3"]] = df_movies["genres"].apply(extract_genres)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_movies[["genre1", "genre2", "g

Unnamed: 0,title,budget,revenue,runtime,genres,popularity,vote_average,vote_count,belongs_to_collection,director,lead_actor,profit,profit_margin,release_year,release__year,production_company,genre1,genre2,genre3
0,Lilo & Stitch,80000000,273144151,85,"[Animation, Family, Comedy]",152.7018,7.5,6518,True,Chris Sanders,Daveigh Chase,193144151,0.707114,2002,2002,Walt Disney Pictures,Animation,Family,Comedy
1,Moana 2,150000000,1059544057,100,"[Animation, Adventure, Family, Comedy]",118.3758,7.08,2402,True,David G. Derrick Jr.,Auliʻi Cravalho,909544057,0.85843,2024,2024,Walt Disney Pictures,Animation,Adventure,Family
2,Mufasa: The Lion King,200000000,721046090,118,"[Adventure, Family, Animation]",84.8384,7.392,2144,True,Barry Jenkins,Aaron Pierre,521046090,0.722625,2024,2024,Walt Disney Pictures,Adventure,Family,Animation
3,Mission: Impossible - Dead Reckoning Part One,291000000,571125435,164,"[Action, Adventure, Thriller]",85.6323,7.5,4279,True,Christopher McQuarrie,Tom Cruise,280125435,0.49048,2023,2023,Paramount Pictures,Action,Adventure,Thriller
4,Final Destination,23000000,112880294,98,[Horror],82.8014,6.618,6001,True,James Wong,Devon Sawa,89880294,0.796244,2000,2000,Hard Eight Pictures,Horror,,
