In [13]:
import pandas as pd
import ast

# Load CSV (genres column is string here)
df = pd.read_csv("../data/movie_details.csv")

# Convert genre strings -> Python lists
df["genres"] = df["genres"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else [])

# Explode safely
df = df.explode("genres").reset_index(drop=True)

# Extract id and name safely
df["genre_id"] = df["genres"].apply(lambda x: x.get("id") if isinstance(x, dict) else None)
df["genre_name"] = df["genres"].apply(lambda x: x.get("name") if isinstance(x, dict) else None)

df.drop(columns=["genres"], inplace=True)

df.to_csv("../data/movie_details_flat.csv", index=False)


In [14]:
df.columns

Index(['adult', 'backdrop_path', 'belongs_to_collection', 'budget', 'homepage',
       'id', 'imdb_id', 'origin_country', 'original_language',
       'original_title', 'overview', 'popularity', 'poster_path',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'video', 'vote_average', 'vote_count', 'genre_id', 'genre_name'],
      dtype='object')

In [15]:
df = pd.json_normalize(movie_details_raw)
df.columns


Index([], dtype='object')

In [16]:
print(type(movie_details_raw))
print(len(movie_details_raw))
print(movie_details_raw[:2])

<class 'pandas.core.frame.DataFrame'>
2581
   adult                     backdrop_path  \
0  False  /4PKfa0zltSrp1BJoLl8zfvYXaac.jpg   
1  False  /3UbaCMmqOd7mca4Y5DOzY2ZVTyX.jpg   

                               belongs_to_collection     budget  \
0  {'id': 913777, 'name': 'New Gods Collection', ...          0   
1  {'id': 121938, 'name': 'The Hobbit Collection'...  250000000   

                                              genres  \
0  [{'id': 16, 'name': 'Animation'}, {'id': 14, '...   
1  [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...   

                                            homepage      id     imdb_id  \
0             https://cmc-pictures.com/nezha-reborn/  663558  tt13269670   
1  https://www.warnerbros.com/movies/hobbit-battl...  122917   tt2310332   

  origin_country original_language  ... release_date    revenue  runtime  \
0         ['CN']                zh  ...   2021-02-06   70000000      117   
1         ['US']                en  ...   2014-12-10  956019788   

In [11]:
movie_details_raw[:2]

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,origin_country,original_language,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/4PKfa0zltSrp1BJoLl8zfvYXaac.jpg,"{'id': 913777, 'name': 'New Gods Collection', ...",0,"[{'id': 16, 'name': 'Animation'}, {'id': 14, '...",https://cmc-pictures.com/nezha-reborn/,663558,tt13269670,['CN'],zh,...,2021-02-06,70000000,117,"[{'english_name': 'Mandarin', 'iso_639_1': 'zh...",Released,Rebirth of a hero,New Gods: Nezha Reborn,False,8.1,486
1,False,/3UbaCMmqOd7mca4Y5DOzY2ZVTyX.jpg,"{'id': 121938, 'name': 'The Hobbit Collection'...",250000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.warnerbros.com/movies/hobbit-battl...,122917,tt2310332,['US'],en,...,2014-12-10,956019788,144,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Will you follow me... one last time?,The Hobbit: The Battle of the Five Armies,False,7.329,14909


In [17]:
df = pd.read_csv("../data/movie_details_flat.csv")

In [18]:
df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,homepage,id,imdb_id,origin_country,original_language,original_title,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,genre_id,genre_name
0,False,/4PKfa0zltSrp1BJoLl8zfvYXaac.jpg,"{'id': 913777, 'name': 'New Gods Collection', ...",0,https://cmc-pictures.com/nezha-reborn/,663558,tt13269670,['CN'],zh,新神榜：哪吒重生,...,117,"[{'english_name': 'Mandarin', 'iso_639_1': 'zh...",Released,Rebirth of a hero,New Gods: Nezha Reborn,False,8.1,486,16.0,Animation
1,False,/4PKfa0zltSrp1BJoLl8zfvYXaac.jpg,"{'id': 913777, 'name': 'New Gods Collection', ...",0,https://cmc-pictures.com/nezha-reborn/,663558,tt13269670,['CN'],zh,新神榜：哪吒重生,...,117,"[{'english_name': 'Mandarin', 'iso_639_1': 'zh...",Released,Rebirth of a hero,New Gods: Nezha Reborn,False,8.1,486,14.0,Fantasy
2,False,/4PKfa0zltSrp1BJoLl8zfvYXaac.jpg,"{'id': 913777, 'name': 'New Gods Collection', ...",0,https://cmc-pictures.com/nezha-reborn/,663558,tt13269670,['CN'],zh,新神榜：哪吒重生,...,117,"[{'english_name': 'Mandarin', 'iso_639_1': 'zh...",Released,Rebirth of a hero,New Gods: Nezha Reborn,False,8.1,486,28.0,Action
3,False,/3UbaCMmqOd7mca4Y5DOzY2ZVTyX.jpg,"{'id': 121938, 'name': 'The Hobbit Collection'...",250000000,https://www.warnerbros.com/movies/hobbit-battl...,122917,tt2310332,['US'],en,The Hobbit: The Battle of the Five Armies,...,144,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Will you follow me... one last time?,The Hobbit: The Battle of the Five Armies,False,7.329,14909,28.0,Action
4,False,/3UbaCMmqOd7mca4Y5DOzY2ZVTyX.jpg,"{'id': 121938, 'name': 'The Hobbit Collection'...",250000000,https://www.warnerbros.com/movies/hobbit-battl...,122917,tt2310332,['US'],en,The Hobbit: The Battle of the Five Armies,...,144,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Will you follow me... one last time?,The Hobbit: The Battle of the Five Armies,False,7.329,14909,12.0,Adventure


In [23]:
import pandas as pd
import ast

df = pd.read_csv("../data/movie_details.csv")

# Convert string columns to lists
for col in ["production_companies", "production_countries"]:
    df[col] = df[col].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else [])

# Flatten production_companies
companies_flat = []
for _, row in df.iterrows():
    movie_id = row['id']
    for company in row['production_companies']:
        company['movie_id'] = movie_id
        companies_flat.append(company)

companies_df = pd.DataFrame(companies_flat)
companies_df.to_csv("../data/movie_production_companies.csv", index=False)

# Flatten production_countries
countries_flat = []
for _, row in df.iterrows():
    movie_id = row['id']
    for country in row['production_countries']:
        country['movie_id'] = movie_id
        countries_flat.append(country)

countries_df = pd.DataFrame(countries_flat)
countries_df.to_csv("../data/movie_production_countries.csv", index=False)

print("Production companies and countries flattened and saved!")


Production companies and countries flattened and saved!
