In [25]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Global seaborn theme applied to every figure drawn below: white background with grid lines
sns.set_style(style="whitegrid")

In [26]:
# --- 1. Data Loading and Preprocessing ---
# The dataset is read from an absolute path. The /content prefix suggests a
# Google Colab session (TODO confirm) -- point DATA_PATH at the CSV's actual
# location when running this notebook anywhere else.
DATA_PATH = '/content/Netflix Dataset (4).csv'
df = pd.read_csv(DATA_PATH)
# Last expression of the cell: renders the (truncated) frame as the cell output
df


Unnamed: 0,Show_Id,Category,Title,Director,Cast,Country,Release_Date,Rating,Duration,Type,Description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,07:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",PG-13,123 min,Dramas,A brilliant group of students become card-coun...
...,...,...,...,...,...,...,...,...,...,...,...
7784,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...","October 19, 2020",TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...
7785,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,"March 2, 2019",TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...
7786,s7785,Movie,Zulu Man in Japan,,Nasty C,,"September 25, 2020",TV-MA,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast..."
7787,s7786,TV Show,Zumbo's Just Desserts,,"Adriano Zumbo, Rachel Khoo",Australia,"October 31, 2020",TV-PG,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...


In [27]:
# Parse Release_Date ("Month D, YYYY" text) into datetimes and derive Release_Year.
# - Surrounding whitespace is stripped before parsing.
# - errors='coerce' turns any string that does not match the format into NaT
#   instead of raising; those rows end up with <NA> in Release_Year.
# - The dtype guard makes the cell safe to re-run: once the column has been
#   converted, calling `.str` on it again would raise AttributeError.
if not pd.api.types.is_datetime64_any_dtype(df['Release_Date']):
    df['Release_Date'] = pd.to_datetime(
        df['Release_Date'].str.strip(), format="%B %d, %Y", errors='coerce'
    )
# Nullable Int64 keeps the year integral even when some dates are NaT
df['Release_Year'] = df['Release_Date'].dt.year.astype('Int64')

In [28]:
# Discard, in place, every row that has no Release_Year value
df.dropna(axis=0, how='any', subset=['Release_Year'], inplace=True)

In [29]:
# Country analysis uses a single country per title: take the text before the
# first comma of 'Country' and trim surrounding whitespace.
df['Country_Main'] = (
    df['Country']
    .str.split(',', n=1)
    .str.get(0)
    .str.strip()
)

In [30]:
# --- 2. Objective 1: Movies vs. TV Shows Distribution Over Years ---
# Count titles per (Release_Year, Category), pivot the categories into columns
# (absent combinations become 0) and order the rows by year.
content_over_time = (
    df.groupby(['Release_Year', 'Category'])
    .size()
    .unstack(fill_value=0)
    .sort_index()
)

# reindex (rather than plain column selection) keeps the cell working when one
# of the two categories is missing from the data: the absent column is filled
# with zeros instead of raising KeyError.
plot_categories = ['Movie', 'TV Show']
fig, ax = plt.subplots(figsize=(12, 6))
content_over_time.reindex(columns=plot_categories, fill_value=0).plot(
    kind='area', stacked=True, ax=ax, color=['#e50914', '#221f1f']
)
# NOTE(review): the title speaks of "Release Year" while the y-label says
# "Titles Added" -- confirm what Release_Date means in this dataset and align
# the wording of both labels.
ax.set_title('Netflix Content Production Trends: Movies vs. TV Shows by Release Year', fontsize=16)
ax.set_xlabel('Release Year', fontsize=12)
ax.set_ylabel('Number of Titles Added', fontsize=12)
ax.legend(title='Category', loc='upper left')
fig.tight_layout()
fig.savefig('netflix_content_trends_by_year.png')
# Close this specific figure once it has been written to disk
plt.close(fig)


In [31]:
# --- 3. Objective 2: Top 10 Most Common Genres ---
# 'Type' holds a ', '-separated genre list per title. Split it, emit one row per
# (title, genre) pair, trim each genre label, then keep the ten most frequent.
df_genres = (
    df.assign(Genre=df['Type'].str.split(', '))
    .explode('Genre')
    .assign(Genre=lambda exploded: exploded['Genre'].str.strip())
)
top_genres = df_genres['Genre'].value_counts().nlargest(10)

fig, ax = plt.subplots(figsize=(12, 6))
# hue duplicates x and the legend is switched off: this lets `palette` be used
# without triggering seaborn's FutureWarning.
sns.barplot(
    x=top_genres.index,
    y=top_genres.values,
    hue=top_genres.index,
    palette='viridis',
    legend=False,
    ax=ax,
)
ax.set_title('Top 10 Most Common Genres on Netflix', fontsize=16)
ax.set_xlabel('Genre', fontsize=12)
ax.set_ylabel('Number of Titles', fontsize=12)
# Slant the category labels so long genre names do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
fig.savefig('netflix_top_10_genres.png')
plt.close(fig)

In [32]:
# --- 4. Objective 3: Top 10 Country-wise Contributions ---
# Restrict to titles that have a primary country, then rank countries by the
# number of titles and keep the ten largest.
country_contributions = df.dropna(subset=['Country_Main'])
top_countries = country_contributions['Country_Main'].value_counts().nlargest(10)

fig, ax = plt.subplots(figsize=(12, 6))
# hue duplicates x and the legend is switched off: this lets `palette` be used
# without triggering seaborn's FutureWarning.
sns.barplot(
    x=top_countries.index,
    y=top_countries.values,
    hue=top_countries.index,
    palette='rocket',
    legend=False,
    ax=ax,
)
ax.set_title('Top 10 Countries Contributing to Netflix Content', fontsize=16)
ax.set_xlabel('Country', fontsize=12)
ax.set_ylabel('Number of Titles', fontsize=12)
# Slant the category labels so long country names do not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
fig.savefig('netflix_top_10_countries.png')
plt.close(fig)
