In [1]:
# Netflix Movies Dashboard 📊
# Author: Subrat Tandon (2025 Project)

# Step 1: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook output (IPython magic;
# only valid when this code runs under IPython/Jupyter, not plain Python).
%matplotlib inline  

# Step 2: Load Dataset
# Dataset source: https://www.kaggle.com/datasets/shivamb/netflix-shows
# The path is relative to the notebook's working directory; edit DATA_PATH
# if the CSV is stored somewhere else.
DATA_PATH = "data/netflix_titles.csv"

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Same exception type as before, but with instructions on how to fix it.
    raise FileNotFoundError(
        f"Dataset not found at {DATA_PATH!r}. Download netflix_titles.csv from "
        "https://www.kaggle.com/datasets/shivamb/netflix-shows and place it at "
        "that path, or update DATA_PATH."
    ) from exc

# Step 3: Basic Info
# Print a quick overview: frame dimensions, column index, and the number of
# missing values in each column.
overview = (
    ("Dataset Shape:", df.shape),
    ("\nDataset Columns:\n", df.columns),
    ("\nMissing Values:\n", df.isnull().sum()),
)
for label, value in overview:
    print(label, value)

# Step 4: Data Cleaning
# Parse 'date_added' into datetimes and derive year/month columns from it.
#
# pandas >= 2.0 infers a single format from the first value and raises on any
# string that does not match it, so text values are stripped first and anything
# still unparseable is coerced to NaT instead of aborting the notebook.
# NOTE(review): the Kaggle CSV is reported to contain dates with leading
# whitespace (e.g. " August 4, 2017") - confirm against your copy.
raw_dates = df['date_added']
if pd.api.types.is_object_dtype(raw_dates) or pd.api.types.is_string_dtype(raw_dates):
    # Only text columns have .str; skipping this keeps the step re-runnable
    # after the column has already been converted to datetime.
    raw_dates = raw_dates.str.strip()

df['date_added'] = pd.to_datetime(raw_dates, errors='coerce')

# Surface silent data loss: values that were present but could not be parsed.
unparsed = int((raw_dates.notna() & df['date_added'].isna()).sum())
if unparsed:
    print(f"Warning: {unparsed} 'date_added' value(s) could not be parsed and were set to NaT")

# Rows without a date yield NaN in both derived columns.
df['year_added'] = df['date_added'].dt.year
df['month_added'] = df['date_added'].dt.month

# Step 5: Distribution of Movies vs TV Shows
# One bar per distinct value of the 'type' column.
# NOTE(review): seaborn >= 0.13 emits a FutureWarning when `palette` is passed
# without `hue`; consider hue='type' with legend=False once the minimum seaborn
# version for this notebook is confirmed.
fig, ax = plt.subplots(figsize=(6, 5))
sns.countplot(data=df, x='type', palette="Set2", ax=ax)
ax.set_title("Movies vs TV Shows on Netflix")
ax.set_xlabel("Type")
ax.set_ylabel("Count")
plt.show()

# Step 6: Top 10 Countries with most content
# A single title can list several comma-separated countries (co-productions).
# Counting the raw strings would rank a combination such as "A, B" as if it
# were its own country, so split the lists and count each country once per
# title it appears on.
# NOTE(review): assumes ',' is the only separator used in 'country' - verify.
countries_per_title = (
    df['country']
    .dropna()
    .str.split(',')
    .explode()
    .str.strip()
)
# Drop empty fragments left behind by stray leading/trailing commas.
countries_per_title = countries_per_title[countries_per_title != '']
top_countries = countries_per_title.value_counts().head(10)

plt.figure(figsize=(10,5))
sns.barplot(x=top_countries.values, y=top_countries.index, palette="coolwarm")
plt.title("Top 10 Countries Producing Netflix Content")
plt.xlabel("Number of Titles")
plt.ylabel("Country")
plt.show()

# Step 7: Content added over the years
# 'year_added' holds whole years, so use one unit-width bar centred on each
# year (discrete=True). A fixed bins=20 puts bin edges at fractional positions,
# which can leave some bars empty and let others swallow two years.
plt.figure(figsize=(12,6))
sns.histplot(df['year_added'].dropna(), discrete=True, kde=False, color="purple")
plt.title("Content Added to Netflix Over the Years")
plt.xlabel("Year")
plt.ylabel("Count")
plt.show()

# Step 8: Top 10 Directors
# A title can credit several comma-separated directors. Counting the raw
# strings would rank a co-directing pair as one "director", so split the
# credits and count every director individually.
# NOTE(review): assumes ',' never occurs inside a single director's name - verify.
directors_per_title = (
    df['director']
    .dropna()
    .str.split(',')
    .explode()
    .str.strip()
)
# Drop empty fragments left behind by stray leading/trailing commas.
directors_per_title = directors_per_title[directors_per_title != '']
top_directors = directors_per_title.value_counts().head(10)

plt.figure(figsize=(10,5))
sns.barplot(x=top_directors.values, y=top_directors.index, palette="viridis")
plt.title("Top 10 Directors on Netflix")
plt.xlabel("Number of Titles")
plt.ylabel("Director")
plt.show()

# Step 9: Rating Distribution
# Horizontal count plot of 'rating', with bars ordered by how often each
# rating occurs (value_counts() sorts most frequent first).
rating_order = df['rating'].value_counts().index

fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, y='rating', order=rating_order, palette="magma", ax=ax)
ax.set_title("Distribution of Ratings on Netflix")
ax.set_xlabel("Count")
ax.set_ylabel("Rating")
plt.show()

# Step 10: Genre/Listed in Analysis
from collections import Counter

# Concatenate every non-null 'listed_in' value into one comma-separated string,
# then tally the whitespace-trimmed labels obtained by splitting on commas.
all_genres = ','.join(df['listed_in'].dropna())
genre_count = Counter(map(str.strip, all_genres.split(',')))

# The ten most frequent labels as a two-column frame for plotting.
genre_df = pd.DataFrame(genre_count.most_common(10), columns=['Genre','Count'])

fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=genre_df, x='Count', y='Genre', palette="cubehelix", ax=ax)
ax.set_title("Top 10 Genres on Netflix")
ax.set_xlabel("Number of Titles")
ax.set_ylabel("Genre")
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'netflix_titles.csv'