### Libraries Used ###

In [1]:
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

### Import and Load ###

In [None]:
# Show every column when a DataFrame is rendered (no column truncation)
pd.options.display.max_columns = None

# Read netflix_titles.csv from the current working directory
df = pd.read_csv("netflix_titles.csv")

# Print (rows, columns), then render the first rows as the cell's output
print(df.shape)
df.head()

### Data Cleaning ###

In [3]:
# Parse date_added into datetimes. Values are stripped first so that strings
# padded with whitespace (if any) are not silently coerced to NaT; whatever
# still cannot be parsed becomes NaT because of errors='coerce'.
# astype('string') keeps this cell safe to re-run after the column has
# already been converted to datetime.
df['date_added'] = pd.to_datetime(
    df['date_added'].astype('string').str.strip(),
    errors='coerce',
)

# Calendar year and month of date_added (missing where date_added is NaT)
df['year_added'] = df['date_added'].dt.year
df['month_added'] = df['date_added'].dt.month

# Split duration into its numeric part and its unit word.
# - raw-string patterns avoid the invalid "\d" escape warning
# - expand=False returns a Series rather than a one-column DataFrame
# - duration_type is left missing (NaN) where duration is missing, instead of
#   being turned into the literal string 'nan' by astype(str)
df['duration_int'] = df['duration'].str.extract(r'(\d+)', expand=False).astype(float)
df['duration_type'] = df['duration'].str.extract(r'([a-zA-Z]+)', expand=False)

# Replace missing country / director / cast values with the 'Unknown' placeholder
for column in ('country', 'director', 'cast'):
    df[column] = df[column].fillna('Unknown')

### Quick Exploratory Analysis ###

#### Number of Titles by Type ####

In [None]:
# One bar per value of `type`, drawn on an explicitly created 6x4 axes
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(x='type', data=df, palette='pastel', ax=ax)
ax.set_title("Movies vs. TV Shows on Netflix")
plt.show()

#### Titles Added per Year ####

In [None]:
# Count titles per year_added, ordered chronologically.
# year_added is float when any date_added is NaT, which would label the bars
# "2019.0" etc.; rows without a year are dropped and the rest cast to int so
# the tick labels read as plain years.
titles_per_year = (
    df['year_added']
    .dropna()
    .astype(int)
    .value_counts()
    .sort_index()
)

fig, ax = plt.subplots(figsize=(10, 5))
titles_per_year.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title("Number of Titles Added to Netflix Each Year")
ax.set_xlabel("Year Added")
ax.set_ylabel("Count")
plt.show()

#### Top 10 Genres ####

In [None]:
# Break every comma-separated `listed_in` value into its individual labels
# (rows with a missing `listed_in` are skipped).
all_genres = [
    label
    for listing in df['listed_in'].dropna()
    for label in listing.split(',')
]

# Tally labels after trimming whitespace; empty tokens (e.g. from a stray
# comma) are not counted as a genre.
genre_counts = Counter(
    name for name in (label.strip() for label in all_genres) if name
)

# most_common(10) orders by count and breaks ties by first appearance, so the
# chart is deterministic when several genres share the same count.
top_genres = pd.Series(dict(genre_counts.most_common(10)), dtype='int64')

ax = top_genres.plot(kind='bar', color='coral')
ax.set_title("Top 10 Genres on Netflix")
ax.set_xlabel("Genre")
ax.set_ylabel("Number of Titles")
plt.show()

#### Top 10 Countries by Titles ####

In [None]:
# Count titles per individual country.
# NOTE(review): assumes multi-country titles store their countries as one
# comma-separated string (like `listed_in`) -- confirm against the data. If a
# value holds a single country, the split is a no-op.
# The 'Unknown' placeholder used for missing countries and empty tokens are
# excluded so they cannot rank as a "country".
country_counts = (
    df['country']
    .str.split(',')
    .explode()
    .str.strip()
    .loc[lambda names: names.ne('') & names.ne('Unknown')]
    .value_counts()
)

ax = country_counts.head(10).plot(kind='bar', color='teal')
ax.set_title("Top 10 Content-Producing Countries on Netflix")
ax.set_xlabel("Country")
ax.set_ylabel("Number of Titles")
plt.show()