# 📊 Complete Netflix EDA Notebook
Includes cleaning, feature engineering, and all major visualizations.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the raw catalogue into a DataFrame; lines the parser cannot handle are
# skipped instead of aborting the read.
# NOTE(review): latin1 accepts any byte sequence, so a UTF-8 file would load
# without error but with garbled non-ASCII text — confirm the file's encoding.
df = pd.read_csv(
    filepath_or_buffer='netflix_titles.csv',
    encoding='latin1',
    on_bad_lines='skip',
)
# Preview the first rows as the cell output.
df.head()

## ✅ 2. Cleaning the Data

In [None]:
# Drop columns in which every value is missing.
df = df.dropna(axis=1, how='all')

# Convert date_added to datetime. Values are stripped first: stray leading or
# trailing whitespace can stop a value from matching the date format pandas
# infers, and errors='coerce' would then silently turn that row into NaT.
# Anything that still cannot be parsed becomes NaT.
df['date_added'] = pd.to_datetime(df['date_added'].str.strip(), errors='coerce')

# Replace missing text fields with an explicit 'Unknown' placeholder so later
# string operations and counts do not have to deal with NaN.
df['director'] = df['director'].fillna('Unknown')
df['cast'] = df['cast'].fillna('Unknown')
df['country'] = df['country'].fillna('Unknown')

## ✅ 3. Feature Engineering

In [None]:
# Calendar parts of the date each title was added (NaN where date_added is NaT).
df['year_added'] = df['date_added'].dt.year
df['month_added'] = df['date_added'].dt.month

# Main genre = first entry of the comma-separated listed_in field, trimmed of
# surrounding whitespace. The vectorised .str accessor leaves a missing
# listed_in as NaN, whereas calling x.split on a NaN value raises
# AttributeError.
df['genre_main'] = df['listed_in'].str.split(',').str[0].str.strip()

## 🎨 4. Visualizations

In [None]:
# Bar chart of how many titles carry each value of the `type` column.
df['type'].value_counts().plot.bar(title='Movies vs TV Shows')
plt.show()

In [None]:
# Line chart of the number of titles per year_added, ordered by year.
df['year_added'].value_counts().sort_index().plot(
    kind='line',
    title='Content Added by Year',
)
plt.show()

In [None]:
# A single `country` value can list several comma-separated countries. Split
# and explode them so every country a title lists is counted on its own,
# instead of treating each distinct combination as a separate "country".
# Blank pieces left by stray commas and the 'Unknown' placeholder used for
# missing values are excluded because they are not countries.
(
    df['country']
    .str.split(',')
    .explode()
    .str.strip()
    .loc[lambda s: ~s.isin(['', 'Unknown'])]
    .value_counts()
    .head(10)
    .plot(kind='barh')
)
plt.title('Top 10 Content-Producing Countries')
plt.show()

In [None]:
# Bar chart of the ten most frequent main genres.
df['genre_main'].value_counts().iloc[:10].plot.bar()
plt.gca().set_title('Top Genres')
plt.show()

In [None]:
# Bar chart of how often each rating value occurs.
df.loc[:, 'rating'].value_counts().plot.bar(title='Ratings Distribution')
plt.show()

In [None]:
# Line chart of the number of titles per month_added, ordered by month number.
df['month_added'].value_counts().sort_index().plot.line()
plt.gca().set_title('Content Added by Month')
plt.show()

In [None]:
# Missing directors were filled with the placeholder 'Unknown' during
# cleaning; leave that placeholder out so it cannot rank as a "top director".
df.loc[df['director'] != 'Unknown', 'director'].value_counts().head(10).plot(kind='barh')
plt.title('Top Directors')
plt.show()

In [None]:
# Scatter plot: one point per title, release year on x against year added on y.
plt.scatter(x=df['release_year'], y=df['year_added'])
plt.gca().set(
    title='Release Year vs Added Year',
    xlabel='Release Year',
    ylabel='Year Added',
)
plt.show()

In [None]:
# wordcloud is an optional dependency: only a failed import is reported as
# "not available". Errors in the data or plotting code are left to surface
# normally instead of being mislabelled as a missing package.
try:
    from wordcloud import WordCloud
except ImportError as e:
    print('WordCloud not available:', e)
else:
    # Join every non-missing description into one text corpus.
    text = ' '.join(df['description'].dropna().astype(str))
    if text.strip():
        wc = WordCloud(width=800, height=400).generate(text)
        plt.imshow(wc)
        plt.axis('off')
        plt.title('WordCloud of Descriptions')
        plt.show()
    else:
        # Nothing to draw; skip the word cloud rather than pass it empty text.
        print('No description text available for a word cloud.')