In [None]:
#Import python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # visualizing data
%matplotlib inline
import seaborn as sns

In [None]:
# Load the Netflix titles dataset from a CSV file in the working directory
# into the DataFrame `df`, which every later cell reads or modifies.
df= pd.read_csv('netflix_titles.csv')
# Bare expression: renders the DataFrame as the cell output
df

In [None]:
# Dimensions of the loaded DataFrame as a (rows, columns) tuple
df.shape

In [None]:
# Preview the first rows (head() returns 5 by default) to see the columns and sample values
df.head()

In [None]:
# Summarize column names, non-null counts and dtypes before any cleaning
df.info()

In [None]:
# Count the missing (NaN/None) entries in each column
df.isna().sum()

In [None]:
# Drop rows with null values in the essential columns only.
# 'country', 'director' and 'cast' are excluded here because the notebook fills
# their gaps with 'Unknown' further down; a blanket dropna() would delete those
# rows first, leaving that fill with nothing to do and shrinking the dataset
# far more than necessary.
fillable_cols = ['country', 'director', 'cast']
required_cols = [col for col in df.columns if col not in fillable_cols]
df.dropna(subset=required_cols, inplace=True)

In [None]:
# Drop duplicates (if any).
# With no arguments, rows are compared across all columns and the first
# occurrence of each duplicate group is kept; df is modified in place.
df.drop_duplicates(inplace=True)

In [None]:
# Replace missing values in the descriptive text columns with the label 'Unknown'
for column in ('country', 'director', 'cast'):
    df[column] = df[column].fillna('Unknown')


In [None]:
# Convert date_added to datetime.
# Surrounding whitespace is stripped from the raw strings first so that
# every value can be parsed.
date_text = df['date_added'].str.strip()
df['date_added'] = pd.to_datetime(date_text)


In [None]:
# Check again: confirm the non-null counts and the datetime dtype of
# date_added after the cleaning steps above
df.info()

In [None]:
# Summary statistics for every column; include='all' adds the non-numeric
# columns (count / unique / top / freq) alongside the numeric ones
df.describe(include='all')

In [None]:
#Tally the titles per content type (the 'type' column)
type_count = df['type'].value_counts()

#Bar chart: one bar per content type
bar_labels = type_count.index
bar_heights = type_count.values
plt.bar(bar_labels, bar_heights, color=['blue', 'green'])
plt.xlabel("Type")
plt.ylabel("Count")
plt.title("Movies v/s TV shows")
plt.show()

#Print the exact numbers behind the chart
print(type_count)

Netflix has more movies than TV shows

In [None]:
#Count content by country.
#'Unknown' is the placeholder this notebook writes into missing country values,
#so it is left out: it marks absent data and should not rank as a country.
known_countries = df.loc[df['country'] != 'Unknown', 'country']
top_countries = known_countries.value_counts().head(10)

#Plot bar chart of the ten most frequent country values
plt.bar(top_countries.index, top_countries.values, color='orange')
plt.title('Top 10 countries streaming on Netflix')
plt.xticks(rotation=45, ha='right')  # slant the labels so long names do not overlap
plt.xlabel('Country')
plt.ylabel('Number of titles')
plt.show()

#Print the exact numbers behind the chart
print(top_countries)

The USA has the most content on Netflix, followed by India and the UK.

In [None]:
#Extract year from date_added (requires date_added to already be a datetime column)
df['year_added'] = df['date_added'].dt.year

#Count number of titles added each year, ordered chronologically
year_count = df['year_added'].value_counts().sort_index()

#Plot line chart to show number of titles added to Netflix each year.
#The color name must be exactly 'orange': matplotlib does not strip whitespace,
#so 'orange ' (with a trailing space) is rejected as an invalid color.
plt.plot(year_count.index, year_count.values, marker='o', color='orange')
plt.title('Content added to Netflix over the years')
plt.xlabel('Year')
plt.ylabel('Number of titles added')
plt.show()

#Show the yearly counts as the cell output
year_count


Rapid growth in Netflix content after 2015.

In [None]:
# Keep only the movie rows; copy so the new column is not written onto a view of df
is_movie = df['type'] == 'Movie'
movies = df[is_movie].copy()

# Strip the " min" text from duration and store the remainder as a float
duration_text = movies['duration'].str.replace(' min', '')
movies['duration_num'] = duration_text.astype(float)

# Histogram of movie durations
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(movies['duration_num'], bins=30, color='blue', edgecolor='black')
ax.set_title('Distribution of Movie Durations')
ax.set_xlabel('Duration (minutes)')
ax.set_ylabel('Frequency')
plt.show()

# Report the mean duration across all movies
print("Average Movie Duration:", movies['duration_num'].mean())


The majority of movies have a duration of 80 to 120 minutes.

In [None]:
#Keep only the TV show rows; copy so the new column is not written onto a view of df
is_tv_show = df['type'] == 'TV Show'
tv_shows = df[is_tv_show].copy()

#Strip the ' Season' text and any remaining 's' from duration, then cast what is left to int
season_text = tv_shows['duration'].str.replace(' Season', '').str.replace('s', '')
tv_shows['seasons_num'] = season_text.astype(int)

#Tally the shows per season count once and reuse it for both axes of the bar chart
season_counts = tv_shows['seasons_num'].value_counts()
plt.bar(season_counts.index, season_counts.values, color='green')
plt.xlabel('Number of Seasons')
plt.ylabel('Count')
plt.title('Distribution of TV show seasons')
plt.show()


Most TV shows run for 1-2 seasons

In [None]:
from collections import Counter

#Flatten the 'listed_in' column into one list of genre labels (entries are split on ", ")
all_genres = [
    label
    for entry in df['listed_in'].dropna()
    for label in entry.split(", ")
]

#Ten most frequent genres as (genre, count) pairs
genre_counts = Counter(all_genres).most_common(10)

#Separate the pairs into parallel lists for plotting
genres = [genre for genre, total in genre_counts]
counts = [total for genre, total in genre_counts]

#Bar chart of the top 10 most common genres on Netflix
plt.bar(genres, counts, color='purple')
plt.xticks(rotation=50, ha='right')
plt.ylabel('Count')
plt.title('Top 10 genres on Netflix')
plt.show()

Netflix is dominated by International content, Drama and Comedy.

### Key Insights
1. Movies dominate over TV Shows.  
2. USA contributes the most content, followed by India & UK.  
3. Netflix experienced significant content growth after 2015.
4. Most movies are around 100 minutes long.  
5. Most TV Shows have just 1–2 seasons.  
6. International content, Drama, and Comedy are the most common genres.  

### Conclusion
Netflix's catalog is movie-dominated and international in scope, led by the U.S. Content additions surged after 2015, most movies run a standard feature length, most TV shows last only 1–2 seasons, and the catalog focuses mainly on International content, Drama, and Comedy.  