## Importing Libraries

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Loading Dataset

In [None]:
# Read the Netflix titles CSV (path is relative to the notebook's working directory).
df = pd.read_csv(
    r'../input/netflix-shows/netflix_titles.csv',
)

In [None]:
# Print a summary of the frame: columns, dtypes, non-null counts and memory usage.
df.info()

## Data Cleaning

In [None]:
# Discard, in place, the columns that the analysis below does not use.
unused_columns = ['show_id', 'director', 'description', 'release_year']
df.drop(columns=unused_columns, inplace=True)

In [None]:
# Parse the textual `date_added` column into datetimes. Surrounding whitespace
# is stripped first: pandas >= 2.0 infers a single format from the first value
# and applies it strictly, so a padded entry would raise instead of parsing.
# NOTE(review): assumes the column holds strings/NaN as read from the CSV.
df['date_added'] = pd.to_datetime(df['date_added'].str.strip())

In [None]:
# Keep only the calendar year of each parsed date; the plots below group by it.
df['date_added'] = df['date_added'].dt.year

In [None]:
# Preview the first five rows of the frame.
df.head(n=5)

In [None]:
# Count rows that exactly repeat an earlier row (count only; nothing is removed here).
df.duplicated().sum()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Remove, in place, every row that has a missing value in any column.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Re-check the per-column missing-value counts.
df.isna().sum()

## Visualizations

In [None]:
# Pie chart of how the rows split across the values of `type`.
type_counts = df['type'].value_counts()

plt.figure(figsize=(18, 7))
plt.pie(
    type_counts,
    labels=type_counts.index,
    explode=(0.025, 0.025),
    colors=['slategrey', 'cornflowerblue'],
    autopct='%1.1f%%',
    startangle=180,
)
plt.title("Shows - Movies")
plt.show()

In [None]:
# Row counts for the 15 most frequent `country` values, split by `type`.
top_countries = df['country'].value_counts().index[:15]

sns.set_palette("Set2")
fig = plt.subplots(figsize=(12, 7))
sns.countplot(data=df, y='country', hue='type', order=top_countries)
plt.show()

In [None]:
# Number of titles per year added, split by type. `date_added` holds the year
# extracted from the date a title was added (`release_year` was dropped during
# cleaning), so the chart is labelled as "added" rather than "release".
sns.set_palette("Set2")
fig, ax = plt.subplots(figsize=(12, 7))
# The year column is float if any date was missing when the year was
# extracted; cast to int so the ticks read "2019" rather than "2019.0".
# Rows with missing values have already been dropped, so the cast is safe.
year_added = df['date_added'].astype(int)
sns.countplot(x=year_added, hue=df['type'], ax=ax)
plt.title('Titles Added to Netflix per Year')
plt.xlabel('Year')
plt.ylabel('Count')
plt.show()

In [None]:
# Row counts per `rating`, limited to the 15 most frequent values, most common first.
top_ratings = df['rating'].value_counts().index[:15]

sns.set_palette("RdBu")
fig = plt.subplots(figsize=(12, 7))
sns.countplot(data=df, x='rating', order=top_ratings)
plt.title('Top Ratings')
plt.show()

In [None]:
# Row counts for the 25 most frequent `listed_in` values, most common first.
top_listed_in = df['listed_in'].value_counts().index[:25]

fig = plt.subplots(figsize=(12, 7))
sns.countplot(data=df, y='listed_in', order=top_listed_in)
plt.title('Top 25 Genres')
plt.show()

In [None]:
# Split the cleaned frame by title type. Explicit copies make each subset an
# independent DataFrame, so the duration conversion that follows modifies the
# subset itself instead of triggering pandas' SettingWithCopyWarning.
netflix_movies = df[df['type'] == 'Movie'].copy()
netflix_shows = df[df['type'] == 'TV Show'].copy()

In [None]:
# Turn the textual durations into integers: minutes for movies, number of
# seasons for TV shows. `assign` builds a new frame rather than writing into a
# filtered subset of `df`, which avoids chained-assignment warnings and the
# fragile attribute-style column assignment.
netflix_movies = netflix_movies.assign(
    duration=netflix_movies['duration'].str.replace(' min', '', regex=False).astype(int)
)
# A single pattern strips both the " Season" and " Seasons" suffixes, so the
# singular form no longer needs to be rewritten first.
netflix_shows = netflix_shows.assign(
    duration=netflix_shows['duration'].str.replace(r' Seasons?$', '', regex=True).astype(int)
)

In [None]:
# Distribution of movie durations (minutes, after the integer conversion above).
# `sns.distplot` is deprecated; `histplot` with a KDE overlay on a density
# scale is the documented replacement for this histogram-plus-curve view.
fig, ax = plt.subplots(figsize=(12, 7))
sns.histplot(data=netflix_movies, x='duration', bins=100, kde=True, stat='density', ax=ax)
plt.title('Movies Duration');

In [None]:
# Number of TV shows for each season count.
# `plt.subplots` returns (figure, axes); unpack it instead of indexing the tuple.
fig, ax = plt.subplots(figsize=(12, 7))
# Pass the column by keyword: recent seaborn releases accept only `data`
# positionally, so a bare Series would not be read as the x variable.
g2 = sns.countplot(x=netflix_shows['duration'], ax=ax, palette="pastel");
g2.set_title("Netflix TV Shows Seasons")
g2.set_ylabel("Count")
g2.set_xlabel("Season(s)")