## Extracting Dataset of Netflix TV Shows and Movies

In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the Netflix titles CSV from the relative path below into a DataFrame.
df= pd.read_csv('../input/netflix-shows/netflix_titles.csv')
# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# Show the table dimensions as a (rows, columns) tuple.
df.shape

In [None]:
# Print a summary of the columns: dtype and non-null count for each one.
df.info()

## Data Cleaning

In [None]:
# Count the missing values in every column.
df.isna().sum()

In [None]:
# Remove the 'director' and 'cast' columns from df in place.
df.drop(['director', 'cast'], axis='columns', inplace=True)

In [None]:
# Most frequent values of the country column (top five).
df['country'].value_counts().head()

In [None]:
# Frequency of every rating label, most common first.
df['rating'].value_counts()

In [None]:
# Fill the gaps through the DataFrame itself. The previous form,
# `df.country.fillna(..., inplace=True)`, is an in-place call on a column
# obtained by attribute access (chained assignment): pandas warns about it and,
# with Copy-on-Write enabled, it leaves df unchanged.
# The replacement values are hard-coded constants, not computed from the data.
df.fillna({'country': 'United States', 'rating': 'TV-MA'}, inplace=True)
# Rows without a date_added value are dropped rather than imputed.
df.dropna(subset=['date_added'], inplace=True)
# Re-check how many missing values remain per column.
df.isna().sum()

In [None]:
# Preview the first rows again, now that the cleaning cells above have run.
df.head()

## Data Visualization

In [None]:
# Colours drawn in the 'Netflix Palette' swatch strip.
swatch_colors = ['#db0000', '#564d4d', '#000000', '#ffffff', '#831010']
# NOTE(review): sns.palplot only draws the swatches and returns None, so
# netflix_palette ends up bound to None rather than to the colour list.
netflix_palette = sns.palplot(swatch_colors)
plt.title('Netflix Palette', fontsize=16)

In [None]:
# Switch seaborn to a two-colour palette on a white grid; these are global
# settings, so they also apply to plots drawn after this cell.
sns.set_palette(['#db0000', '#564d4d'])
sns.set_style('whitegrid')

# Share of each title type. The counts are computed once and reused for both
# the wedge sizes and the wedge labels.
type_counts = df['type'].value_counts()
plt.pie(
    x=type_counts,
    labels=type_counts.index,
    # One offset per wedge: assumes exactly two distinct types -- TODO confirm.
    explode=[0.05, 0],
    autopct='%1.2f%%',
    shadow=True,
    startangle=90,
)
plt.title('Movie - TV Show', fontsize=20)

In [None]:
# Wide canvas for the ten country labels on the x axis.
plt.subplots(figsize=(20, 5))
# Ten most frequent country values; their order fixes the bar order below.
top_country = df['country'].value_counts().head(10)
# One bar group per country, split by title type.
sns.countplot(data=df, x='country', hue='type', order=top_country.index)
plt.title('Top 10 - Type per Country', fontsize=20)

In [None]:
# Take the year a title was added from the last whitespace-separated token of
# date_added (assumes the text ends with the year -- TODO confirm format).
# Splitting on any whitespace, instead of on a single ' ', tolerates leading,
# trailing or repeated spaces; a trailing space would otherwise produce an
# empty last token and make the integer conversion fail.
df['year_added'] = df['date_added'].str.split().str[-1].astype(int)
# Show the column dtypes to confirm year_added is stored as an integer.
df.dtypes

In [None]:
# Number of titles per year_added value, split by title type.
plt.figure(figsize=(10, 5))
sns.countplot(data=df, x='year_added', hue='type')
plt.title('Year Added per Type', fontsize=20)

In [None]:
plt.figure(figsize=(10, 5))
# Cumulative histogram of release_year with a KDE curve, one series per type.
ax = sns.histplot(x=df.release_year, hue=df.type, kde=True, cumulative=True)
# Restrict the visible x range to 1990-2020; this is applied after plotting,
# so it only changes the view, not the data that was binned.
plt.xlim(1990, 2020)
# Title spelling corrected ("Realise" -> "Released").
plt.title('Titles Released per Year(1990 - 2020)', fontsize=20)

In [None]:
# Keep the text before the first space of each duration value and store it as
# an integer column (presumably minutes for movies and seasons for TV shows,
# going by the column name -- verify against the data).
df['min_season'] = (
    df['duration']
    .map(lambda text: text.split(' ')[0])
    .astype(str)
    .astype(int)
)

In [None]:
# Split the table by title type.
df_movie = df.loc[df['type'] == 'Movie']
df_show = df.loc[df['type'] == 'TV Show']

# One figure with two side-by-side panels.
figura = plt.figure(figsize=(20, 5))

# Left panel: distribution of min_season for movies.
figura.add_subplot(1, 2, 1)
sns.histplot(data=df_movie['min_season'], kde=True)
plt.title('Duration per Movie(Min)', fontsize=20)

# Right panel: distribution of min_season for TV shows.
figura.add_subplot(1, 2, 2)
sns.histplot(data=df_show['min_season'], kde=True)
plt.title('Duration per TV Show(Season)', fontsize=20)

In [None]:
plt.figure(figsize=(10, 5))
# Pass the ratings explicitly as x=. The only positional parameter of
# sns.countplot is `data`, so a positional Series is not treated as the x
# variable in current seaborn releases (older ones emit a FutureWarning).
# Bars are ordered from the most to the least frequent rating.
sns.countplot(x=df.rating, order=df.rating.value_counts().index, hue=df.type)
plt.title('Rating per Type', fontsize=20)

In [None]:
# One tall figure with two stacked panels.
figura = plt.figure(figsize=(10, 15))

# Top panel: the 15 most frequent listed_in values among movies.
figura.add_subplot(2, 1, 1)
movie_genres = df_movie['listed_in'].value_counts().head(15)
sns.barplot(
    x=movie_genres,
    y=movie_genres.index,
    palette=['#db0000', '#564d4d'],
)
plt.title('Top 15 - Genre Movie', fontsize=20)

# Bottom panel: the 15 most frequent listed_in values among TV shows.
figura.add_subplot(2, 1, 2)
show_genres = df_show['listed_in'].value_counts().head(15)
sns.barplot(
    x=show_genres,
    y=show_genres.index,
    palette=['#db0000', '#564d4d'],
)
plt.title('Top 15 - Genre TV Show', fontsize=20)