In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
import plotly.express as px
from scipy.stats import norm
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_files = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the Netflix titles CSV from the Kaggle input directory into a DataFrame.
netflix_csv = '../input/netflix-shows/netflix_titles.csv'
df = pd.read_csv(netflix_csv)

In [None]:
# Preview the first rows of the freshly loaded frame.
df.head()

In [None]:
# (rows, columns) of the raw data.
df.shape

**Data Cleaning**

In [None]:
# Number of missing values per column, before any cleaning.
df.isna().sum()

In [None]:
# Remove the 'director' and 'cast' columns, which the cells below do not use.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps the
# cell idempotent: re-running it does not raise KeyError once the columns are gone.
df = df.drop(columns=['director', 'cast'], errors='ignore')

In [None]:
# Frequency of each value in the 'country' column (used to pick the fill value below).
df.country.value_counts()

In [None]:
# Fill missing countries with 'United States'
# (presumably the most frequent value in the counts above - confirm).
# The result is assigned back to the column: an inplace replace on `df.country`
# acts on an intermediate Series and does not update `df` under pandas
# Copy-on-Write.
df['country'] = df['country'].fillna('United States')

In [None]:
# Frequency of each value in the 'rating' column (used to pick the fill value below).
df.rating.value_counts()

In [None]:
# Fill missing ratings with 'TV-MA'
# (presumably the most frequent value in the counts above - confirm).
# The result is assigned back to the column: an inplace replace on `df.rating`
# acts on an intermediate Series and does not update `df` under pandas
# Copy-on-Write.
df['rating'] = df['rating'].fillna('TV-MA')

In [None]:
# Inspect the rows that have no 'date_added' value before removing them.
df[df.date_added.isna()]

In [None]:
# Keep only the rows that have a 'date_added' value.
df = df.dropna(subset=['date_added'])

In [None]:
# Re-check missing values per column after the cleaning steps above.
df.isna().sum()

**Data Visualisation**

In [None]:
# Split the catalogue into movies and TV shows based on the 'type' column.
content_type = df['type']
movie = df.loc[content_type == 'Movie']
tv = df.loc[content_type == 'TV Show']

In [None]:
# Bar chart of the number of titles per content type.
type_fig, type_ax = plt.subplots(figsize=(8, 8))
sns.countplot(data=df, x='type', palette="viridis", ax=type_ax)

In [None]:
# Pie chart of the share of each content type.
# Labels come from the value_counts index so every wedge is paired with its own
# category; a hard-coded label list is only correct for one particular ordering
# of the counts.
type_counts = df['type'].value_counts().sort_values()
gen = list(type_counts.index)
explode = [0.1] * len(type_counts)  # one offset per wedge, whatever the number of types
colors = ['bisque', 'aquamarine']

plt.figure(figsize=(8, 8))
plt.pie(type_counts, labels=gen, wedgeprops={'edgecolor': 'white'},
        explode=explode, startangle=90, colors=colors)
plt.title('Netflix Content Classification : ')
plt.show()

In [None]:
# Histogram of movie durations with a fitted normal curve overlaid.
# The style is set first, since it only applies to axes created afterwards.
sns.set_style('darkgrid')

# Pull the numeric part out of the duration strings and convert it to numbers
# (presumably minutes - confirm against the data). The raw string avoids the
# invalid '\d' escape; rows without a number are dropped.
movie_duration = (
    movie['duration']
    .str.extract(r'(\d+)', expand=False)
    .astype(float)
    .dropna()
)

# Maximum-likelihood normal fit, drawn over a density-normalised histogram so
# both share the same y scale.
mu, sigma = norm.fit(movie_duration)
duration_grid = np.linspace(movie_duration.min(), movie_duration.max(), 200)

fig, ax = plt.subplots(figsize=(15, 7))
sns.histplot(movie_duration, stat='density', color='springgreen', ax=ax)
ax.plot(duration_grid, norm.pdf(duration_grid, mu, sigma), color='black')
ax.set_title('Normal Distribution for Movies')
plt.show()

In [None]:
# Interactive histogram of TV-show durations, with categories ordered by
# descending total count.
fig = px.histogram(x=tv['duration'])
fig.update_layout(
    title="Distribution of TV Shows duration",
    xaxis_title="Duration of TV ShowS",
    xaxis_categoryorder="total descending",
)
fig.show()

In [None]:
# Number of titles per value of the 'country' column, most frequent first.
counts = df['country'].value_counts()
counts

In [None]:
# Bar chart of the 15 most frequent countries.
# x and y are taken from the same 15-row slice so their lengths always match.
# `counts` comes from value_counts(), which is already sorted in descending order.
top_countries = counts.head(15)
trace = go.Bar(
    x=top_countries.index,
    y=top_countries.values,
    marker=dict(
        opacity=0.8,
        # one colour-scale value per bar
        color=np.arange(len(top_countries)),
    ),
)
fig = go.Figure(data=[trace])

fig.update_layout(title="Top 15 Countries by Content")
fig.update_xaxes(title="Country")
fig.update_yaxes(title="Count")

fig.show()

In [None]:
# The 18 release years with the most titles, most frequent first.
yr_count = df['release_year'].value_counts().head(18).index
yr_count

In [None]:
# Horizontal count plot of titles per release year, limited to and ordered by
# the years in yr_count.
year_fig, year_ax = plt.subplots(figsize=(10, 10))
sns.countplot(y="release_year", data=df, order=yr_count, palette="viridis", ax=year_ax)

In [None]:
# Count movies per rating, order the ratings from most to least common,
# and draw the counts as a point plot.
movie_ratings = (
    movie.groupby(['rating'])['show_id']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

fig_dims = (18, 7)
fig, ax = plt.subplots(figsize=fig_dims)
sns.pointplot(data=movie_ratings, x='rating', y='count', ax=ax)
ax.set_title('Top Movie Ratings Based On Rating System', size='20')
plt.show()

In [None]:
# Count TV shows per rating, order the ratings from most to least common,
# and draw the counts as a point plot.
tv_ratings = (
    tv.groupby(['rating'])['show_id']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

fig_dims = (18, 7)
fig, ax = plt.subplots(figsize=fig_dims)
sns.pointplot(data=tv_ratings, x='rating', y='count', ax=ax)
ax.set_title('Top Tv Shows Based On Rating System', size='20')
plt.show()

In [None]:
# Row subsets: all TV shows, and titles whose 'country' value is exactly
# 'United States' or exactly 'India'.
tv_df = df.loc[df['type'] == 'TV Show']
usa_df = df.loc[df['country'] == 'United States']
df_ind = df.loc[df['country'] == 'India']

In [None]:
# Number of titles per release year in the United States subset.
usa_df.release_year.value_counts()

In [None]:
# Counts of identical rows in the India subset.
# NOTE(review): `cou` is not used by any visible cell - confirm it is still needed.
cou = df_ind.value_counts()

# The 11 release years with the most titles, per country.
ind_count = df_ind['release_year'].value_counts().head(11)
usa_count = (
    usa_df['release_year']
    .value_counts()
    .sort_values(ascending=False)
    .head(11)
)
usa_count

In [None]:
from plotly.subplots import make_subplots

# Side-by-side bar charts of titles per release year: India on the left, USA on
# the right.
fig = make_subplots(rows=1, cols=2)

# row/col must be given explicitly: without them every trace is drawn in the
# first subplot and the second panel stays empty.
fig.add_trace(
    go.Bar(
        x=ind_count.index,
        y=ind_count,
        name="India",
        marker=dict(opacity=0.8, color="midnightblue"),
    ),
    row=1, col=1,
)
fig.add_trace(
    go.Bar(
        x=usa_count.index,
        y=usa_count,
        name="USA",
        marker=dict(opacity=0.8, color="firebrick"),
    ),
    row=1, col=2,
)

# NOTE(review): ind_count / usa_count hold the 11 most frequent release years,
# which are not guaranteed to be exactly 2010-2021 - confirm the axis title.
fig.update_xaxes(title="Year (2010-2021)")
fig.update_yaxes(title="Number of movies and shows released")
fig.update_layout(height=600, width=800, title_text="Side By Side Subplots")
fig.show()