In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
warnings.filterwarnings('ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Netflix titles CSV from the Kaggle input directory into a DataFrame.
netflix = pd.read_csv(
    filepath_or_buffer='/kaggle/input/netflix-shows/netflix_titles.csv',
)

In [None]:
# Preview the first five rows of the dataset.
netflix.head(n=5)

In [None]:
# Count the missing values in each column.
netflix.isna().sum()

In [None]:
# Show the dtype pandas assigned to each column.
netflix.dtypes

In [None]:
# Show the dataset dimensions as (rows, columns).
netflix.shape

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Default every figure in this notebook to 10x10 inches unless a cell overrides it.
plt.rcParams.update({"figure.figsize": [10, 10]})

In [None]:
import math
def roundup(x):
    """Round ``x`` up to the next multiple of 100, then add a further 100.

    The result is always an ``int`` and exceeds ``x`` by at least 100,
    e.g. ``roundup(250) == 400`` and ``roundup(300) == 400``.
    """
    hundreds = math.ceil(x / 100.0)
    return (hundreds + 1) * 100
# Bar chart comparing how many titles are movies vs. TV shows.
fig, ax = plt.subplots()
sns.countplot(x='type', data=netflix, ax=ax)

# Derive the y-axis limit from the tallest bar instead of a fixed number, so
# no bar (or its label) is clipped when the dataset grows. `roundup` adds
# headroom above the tallest bar for the count label.
y_max = netflix['type'].value_counts().max()
ax.set_ylim([0, roundup(y_max)])

# Write each bar's count just above the bar, centred horizontally.
for p in ax.patches:
    ax.text(p.get_x() + p.get_width() / 2., p.get_height(), '%d' % int(p.get_height()),
            fontsize=12, color='black', ha='center', va='bottom')
ax.set_title('Comparison of Total TV Shows & Movies', size='15')
plt.show()

In [None]:
# Split the dataset into two frames by the value of the `type` column.
movies = netflix.loc[netflix['type'] == 'Movie']
shows = netflix.loc[netflix['type'] == 'TV Show']

# Movies Analysis

In [None]:
# List the distinct release years present among the movies.
movies.release_year.unique()

In [None]:
# Movies released from 2011 onward. NOTE: despite its name, `recent_5yrs`
# covers every release year >= 2011; the name is kept so any other cell
# that refers to it keeps working.
recent_5yrs = movies[movies.release_year >= 2011]

fig, ax = plt.subplots()
sns.countplot(x='release_year', data=recent_5yrs, ax=ax)

# The tallest bar is the busiest single release year, so take the maximum of
# the per-year counts (not the per-type total) and size the y-axis from it
# rather than from a fixed number that could clip the bars.
y_max = recent_5yrs['release_year'].value_counts().max()
ax.set_ylim([0, roundup(y_max)])

# Write each bar's count just above the bar, centred horizontally.
for p in ax.patches:
    ax.text(p.get_x() + p.get_width() / 2., p.get_height(), '%d' % int(p.get_height()),
            fontsize=12, color='black', ha='center', va='bottom')
ax.set_title('Total Releases of Movies for Last 10 Years ', size='15')
plt.show()

In [None]:
# Count movies per `director` value and order the directors from most to
# fewest titles.
top_director = (
    movies.groupby(['director'])['show_id']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

# Wide figure so the ten director names fit along the x-axis.
fig_dims = (18, 7)
fig, ax = plt.subplots(figsize=fig_dims)
sns.pointplot(x='director', y='count', data=top_director.head(10), ax=ax)
ax.set_title('Top 10 directors by Movies', size='20')
plt.show()

# What the different rating categories denote
**Kids**
TV-Y
Designed to be appropriate for all children

TV-Y7
Suitable for ages 7 and up

G
Suitable for General Audiences

TV-G
Suitable for General Audiences

PG
Parental Guidance suggested

TV-PG
Parental Guidance suggested

**Teens**
PG-13
Parents strongly cautioned. May be inappropriate for ages 12 and under.

TV-14
Parents strongly cautioned. May not be suitable for ages 14 and under.

**Adults**
R
Restricted. May be inappropriate for ages 17 and under.

TV-MA
For Mature Audiences. May not be suitable for ages 17 and under.

NC-17
Inappropriate for ages 17 and under

In [None]:
# Count movies per rating and order the ratings from most to least common.
top_ratings = (
    movies.groupby(['rating'])['show_id']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

fig_dims = (18, 7)
fig, ax = plt.subplots(figsize=fig_dims)
sns.pointplot(x='rating', y='count', data=top_ratings, color='purple', ax=ax)
ax.set_title('Top Ratings', size='20')
plt.show()

In [None]:
from wordcloud import WordCloud, STOPWORDS

# Join every movie's `listed_in` string into one space-separated text and
# build a word cloud from it.
text3 = ' '.join(movies['listed_in'])
wordcloud2 = WordCloud().generate(text3)

# Render the cloud as an image with the axes hidden.
fig, ax = plt.subplots()
ax.imshow(wordcloud2)
ax.axis("off")
ax.set_title('Word Cloud for Type of Movie', size='15')
plt.show()

In [None]:
# Count movies per `country` value and order from most to fewest titles.
top_contributors = (
    movies.groupby(['country'])['show_id']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

fig, ax = plt.subplots()
sns.barplot(x='country', y='count', data=top_contributors.head(5), ax=ax)

# The tallest bar is the largest per-country count. Each country appears only
# once in `top_contributors`, so the maximum must be read from the 'count'
# column; the y-axis is then sized from it rather than from a fixed number
# that could clip the bars.
y_max = top_contributors['count'].max()
ax.set_ylim([0, roundup(y_max)])

# Write each bar's count just above the bar, centred horizontally.
for p in ax.patches:
    ax.text(p.get_x() + p.get_width() / 2., p.get_height(), '%d' % int(p.get_height()),
            fontsize=12, color='black', ha='center', va='bottom')
ax.set_title('Contribution by country in Movies', size='15')
plt.show()

# Let's look at the trends for the country where Netflix was founded (USA)

In [None]:
# Keep only the rows whose `country` value is exactly 'United States'
# (rows with any other country string, or a missing country, are dropped).
usa = netflix.loc[netflix['country'] == 'United States']
usa.head(n=5)

In [None]:
# Bar chart comparing movie vs. TV show counts within the USA subset.
fig, ax = plt.subplots()
sns.countplot(x='type', data=usa, ax=ax)

# Size the y-axis from the tallest bar instead of a fixed number, so no bar
# (or its label) is clipped.
y_max = usa['type'].value_counts().max()
ax.set_ylim([0, roundup(y_max)])

# Write each bar's count just above the bar, centred horizontally.
for p in ax.patches:
    ax.text(p.get_x() + p.get_width() / 2., p.get_height(), '%d' % int(p.get_height()),
            fontsize=12, color='black', ha='center', va='bottom')
# The title names the subset so this figure is not mistaken for the
# whole-catalogue comparison plotted earlier.
ax.set_title('Comparison of Total TV Shows & Movies (USA)', size='15')
plt.show()

In [None]:
# Count USA titles per distinct `listed_in` string and order from most to
# least common.
top_cat = (
    usa.groupby(['listed_in'])['show_id']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

fig_dims = (18, 7)
fig, ax = plt.subplots(figsize=fig_dims)
sns.pointplot(x='listed_in', y='count', data=top_cat.head(5), color='green', ax=ax)
ax.set_title('Top categories', size='15')
plt.show()

In [None]:
# Count USA titles per `director` value and order from most to fewest.
# NOTE: this rebinds `top_director`, replacing the movies-only table built
# in an earlier cell.
top_director = (
    usa.groupby(['director'])['show_id']
    .count()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

fig_dims = (18, 7)
fig, ax = plt.subplots(figsize=fig_dims)
sns.pointplot(x='director', y='count', data=top_director.head(10), ax=ax)
ax.set_title('Top 10 directors', size='20')
plt.show()

In [None]:
# USA titles released in 2016 or later, counted per release year and split
# by type (movie vs. TV show).
usa_recent5 = usa[usa.release_year >= 2016]

# `ci` is dropped: a count plot has no error bars, and seaborn has deprecated
# the parameter.
g = sns.catplot(
    data=usa_recent5, kind="count",
    x="release_year", hue="type",
    palette="dark", alpha=.6, height=6)
# With no row/col faceting the grid holds a single Axes.
ax = g.ax

# The tallest bar is the largest (year, type) group, so size the y-axis from
# that group count rather than from a fixed number that could clip the bars.
y_max = usa_recent5.groupby(['release_year', 'type']).size().max()
ax.set_ylim([0, roundup(y_max)])

# Write each bar's count just above the bar, centred horizontally.
for p in ax.patches:
    height = p.get_height()
    # A year/type combination with no titles can produce a NaN-height bar,
    # which int() cannot convert; there is nothing to label, so skip it.
    if not np.isfinite(height):
        continue
    ax.text(p.get_x() + p.get_width() / 2., height, '%d' % int(height),
            fontsize=12, color='black', ha='center', va='bottom')
ax.set_title('Last 5 years trends in Netflix by USA', size='15')
plt.show()

## Thank You! Drop an upvote if you liked the kernel! :)