## The Netflix data has a lot of possibilities. I have used it to do some analysis and visualization

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    paths_in_folder = (os.path.join(folder, name) for name in names)
    for path in paths_in_folder:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Reading the dataset

In [None]:
# Load the Netflix titles CSV into a DataFrame; low_memory=False makes pandas
# read the file in one pass instead of in chunks.
csv_path = "../input/netflix-shows/netflix_titles.csv"
df = pd.read_csv(csv_path, low_memory=False)

## Having an initial look at the data

In [None]:
# Seed NumPy's global RNG so the random sample below is reproducible.
np.random.seed(0)
# Show 10 randomly chosen rows.
df.sample(n=10)

## Checking for the size of the dataset

In [None]:
# (number of rows, number of columns) of the loaded DataFrame.
df.shape

## Checking for null values

In [None]:
# Count missing values per column.
df.isna().sum()

## It seems that the director and the cast columns would be difficult to address and will not add much to the visualizations and analysis

In [None]:
# Remove the 'director' and 'cast' columns from df in place.
df.drop(columns=['director', 'cast'], inplace=True)

## We fill the other columns with the mode of the values. 

In [None]:
# Impute each remaining column with ITS OWN most frequent value.
# The original filled 'rating' with the mode of 'country', which wrote a
# country name into the rating column.
# `mode()` returns a Series (there can be ties), so `[0]` picks the first mode.
for column in ('country', 'date_added', 'rating'):
    df[column] = df[column].fillna(df[column].mode()[0])

In [None]:
# Show 10 randomly chosen rows.
df.sample(n=10)

In [None]:
# Count missing values per column.
df.isna().sum()

## Visualizing the types of content

In [None]:
# Number of titles per content type, sorted by descending frequency.
values = df['type'].value_counts()
# Take the labels from the counts' own index so each label lines up with its
# count. `df['type'].unique()` is in order of first appearance, which need not
# match the frequency order of `value_counts()` and can swap the labels.
show_type = values.index.to_numpy()

In [None]:
# Bar chart: one bar per entry of show_type, with heights taken from values.
plt.figure(figsize=(4, 5))
plt.bar(x=show_type, height=values)
plt.show()

### Trend of Movies and TV shows over time

In [None]:
# Count titles per (type, release_year) pair.
time = df[['type', 'release_year']]
# `value_counts` orders rows by descending count, not by year. Sorting the
# (type, release_year) index makes `df1['TV Show']` / `df1['Movie']` run in
# chronological order, so a line plot of them does not zig-zag between years.
df1 = time.value_counts(['type', 'release_year']).sort_index()

In [None]:
# Line chart of the per-year title counts, one line per content type.
plt.figure(figsize=(10, 8))

# Plot order must match the legend order below.
for content_type in ('TV Show', 'Movie'):
    plt.plot(df1[content_type])

plt.title("Trend over time")
plt.xlabel("Year", size='x-large')
plt.ylabel("Number of Shows", size='x-large')
plt.legend(['TV Shows', 'Movies'])
plt.show()

## The trend looks odd at the end. After analyzing the dataset further, I saw that there are entries for the year 2021 too. There is something to learn from every small thing, I guess.

In [None]:
# Show only the rows whose 'country' value is exactly one of the listed countries.
countries = ['United States', 'India']
in_selected_countries = df['country'].isin(countries)
df[in_selected_countries]

### Plotting content by year. 

In [None]:
# Split the release years into buckets: one per year for 2017-2020, plus one
# for everything released before 2017. Each bucket is a Series of release years.
release_year = df['release_year']

during_2020 = release_year[(release_year > 2019) & (release_year < 2021)]
during_2019 = release_year[(release_year > 2018) & (release_year < 2020)]
during_2018 = release_year[(release_year > 2017) & (release_year < 2019)]
during_2017 = release_year[(release_year > 2016) & (release_year < 2018)]
before_2017 = release_year[release_year < 2017]

# TODO: build these buckets (and their value counts) with a for loop.

In [None]:
# Bar chart of the number of titles per release year, coloured by bucket.
plt.figure(figsize=(10, 10))

year_buckets = [
    ('2020', during_2020),
    ('2019', during_2019),
    ('2018', during_2018),
    ('2017', during_2017),
    ('<2017', before_2017),
]
for bucket_label, bucket in year_buckets:
    # Take x positions and heights from the SAME value_counts() result.
    # `unique()` (order of appearance) and `value_counts()` (descending
    # frequency) are ordered differently, which paired years with the wrong
    # counts in the multi-year '<2017' bucket.
    counts = bucket.value_counts()
    plt.bar(counts.index, counts.values, label=bucket_label)

plt.legend()
plt.show()

### Bar chart of Ratings 

In [None]:
# Bar chart of the number of titles per rating.
ratings = df['rating'].value_counts()
plt.figure(figsize=(15, 10))
c = ['red', 'yellow', 'black', 'blue', 'orange']
# Use the counts' own index as the x labels. `df['rating'].unique()` is in
# order of first appearance, not frequency order, so it put the wrong rating
# under each bar.
plt.bar(ratings.index, ratings, color=c)
plt.show()

### A pie chart will be a better visualization of the content available

In [None]:
# Pie chart of the share of each content type.
type_counts = df['type'].value_counts()
plt.figure(figsize=(10, 10))
# Take the labels from the same Series as the wedge sizes so they cannot be
# swapped; `unique()` order does not have to match `value_counts()` order.
plt.pie(type_counts, labels=type_counts.index)
plt.show()

### Pie chart of content in various countries 

In [None]:
# Pie chart of the country distribution among the selected rows.
# NOTE(review): this takes the first 10 ROWS of the dataset, not the 10 most
# frequent countries — confirm whether `df['country'].value_counts().head(10)`
# was intended.
country = df['country'][0:10]
country_counts = country.value_counts()
plt.figure(figsize=(10, 10))
# Take the labels from the counts' index so each wedge gets its own country;
# `unique()` order does not have to match `value_counts()` order.
plt.pie(country_counts, labels=country_counts.index)
plt.show()