## Netflix Data Analysis

In [None]:
import pandas as pd
import seaborn as sns

In [None]:
# Read ./netflix.csv (path relative to the current working directory) into
# the dataframe that every later cell works on.
netflix_df=pd.read_csv('./netflix.csv')

In [None]:
# Display the loaded dataframe.
netflix_df

### Getting basic information of dataframe

In [None]:
# (number of rows, number of columns)
netflix_df.shape

In [None]:
# Total number of cells (rows * columns)
netflix_df.size

In [None]:
# Print a summary: column names, non-null counts, dtypes and memory usage.
netflix_df.info()

In [None]:
# First five rows
netflix_df.head()

In [None]:
# Last five rows
netflix_df.tail()

In [None]:
# Column labels of the dataframe
netflix_df.columns

In [None]:
# Data type of each column
netflix_df.dtypes

### Checking and removing duplicate values

In [None]:
# Number of rows that are exact repeats of an earlier row.
netflix_df.duplicated().sum()

In [None]:
# Show the rows flagged as repeats of an earlier row.
netflix_df.loc[netflix_df.duplicated()]

In [None]:
# Remove the repeated rows, keeping the first occurrence of each; modifies
# netflix_df in place.
netflix_df.drop_duplicates(inplace=True)

In [None]:
# Re-check (rows, columns) after dropping duplicates.
netflix_df.shape

### Checking and removing null values

In [None]:
# Count the missing values in each column.
netflix_df.isnull().sum()

In [None]:
# Visualise where the missing values are: the heat map is drawn from the
# boolean null mask, so each cell is shaded by whether that entry is null.
sns.heatmap(netflix_df.isnull(), cmap='Blues')

#### For 'House of Cards', what is the show ID and who is the director?

In [None]:
# Boolean mask: True for rows whose Title is exactly 'House of Cards'.
netflix_df.Title.isin(['House of Cards'])

In [None]:
# Keep only the rows titled 'House of Cards' and display them.
hoc_df = netflix_df.loc[netflix_df.Title.isin(['House of Cards'])]
hoc_df

In [None]:
# Show ID(s) of the matching rows
hoc_df['Show_Id']

In [None]:
# Director(s) of the matching rows
hoc_df['Director']

#### In which year were the highest number of TV shows and movies released?

In [None]:
# Parse Release_Date into a datetime column. Surrounding whitespace is
# trimmed first, and errors='coerce' turns unparseable entries into NaT
# instead of raising.
netflix_df['Date_Released'] = pd.to_datetime(
    netflix_df['Release_Date'].str.strip(),
    errors='coerce',
)

In [None]:
# Display the dataframe again to inspect its current columns.
netflix_df

In [None]:
# Check the dtype of the Date_Released column.
netflix_df['Date_Released'].dtype

In [None]:
# Number of titles per release year, most frequent year first.
netflix_df['Date_Released'].dt.year.value_counts()

In [None]:
# Same per-year counts, drawn as a bar chart.
netflix_df['Date_Released'].dt.year.value_counts().plot.bar()

#### How many movies and tv shows are in the dataset?

In [None]:
# Number of non-null Category entries in each Category group.
netflix_df.groupby('Category')['Category'].count()

In [None]:
# Bar chart of the number of rows per Category, with every bar annotated
# with its height.
ax = sns.countplot(data=netflix_df, x='Category')

for container in ax.containers:
    ax.bar_label(container)

#### Show all the movies released in the year 2020

In [None]:
# Rows that are movies AND whose parsed release date falls in 2020.
netflix_df[
    (netflix_df['Category'] == 'Movie')
    & (netflix_df['Date_Released'].dt.year == 2020)
]

#### Show only the titles of TV shows released in India

In [None]:
# Titles of TV shows whose Country is exactly 'India'.
# NOTE(review): this is an exact match, so a row whose Country value lists
# India together with other countries is not included -- confirm intended.
netflix_df.loc[
    (netflix_df['Category'] == 'TV Show') & (netflix_df['Country'] == 'India'),
    'Title',
]

#### Show the top 10 directors who contributed the highest number of TV shows and movies to Netflix

In [None]:
# Ten most frequent Director values (value_counts sorts by count, descending,
# and ignores missing values).
netflix_df.Director.value_counts().iloc[:10]

#### Show all the records where "Category is `Movie` and Type is `Comedies`" or Country is `United Kingdom`

In [None]:
# Rows that are either from the United Kingdom, or are movies of type
# 'Comedies' (all comparisons are exact matches).
netflix_df[
    (netflix_df['Country'] == 'United Kingdom')
    | ((netflix_df['Category'] == 'Movie') & (netflix_df['Type'] == 'Comedies'))
]

#### In how many movies/TV shows was Tom Cruise part of the cast?

In [None]:
# Find every title whose Cast mentions Tom Cruise.
# An equality test (Cast == 'Tom Cruise') only matches rows where he is the
# *entire* Cast value, so a substring match is used instead.
# - na=False treats rows with a missing Cast as non-matches; without it the
#   mask contains NaN and cannot be used for boolean indexing.
# - regex=False matches the name literally.
netflix_df[netflix_df['Cast'].str.contains('Tom Cruise', na=False, regex=False)]

#### What are the different ratings defined by Netflix?

In [None]:
# Number of distinct (non-null) ratings
netflix_df['Rating'].nunique()

In [None]:
# The distinct rating values, in order of first appearance
netflix_df['Rating'].unique()

#### How many movies got the `TV-14` rating in Canada?

In [None]:
# Count the movies rated TV-14 whose Country is exactly 'Canada'.
len(
    netflix_df[
        (netflix_df['Category'] == 'Movie')
        & (netflix_df['Rating'] == 'TV-14')
        & (netflix_df['Country'] == 'Canada')
    ]
)

#### How many TV shows got the `R` rating after 2018?

In [None]:
# Count the TV shows rated R whose parsed release year is later than 2018.
len(
    netflix_df[
        (netflix_df['Category'] == 'TV Show')
        & (netflix_df['Rating'] == 'R')
        & (netflix_df['Date_Released'].dt.year > 2018)
    ]
)

#### What is the maximum duration of a movie/TV show on Netflix?

In [None]:
# Inspect the distinct raw Duration values before splitting them.
netflix_df['Duration'].unique()

In [None]:
# Split Duration into its leading number and the unit text after the first
# space. n=1 limits the split to one cut so it always yields two columns.
netflix_df[['Minutes', 'Unit']] = netflix_df['Duration'].str.split(' ', n=1, expand=True)

# str.split returns strings; convert the number so that max()/min()/sorting
# compare numerically rather than lexicographically (as text, '99' > '312').
# errors='coerce' maps anything non-numeric to NaN instead of raising.
# NOTE(review): the column is named Minutes but presumably also receives
# season counts for TV shows -- check Unit before comparing values.
netflix_df['Minutes'] = pd.to_numeric(netflix_df['Minutes'], errors='coerce')

In [None]:
# Peek at the first two rows to check the newly added columns.
netflix_df.head(2)

In [None]:
# Largest value in the Minutes column.
# NOTE(review): this is only a numeric maximum if Minutes has a numeric
# dtype; on string values max() compares lexicographically.
netflix_df.Minutes.max()

In [None]:
# Smallest value in the Minutes column.
# NOTE(review): this is only a numeric minimum if Minutes has a numeric
# dtype; on string values min() compares lexicographically.
netflix_df.Minutes.min()

In [None]:
# Re-check the column dtypes, now including the columns added above.
netflix_df.dtypes

#### Which individual country has the highest no. of TV shows?

In [None]:
# Subset containing only the rows whose Category is 'TV Show'.
netflix_tv_shows = netflix_df.loc[netflix_df['Category'] == 'TV Show']

In [None]:
# TV-show count per distinct Country value, largest first.
netflix_tv_shows['Country'].value_counts()

In [None]:
# The Country value with the most TV shows (first entry of the sorted counts).
netflix_tv_shows['Country'].value_counts().iloc[:1]

#### Sorting the dataset w.r.t year

In [None]:
# Store the year part of the parsed release date in its own column.
netflix_df['Year'] = netflix_df['Date_Released'].dt.year

In [None]:
# Five rows with the most recent Year (sorted newest first).
netflix_df.sort_values(by='Year', ascending=False).head()

In [None]:
# Five rows with the earliest Year (sorted oldest first).
netflix_df.sort_values(by='Year').head()