In [None]:
import pandas as pd

## Load Data

In [None]:
# Load the Netflix titles CSV; the `show_id` column becomes the row index.
df = pd.read_csv(
    '../input/netflix-shows/netflix_titles.csv',
    index_col='show_id',
)

In [None]:
# Preview the first rows to confirm the file parsed as expected.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Column labels as loaded from the CSV.
df.columns

## Renaming Columns

In [None]:
# Give the `listed_in` column the clearer name `genre`; all other columns keep
# their labels.
df = df.rename(columns={'listed_in': 'genre'})

In [None]:
# Inspect the renamed column (its values are split on ', ' further down).
df['genre']

## Updating Rows (of 'type' column)

In [None]:
# Number of titles per distinct `type` label.
df['type'].value_counts()

In [None]:
# Recode the content type to a one-letter flag ('M' = movie, 'T' = TV show).
# `replace` leaves values that are not in the mapping untouched, so re-running
# this cell is a no-op. `map` would instead turn the already-recoded 'M'/'T'
# values (and any unexpected label) into NaN.
df['type'] = df['type'].replace({'Movie': 'M', 'TV Show': 'T'})

In [None]:
# Check the recoded `type` values.
df['type']

In [None]:
# Distinct rating labels present in the data (a missing value, if any, shows up
# as NaN in the result).
df['rating'].unique()

In [None]:
# Range of release years covered by the catalogue. The Series reductions are
# vectorised and skip missing values, unlike the Python built-ins `min`/`max`,
# which iterate element by element and give order-dependent results with NaN.
print(f"From {df['release_year'].min()} to {df['release_year'].max()}")

## All Genres

In [None]:
# Flatten the comma-separated `genre` strings into one flat list with one entry
# per (title, genre) pair, in row order. Rows with a missing genre are skipped
# instead of raising, and the cell no longer displays a throw-away Series of
# `[None, ...]` lists produced by appending inside `apply`.
all_genres = df['genre'].dropna().str.split(', ').explode().tolist()

In [None]:
# Count how many titles carry each genre in a single pass.
# The dict is filled in first-seen order, so its key order is deterministic.
# Seeding it from `set(all_genres)` made the order depend on string hashing,
# which changes between kernel sessions and therefore reshuffled genres with
# equal counts in the stable sort performed later.
genres = {}
for genre in all_genres:
    genres[genre] = genres.get(genre, 0) + 1

# Unique genre names, kept under the original variable name.
keys = list(genres)

In [None]:
# Order genres from most to least frequent; genres with equal counts keep the
# relative order they have in `genres` (the sort is stable).
sorted_genres = {
    name: genres[name]
    for name in sorted(genres, key=genres.get, reverse=True)
}

In [None]:
# Display the genre -> title-count mapping, most frequent genre first.
sorted_genres

## Filter by type

In [None]:
# Boolean mask: True for rows whose recoded `type` is 'M' (movie).
movie = df['type'].eq('M')

# Movies only; `type` is redundant once the frame is split, so drop it.
df_movie = df[movie].drop(columns='type')

# Every row that is not a movie is treated as a TV show.
df_tv = df[~movie].drop(columns='type')

In [None]:
# Preview the movie subset.
df_movie.head()

In [None]:
# Preview the TV-show subset.
df_tv.head()

## Sorting by Release Year

In [None]:
# Newest releases first; movies from the same year are ordered by ascending title.
df_movie = df_movie.sort_values(['release_year', 'title'], ascending=[False, True])
print(df_movie.shape)

In [None]:
# Newest releases first; shows from the same year are ordered by ascending title.
df_tv = df_tv.sort_values(['release_year', 'title'], ascending=[False, True])
print(df_tv.shape)

In [None]:
# First rows of the sorted movie frame (most recent release year first),
# restricted to the main descriptive columns.
df_movie[['title', 'release_year', 'duration', 'genre']].head()

In [None]:
# First rows of the sorted TV-show frame (most recent release year first),
# restricted to the main descriptive columns.
df_tv[['title', 'release_year', 'duration', 'genre']].head()

## Longest Movies on Netflix

In [None]:
# Numeric runtime in minutes, parsed from strings such as '90 min'.
# The vectorised extract + `pd.to_numeric` yields NaN when `duration` is
# missing or does not match the '<number> min' pattern, whereas the per-row
# `int(x.split(...))` raised on the first such value.
df_movie['time'] = pd.to_numeric(
    df_movie['duration'].str.extract(r'^(\d+) min', expand=False),
    errors='coerce',
)

In [None]:
# The ten movies with the largest `time` (runtime in minutes) values.
df_movie.nlargest(10, 'time')[['title', 'release_year', 'duration', 'genre']]

## Longest Shows on Netflix

In [None]:
# Number of seasons, parsed from strings such as '1 Season' / '3 Seasons'.
# The vectorised extract + `pd.to_numeric` yields NaN when `duration` is
# missing or does not match the '<number> Season' pattern, whereas the per-row
# `int(x.split(...))` raised on the first such value.
df_tv['seasons'] = pd.to_numeric(
    df_tv['duration'].str.extract(r'^(\d+) Season', expand=False),
    errors='coerce',
)

In [None]:
# The ten TV shows with the largest `seasons` values.
df_tv.nlargest(10, 'seasons')[['title', 'release_year', 'duration', 'genre']]

## Latest Country-wise Movies & TV Shows

In [None]:
# Share of TV shows per distinct `country` string. `normalize=True` returns
# proportions instead of counts; rows with a missing country are excluded by
# default, so the shares are relative to shows that have a country value.
df_tv['country'].value_counts(normalize = True)

In [None]:
# Share of movies per distinct `country` string. `normalize=True` returns
# proportions instead of counts; rows with a missing country are excluded by
# default, so the shares are relative to movies that have a country value.
df_movie['country'].value_counts(normalize = True)

India's share of Netflix's content
- TV Shows - 3.32%
- Movies - 16.55%

As of 2021, **Netflix**'s market share in India is smaller than that of its major competitor, **Amazon Prime Video**.

A big difference between these two streaming platforms is price: Amazon is considerably cheaper than Netflix, and a Prime membership comes with other benefits.<br>
Both platforms produce content for Indian audiences, but over the past few years Amazon's content has been more appealing to viewers than Netflix's.<br>
Since India is a big market for streaming platforms, Netflix is unlikely to give up on it. Even in 2021, many movies, TV shows, and web series from Netflix are lined up.

In [None]:
# Group TV shows by the raw `country` string and show the first ten rows of the
# 'India' group. `get_group('India')` selects only rows whose `country` is
# exactly 'India'; a row with a combined value would not be included
# (NOTE(review): confirm whether the data contains multi-country strings).
country_grp_tv = df_tv.groupby('country')
country_grp_tv.get_group('India')[['title', 'release_year', 'duration', 'genre']].head(10)

In [None]:
# Group movies by the raw `country` string and show the first ten rows of the
# 'India' group. `get_group('India')` selects only rows whose `country` is
# exactly 'India'; a row with a combined value would not be included
# (NOTE(review): confirm whether the data contains multi-country strings).
country_grp_movie = df_movie.groupby('country')
country_grp_movie.get_group('India')[['title', 'release_year', 'duration', 'genre']].head(10)