# **OTT Movie Recommendations Across Platforms (Netflix, Hulu, Prime Video, Disney+)**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Import necessary libraries**

In [None]:
import matplotlib.pyplot as plt
import plotly as py
import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import math
import pandas_profiling
from pandas_profiling import ProfileReport
from plotly.colors import n_colors


from plotly.offline import init_notebook_mode, iplot
# Initialise plotly's notebook mode so figures render inline in the notebook.
init_notebook_mode(connected=True)
# Switch cufflinks to offline mode.
cf.go_offline()

In [None]:
# load dataset
allmovies = pd.read_csv('../input/movies-on-netflix-prime-video-hulu-and-disney/MoviesOnStreamingPlatforms_updated.csv')

In [None]:
allmovies

A detailed pandas-profiling report of the dataset, which contains all movies across the various platforms.

In [None]:
pandas_profiling.ProfileReport(allmovies)

# CLEANING THE DATA

In [None]:
allmovies.info()

In [None]:
allmovies.isna().sum()

**Removing unwanted columns**

In [None]:
ottmovies = allmovies.drop(['Unnamed: 0', 'ID', 'Type'], axis=1)

In [None]:
ottmovies.dtypes

In [None]:
# Column means for the numeric columns only; the frame also holds string columns
# (e.g. 'Age', 'Directors'), which make a plain .mean() raise on pandas >= 2.0.
ottmovies.mean(numeric_only=True)

**Replacing NaN values with default values**

In [None]:
# Replacement value for missing entries, per column.
nan_replacements = {
    'Age': 'all',
    'Directors': 'Others',
    'Genres': 'Others',
    'Country': 'Others',
    'Language': 'Others',
    'Runtime': 93,
    'IMDb': 5.91,
    'Rotten Tomatoes': '50%',
}

# Fill each column in place; update() only writes the non-NaN values it is given.
for column_name, replacement in nan_replacements.items():
    ottmovies.update(ottmovies[[column_name]].fillna(replacement))

ottmovies

In [None]:
ottmovies.isna().sum()

In [None]:
ottmovies.info()

In [None]:
ottmovies['IMDb'].value_counts()

In [None]:
def round_val(data):
    """Round a rating to the nearest integer.

    Parameters
    ----------
    data : float
        A single numeric value (for example one IMDb rating).

    Returns
    -------
    int or float
        ``round(data)`` for a real number. A missing value (NaN/None) is
        returned unchanged instead of raising.
    """
    # The previous check compared an int to the string 'nan', which is always
    # unequal, and int(NaN) raised ValueError before the comparison even ran.
    if pd.isna(data):
        return data
    return round(data)
    
ottmovies['IMDB'] = ottmovies['IMDb'].apply(round_val)

In [None]:
# Number of movies per rounded rating, ordered from the lowest rating to the highest.
rating_counts = ottmovies['IMDB'].value_counts().sort_index(ascending=True)
values = rating_counts.tolist()
index = rating_counts.index

In [None]:
values,index

# **Movies based by IMDB rating**

In [None]:
# Bar chart of movie counts per rounded IMDB rating, coloured by rating.
rating_axis_labels = {'x': 'IMDB rating', 'y': 'Number of movies'}
fig = px.bar(
    x=index,
    y=values,
    color=index,
    height=400,
    labels=rating_axis_labels,
)
fig.show()

*We can see here that, across all OTT platforms, most movies have a rating of around 6 or higher.*

In [None]:
def val_sum(r,c):
    """Return the sum of column(s) ``c`` of the DataFrame ``r``.

    Parameters
    ----------
    r : pandas.DataFrame
        Frame to read from.
    c : label or list of labels
        Column(s) to total.

    Returns
    -------
    scalar or pandas.Series
        A scalar for a single column label, a Series for several columns.
    """
    # Read from the frame that was passed in rather than a module-level variable,
    # so the result does not depend on hidden notebook state.
    return r[c].sum(axis=0)

In [None]:
row = [ottmovies]
col = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# One total per (frame, platform) pair, in the order the platforms are listed.
ott_counts = [val_sum(frame, platform) for frame in row for platform in col]

In [None]:
ott_counts

In [None]:
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']

# Donut chart of the share of movies on each platform.
platform_pie = go.Pie(labels=labels, values=ott_counts, hole=.4)
fig = go.Figure(data=[platform_pie])
fig.update_layout(title_text = 'Number of movies present in all platform')
fig.show()

***This pie chart shows that Amazon Prime Video has the largest number of movies***

In [None]:
def splitting(dataframe, col):
    """One-hot encode a comma-separated string column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the column to split.
    col : label
        Name of a string column whose values are separated by ','.

    Returns
    -------
    pandas.DataFrame
        One 0/1 indicator column per distinct token found in ``col``.
    """
    return dataframe[col].str.get_dummies(sep=',')

unique_genres = splitting(ottmovies, 'Genres')
unique_lang = splitting(ottmovies, 'Language')

In [None]:
newmovies = pd.concat([ottmovies, unique_genres], axis = 1)
newmovies

In [None]:
len(ottmovies['Title'].unique())

In [None]:
def val_sum(r,c):
    """Return the sum of column(s) ``c`` of the DataFrame ``r``.

    Parameters
    ----------
    r : pandas.DataFrame
        Frame to read from (for example the one-hot genre frame).
    c : label or list/Index of labels
        Column(s) to total.

    Returns
    -------
    scalar or pandas.Series
        A scalar for a single column label, a Series for several columns.
    """
    # Use the frame passed in as ``r`` instead of a module-level variable so the
    # function works for any frame and does not depend on notebook state.
    return r[c].sum(axis=0)

In [None]:
# Collect the number of movies for each genre, in unique_genres column order.
unique_counts = []
row = [unique_genres]
# Iterate over the individual genre names. Wrapping the Index itself in a list
# would yield a one-element loop and store a single Series, not per-genre counts.
col = list(unique_genres.columns)

for x in row:
    for y in col:
        unique_counts.append(val_sum(x,y))

In [None]:
unique_genres.sum()

In [None]:
# Vertical bar chart of the total number of movies per genre.
plt.figure(figsize = (20, 10))
unique_genres.sum().plot(kind="bar")
# With kind="bar" the genres run along the x-axis and the counts along the
# y-axis, so the axis labels must be assigned that way round.
plt.xlabel('Genres')
plt.ylabel('Total number of movies')
plt.title('Movies and its genres')
plt.show()

***Drama is the genre with the largest number of movies***

In [None]:
# The 20 rows with the greatest 'Runtime', longest first.
movies_by_runtime = newmovies.sort_values(by='Runtime', ascending=False)
long_runtime_movies = movies_by_runtime.iloc[:20]

fig = px.bar(
    long_runtime_movies,
    x='Title',
    y='Runtime',
    color='Runtime',
    height=600,
    title='Top 20 long runtime movies',
)
fig.show()

In [None]:
# Count titles per release year and give the count column a readable name.
titles_per_year = newmovies.groupby('Year')['Title'].count()
movies_by_year = titles_per_year.reset_index().rename(columns={'Title': 'Number of movies'})

# Horizontal bars: one bar per year, bar length is the number of movies.
fig = px.bar(
    movies_by_year,
    x='Number of movies',
    y='Year',
    color='Number of movies',
    orientation='h',
    title='1900 to 2020 total number of movies',
)
fig.show()

* ***2017 is the year with the most movies released***
* ***Far fewer movies were released in 2020, likely due to the COVID-19 pandemic***

In [None]:
def sunburst(dataframe, platform, c, top_n=20):
    """Show a sunburst chart of the highest IMDb-rated movies on one platform.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the ``platform`` column plus 'Title', 'Genres' and 'IMDb'.
    platform : str
        Name of the platform indicator column; rows where it equals 1 are kept.
    c : str or list
        Passed to plotly express as ``color_continuous_scale``.
    top_n : int, default 20
        Number of top-rated rows to plot. The default keeps the original
        top-20 behaviour.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Work on new names instead of rebinding the argument, so the caller's
    # frame is clearly left untouched.
    on_platform = dataframe.loc[dataframe[platform] == 1]
    top_rated = on_platform.sort_values('IMDb', ascending=False).head(top_n)
    fig = px.sunburst(
        top_rated,
        path=['Title', 'Genres'],
        values='IMDb',
        color='IMDb',
        color_continuous_scale=c,
    )
    fig.show()

# Netflix top 20 movies

In [None]:
sunburst(ottmovies, 'Netflix', 'twilight')

# Hulu top 20 movies

In [None]:
sunburst(ottmovies, 'Hulu', 'fall')

# Prime video top 20 movies

In [None]:
sunburst(ottmovies, 'Prime Video', 'hot')

# Disney+ top 20 movies

In [None]:
sunburst(ottmovies, 'Disney+', 'greens')

In [None]:
# Count titles per 'Country' value and keep the 20 largest groups.
top_20 = (
    ottmovies.groupby('Country')['Title']
    .count()
    .reset_index()
    .rename(columns={'Title': 'Number_of_Movies'})
    .sort_values('Number_of_Movies', ascending=False)
    .head(20)
)
fig = px.bar(
    top_20,
    x='Country',
    y='Number_of_Movies',
    color='Number_of_Movies',
    height=700,
    title='Total number of movies based on country',
)
fig.show()

***The United States is the country that releases by far the largest number of movies***

In [None]:
# Append the one-hot language / genre columns to the right of the ottmovies columns.
# NOTE(review): bar() below selects the one-hot columns by position (iloc[:, 15:]),
# so it relies on ottmovies contributing exactly the first 15 columns here — confirm
# whenever columns are added to or dropped from ottmovies.
lang_merged = pd.concat([ottmovies, unique_lang], axis = 1, sort = False)
genre_merged = pd.concat([ottmovies, unique_genres], axis = 1, sort = False)

# Total number of movies by genre and language on each platform

In [None]:
def bar(dataframe,platform,c):
    dataframe=dataframe.loc[dataframe[platform] == 1]
    val_counts = dataframe.iloc[:,15:].sum(axis=0).sort_values(ascending=False)
    val_counts2 = pd.DataFrame(val_counts,columns=['Number of movies'])
    return val_counts2[0:20].style.bar(subset=["Number of movies",], color=c)

In [None]:
bar(lang_merged, 'Netflix', '#636EFA')

In [None]:
bar(genre_merged, 'Netflix', '#EF553B')

In [None]:
bar(lang_merged, 'Prime Video','#00CC96' )

In [None]:
bar(genre_merged, 'Prime Video','#AB63FA' )

In [None]:
bar(lang_merged, 'Hulu', '#FFA15A')

In [None]:
bar(genre_merged, 'Hulu', '#19D3F3')

In [None]:
bar(lang_merged, 'Disney+', '#FF6692')

In [None]:
bar(genre_merged, 'Disney+', '#B6E880')

# **Overall, based on these visualizations, we suggest watching movies on Prime Video**