In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly as py
import plotly.graph_objs as go
import warnings
# Silence every warning for the rest of the session. No category or module is
# given, so this also hides pandas warnings (e.g. SettingWithCopyWarning)
# raised by later cells.
warnings.filterwarnings("ignore")

In [None]:
# Read the streaming-platform movie catalogue into `data`, then display its
# column labels so the names used by later cells can be checked.
data = pd.read_csv(
    filepath_or_buffer='../input/movies-on-netflix-prime-video-hulu-and-disney/MoviesOnStreamingPlatforms_updated.csv'
)
data.columns

In [None]:

# Keep only the columns this analysis uses. `.copy()` makes `df` an
# independent frame, so later in-place edits (dropna, new director columns)
# do not act on a slice of `data`.
df = data[['Title', 'Year', 'IMDb',
           'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Directors',
           'Genres', 'Country', 'Language', 'Runtime']].copy()

# Fraction of missing values per column (mean of the boolean null mask).
df.isnull().mean()

In [None]:
# Drop every row with at least one missing value. Rebinding `df` instead of
# using `inplace=True` avoids mutating a frame derived from a slice of `data`.
df = df.dropna()

# Number of fully duplicated rows left after the drop.
print(df.duplicated().sum())

In [None]:
# Split the comma-separated 'Directors' string into three columns. With n=2
# at most two splits are made, so 'dir3' holds everything after the second
# comma; missing parts are filled with None.
df[['dir1', 'dir2', 'dir3']] = df['Directors'].str.split(pat=',', n=2, expand=True)

In [None]:
# One indicator column per genre, built from the comma-separated 'Genres'
# string; then show the genre labels that were found.
genres = df['Genres'].str.get_dummies(sep=',')
genres.columns

In [None]:
# Number of titles per genre, largest first.
# Summing every indicator column of `genres` replaces the previous hard-coded
# list of genre names, which raised KeyError when a genre was absent from the
# data and silently ignored any genre not in the list.
genre = (
    genres
    .sum()
    .rename_axis('genre')
    .reset_index(name='sum')
    .sort_values(by='sum', ascending=False)
)

In [None]:
# Pie chart of title counts per genre; `df_g` is an alias of `genre`.
df_g = genre
fig = px.pie(
    data_frame=df_g,
    names='genre',
    values='sum',
    title='MOST POPULAR GENRES ACROSS STREAMING PLATFORMS',
)
fig.show()

In [None]:
# Share of titles available on each platform: sum of the 0/1 platform flag
# divided by the number of titles. The denominator is taken from `df` itself,
# so this cell no longer depends on the unrelated `genres` frame.
pie_df = (df[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum() / len(df)).reset_index()
pie_df.columns = ['Service', 'Percentage']
df_p = pie_df

fig = px.pie(
    df_p,
    values='Percentage',
    names='Service',
    color='Service',
    title='CONTENT SHARE ACROSS STREAMING PLATFORMS',
    # Fixed colour per service name.
    color_discrete_map={
        'Netflix': '#E50914',
        'Hulu': '#25D366',
        'Prime Video': '#00A8E1',
        'Disney+': '#003380',
    },
)
fig.show()

In [None]:
# One indicator column per production country, from the comma-separated
# 'Country' string.
prod_country = df['Country'].str.get_dummies(sep=',')

# Titles per country as a two-column frame, then ordered largest first.
agg = prod_country.sum().reset_index()
agg.columns = ['country', 'producers']
df_bar = agg.sort_values(ascending=False, by='producers')

In [None]:
# Keep the first 20 rows of `df_bar` and draw them as a bar chart with the
# count printed above each bar.
df_bar = df_bar.iloc[:20]
fig = px.bar(
    df_bar,
    x='country',
    y='producers',
    text='producers',
    color='producers',
    title='TOP CONTENT PRODUCERS OF STREAMING SERVICES WORLDWIDE',
)
fig.update_traces(textposition='outside')
fig.update_layout(uniformtext_mode='hide', uniformtext_minsize=10)
fig.show()

In [None]:
# World map of the countries in `df_bar`, shaded by their title count.
fig = go.Figure()
fig.add_trace(
    go.Choropleth(
        locations=df_bar['country'],            # country names to shade
        locationmode='country names',           # `locations` holds names, not ISO codes
        z=df_bar['producers'].astype(float),    # value that drives the colour
        colorscale='Reds',
        colorbar_title="Producers",
    )
)
fig.update_layout(title_text=' TOP CONTENT PRODUCERS WORLDWIDE', geo_scope='world')
fig.show()


In [None]:
# Country names currently held in `df_bar`, as a plain Python list.
df_bar['country'].tolist()

In [None]:
# Ten titles with the longest runtime.
# Built as one non-mutating chain: the previous in-place sort acted on a
# column slice of `df`, which pandas reports as SettingWithCopyWarning.
df_runtime = (
    df[['Title', 'Runtime']]
    .sort_values(by='Runtime', ascending=False)
    .head(10)
)

# The figure size is set once in update_layout; the earlier `height=400`
# passed to px.bar was always overridden by `height=800` and has been removed.
fig = px.bar(
    df_runtime,
    x='Title',
    y='Runtime',
    hover_data=['Runtime', 'Title'],
    color='Title',
    color_discrete_sequence=px.colors.qualitative.T10,
)
fig.update_layout(
    width=800,
    height=800,
    margin=dict(l=10, r=20, t=20, b=20),
)
fig.show()

In [None]:
# One frame per platform, holding the titles whose platform flag equals 1.
# The other platforms' flag columns are dropped with a non-mutating
# `.drop(...)`; the previous `inplace=True` drops acted on filtered slices of
# `df`, which pandas reports as SettingWithCopyWarning.
hulu = df.loc[df['Hulu'] == 1].drop(columns=['Netflix', 'Disney+', 'Prime Video'])
netflix = df.loc[df['Netflix'] == 1].drop(columns=['Hulu', 'Disney+', 'Prime Video'])
prime_video = df.loc[df['Prime Video'] == 1].drop(columns=['Hulu', 'Disney+', 'Netflix'])

# NOTE(review): unlike the three frames above, `disney` keeps the other
# platforms' flag columns. Left unchanged; confirm whether this is intended.
disney = df.loc[df['Disney+'] == 1].copy()

In [None]:
# Five most frequent values of 'dir1' among Hulu titles, with their counts.
# rename_axis/reset_index(name=...) yields the same two column labels
# regardless of how the installed pandas names value_counts() output.
dir_hul = (
    hulu['dir1']
    .value_counts()
    .rename_axis('Director')
    .reset_index(name='No.of films')
    .sort_values(by='No.of films', ascending=False)
    .head(5)
)

# The unused `make_subplots(2, 3)` figure (immediately overwritten) and the
# `height=400` argument (overridden by update_layout) have been removed.
fig = px.bar(
    dir_hul,
    x='Director',
    y='No.of films',
    color='Director',
    title='Top 5 directors on Hulu',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
fig.update_layout(width=500, height=500, showlegend=False)
fig.show()

In [None]:
# Five most frequent values of 'dir1' among Netflix titles, with their counts.
# rename_axis/reset_index(name=...) yields the same two column labels
# regardless of how the installed pandas names value_counts() output.
dir_net = (
    netflix['dir1']
    .value_counts()
    .rename_axis('Director')
    .reset_index(name='No.of films')
    .sort_values(by='No.of films', ascending=False)
    .head(5)
)

# The unused `make_subplots(2, 3)` figure (immediately overwritten) and the
# `height=400` argument (overridden by update_layout) have been removed.
fig = px.bar(
    dir_net,
    x='Director',
    y='No.of films',
    color='Director',
    title='Top 5 directors on Netflix',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
fig.update_layout(width=500, height=500, showlegend=False)
fig.show()

In [None]:
# Five most frequent values of 'dir1' among Prime Video titles, with their
# counts. rename_axis/reset_index(name=...) yields the same two column labels
# regardless of how the installed pandas names value_counts() output.
dir_prime = (
    prime_video['dir1']
    .value_counts()
    .rename_axis('Director')
    .reset_index(name='No.of films')
    .sort_values(by='No.of films', ascending=False)
    .head(5)
)

# The unused `make_subplots(2, 3)` figure (immediately overwritten) and the
# `height=400` argument (overridden by update_layout) have been removed.
fig = px.bar(
    dir_prime,
    x='Director',
    y='No.of films',
    color='Director',
    title='Top 5 directors on Prime Video',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
fig.update_layout(width=500, height=500, showlegend=False)
fig.show()

In [None]:
# Five most frequent values of 'dir1' among Disney+ titles, with their counts.
# rename_axis/reset_index(name=...) yields the same two column labels
# regardless of how the installed pandas names value_counts() output.
dir_dis = (
    disney['dir1']
    .value_counts()
    .rename_axis('Director')
    .reset_index(name='No.of films')
    .sort_values(by='No.of films', ascending=False)
    .head(5)
)

# The unused `make_subplots(2, 3)` figure (immediately overwritten) and the
# `height=400` argument (overridden by update_layout) have been removed.
fig = px.bar(
    dir_dis,
    x='Director',
    y='No.of films',
    color='Director',
    title='Top 5 directors on Disney+',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
fig.update_layout(width=500, height=500, showlegend=False)
fig.show()

In [None]:
# Indicator matrix of production countries (one 0/1 column per country).
countries = df['Country'].str.get_dummies(sep=',')

# For each of five countries keep only the rows where its flag is 1, as a
# Series of ones indexed by those rows.
us = countries.loc[countries['United States'] == 1, 'United States']
uk = countries.loc[countries['United Kingdom'] == 1, 'United Kingdom']
ind = countries.loc[countries['India'] == 1, 'India']
can = countries.loc[countries['Canada'] == 1, 'Canada']
fra = countries.loc[countries['France'] == 1, 'France']

# Column-wise concat aligns on the row index, so titles not produced in a
# given country get NaN in that country's column.
newdf = pd.concat([df, us, uk, ind, can, fra], axis=1)

In [None]:
# For each of the five countries, group `newdf` by that country's flag column
# and keep the resulting mean-IMDb Series. `country` and `mean` are parallel
# lists consumed when the summary frame is built.
top5 = ['United States', 'United Kingdom', 'India', 'Canada', 'France']
country = list(top5)
mean = [
    newdf.groupby([name], as_index=False, sort=True)['IMDb'].mean()['IMDb']
    for name in top5
]

In [None]:
# One row per country (index = country name), single column 'Average'
# holding the mean IMDb rating, ordered highest first.
# The former bare `top5.reset_index()` call discarded its result and had no
# effect, so it has been removed; the sort now rebinds instead of mutating.
top5 = pd.DataFrame(mean, index=country)
top5.columns = ['Average']
top5 = top5.sort_values(by='Average', ascending=False)

In [None]:
    fig =  px.bar(top5, x = top5.index, y='Average',color= top5.index,height= 400,
                  title = 'AVERAGE IMDB RATING OF TOP CONTENT PRODUCERS',
              color_discrete_sequence= px.colors.qualitative.Bold)
    fig.update_layout(width = 500,height = 500,xaxis=dict(title_text="Country"),
    yaxis=dict(title_text="Average"),showlegend = False)  
fig.show()

In [None]:
# World map of the average IMDb rating for the countries in `top5`.
# Locations are read from `top5.index` so each country is paired with its own
# rating. The original used a hard-coded country order while `top5` had been
# sorted by rating, which put values on the wrong countries.
# The trace is no longer stored in `data`, so the raw DataFrame loaded at the
# top of the notebook is not overwritten.
rating_trace = go.Choropleth(
    locations=top5.index.tolist(),
    locationmode='country names',
    colorscale='viridis',
    z=top5['Average'],
)

# Previously defined but never passed to the figure; now applied.
lyt = dict(geo=dict(scope='world'))
fig = go.Figure(data=[rating_trace], layout=lyt)
fig.show()