In [1]:
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
netflix_data = pd.read_csv('netflix_content.csv')

In [2]:
pio.renderers.default = "notebook"

#This line sets the default renderer for Plotly to "notebook".
#That means any Plotly chart you create will automatically display inside your Jupyter Notebook

In [10]:
netflix_data.dtypes

Title                   object
Available Globally?     object
Release Date            object
Hours Viewed           float64
Language Indicator      object
Content Type            object
dtype: object

In [11]:
netflix_data['Hours Viewed']=netflix_data['Hours Viewed'].replace(',','',regex=True).astype(float)
netflix_data

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000.0,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000.0,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000.0,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000.0,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000.0,English,Movie
...,...,...,...,...,...,...
24807,We Are Black and British: Season 1,No,,100000.0,English,Show
24808,Whitney Cummings: Can I Touch It?,Yes,2019-07-30,100000.0,English,Movie
24809,Whitney Cummings: Jokes,No,2022-07-26,100000.0,English,Movie
24810,"Whose Vote Counts, Explained: Limited Series",Yes,2020-09-28,100000.0,English,Movie


In [12]:
content_type=netflix_data.groupby('Content Type')['Hours Viewed'].sum()
content_type

Content Type
Movie    5.063780e+10
Show     1.077641e+11
Name: Hours Viewed, dtype: float64

In [13]:
fig= go.Figure(data=[go.Bar(x=content_type.index, y=content_type.values, marker_color=['skyblue','salmon'])])

fig.update_layout(title='Total Viewership Hours by Content Type',
xaxis_title='content_type',yaxis_title='total hours viewed(in billions)',height=400,width=800)

fig.show()




## analyze the distribution of viewership across different languages 

In [14]:
language_of_content=netflix_data.groupby('Language Indicator')['Hours Viewed'].sum().sort_values(ascending=False)
language_of_content

Language Indicator
English        1.244417e+11
Korean         1.537840e+10
Non-English    1.043910e+10
Japanese       7.102000e+09
Hindi          9.261000e+08
Russian        1.146000e+08
Name: Hours Viewed, dtype: float64

In [15]:
fig=go.Figure(data=[go.Bar(x=language_of_content.index, y=language_of_content.values,
marker_color='skyblue' )])

fig.update_layout(title='Total Viewership Hours by language',
xaxis_title='language',yaxis_title='total hours viewed',height=400,
width=800,xaxis_tickangle=45
)
fig.show()

In [16]:
netflix_data['Release Date'] = pd.to_datetime(netflix_data['Release Date'])

In [17]:

netflix_data['Release Month'] = netflix_data['Release Date'].dt.month.astype('Int64')
monthly_viewership = netflix_data.groupby('Release Month')['Hours Viewed'].sum()
monthly_viewership


Release Month
1     7.271600e+09
2     7.103700e+09
3     7.437100e+09
4     6.865700e+09
5     7.094600e+09
6     8.522000e+09
7     6.524800e+09
8     6.817800e+09
9     7.262200e+09
10    8.123200e+09
11    7.749500e+09
12    1.005580e+10
Name: Hours Viewed, dtype: float64

In [18]:
fig = go.Figure(data=[
    go.Scatter(
        x=monthly_viewership.index,
        y=monthly_viewership.values,
        mode='lines+markers',
        marker_color='blue')
    
])

fig.update_layout(
    title='Total Viewership Hours by Release Month ',
    xaxis_title='Month',yaxis_title='Total Hours Viewed (in billions)',
    xaxis=dict(tickmode='array',
    tickvals=list(range(1,13)),
    ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
            'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    ))

fig.show()

## Analyze viewership trends by content type:

In [19]:
top_5=netflix_data.nlargest(5,'Hours Viewed')
top_5
# return the 5 rows from netflix_data that have the largest values in the column "Hours Viewed".

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type,Release Month
0,The Night Agent: Season 1,Yes,2023-03-23,812100000.0,English,Show,3
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000.0,English,Show,1
18227,King the Land: Limited Series // 킹더랜드: 리미티드 시리즈,Yes,2023-06-17,630200000.0,Korean,Movie,6
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000.0,Korean,Show,12
18214,ONE PIECE: Season 1,Yes,2023-08-31,541900000.0,English,Show,8


In [20]:
monthly_viewership_pivot_table=netflix_data.pivot_table(index='Release Month',values='Hours Viewed',
columns='Content Type',aggfunc='sum')
monthly_viewership_pivot_table

Content Type,Movie,Show
Release Month,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2275900000.0,4995700000.0
2,1654400000.0,5449300000.0
3,2109400000.0,5327700000.0
4,2757600000.0,4108100000.0
5,2520500000.0,4574100000.0
6,3135800000.0,5386200000.0
7,1615700000.0,4909100000.0
8,2186400000.0,4631400000.0
9,2092300000.0,5169900000.0
10,3400400000.0,4722800000.0


In [21]:
fig = go.Figure()
for n in monthly_viewership_pivot_table.columns:
    fig.add_trace(go.Scatter(
        x=monthly_viewership_pivot_table.index,
        y=monthly_viewership_pivot_table[n],
        mode='lines+markers',name=n,marker_color=['skyblue','salmon']
    ))

    fig.update_layout(title=' Viewership trends by Content Type',
    xaxis_title='Month',yaxis_title='Total Hours Viewed (in billions)',
    xaxis=dict(tickmode='array', tickvals= list(range(1,13)),
    ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
            'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    ),width=800,legend_title='Content Type',height=700
    )

fig.show()

## explore the total viewership hours distributed across different release seasons

In [22]:
def get_season(month):
    if month in [12,1,2]:
        return 'Winter'
    elif month in [3,4,5]:
        return 'Spring'
    elif month in [6,7,8]:
        return 'Summer'
    else:
        return 'Fall'

In [23]:
netflix_data['Release Season'] = netflix_data['Release Month'].apply(get_season)
netflix_data

Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type,Release Month,Release Season
0,The Night Agent: Season 1,Yes,2023-03-23,812100000.0,English,Show,3,Spring
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000.0,English,Show,1,Winter
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000.0,Korean,Show,12,Winter
3,Wednesday: Season 1,Yes,2022-11-23,507700000.0,English,Show,11,Fall
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000.0,English,Movie,5,Spring
...,...,...,...,...,...,...,...,...
24807,We Are Black and British: Season 1,No,NaT,100000.0,English,Show,,Fall
24808,Whitney Cummings: Can I Touch It?,Yes,2019-07-30,100000.0,English,Movie,7,Summer
24809,Whitney Cummings: Jokes,No,2022-07-26,100000.0,English,Movie,7,Summer
24810,"Whose Vote Counts, Explained: Limited Series",Yes,2020-09-28,100000.0,English,Movie,9,Fall


In [24]:
seasonal_viewership=netflix_data.groupby('Release Season')['Hours Viewed'].sum()
order = ['Winter', 'Spring', 'Summer', 'Fall']
seasonal_viewership=seasonal_viewership.reindex(order)

In [25]:
fig=go.Figure(data=[go.Bar(x=seasonal_viewership.index,y=seasonal_viewership.values,
marker_color='magenta')])

fig.update_layout(
title='Viewership trends by Release Season', title_x=0.5,
xaxis_title='Season', yaxis_title='Total Hours Viewed (in billions)',
)
fig.show()

## analyze the number of content releases and their viewership hours across months

In [26]:
monthly_content_relase=netflix_data['Release Month'].value_counts().sort_index()
monthly_content_relase

Release Month
1     608
2     560
3     690
4     647
5     624
6     670
7     631
8     674
9     739
10    802
11    734
12    787
Name: count, dtype: Int64

In [27]:
monthly_content_relase=netflix_data['Release Month'].value_counts().sort_index()
monthly_viewership = netflix_data.groupby('Release Month')['Hours Viewed'].sum()

fig = go.Figure()
fig.add_trace(go.Bar(
    x=monthly_content_relase.index,y=monthly_content_relase.values,
    marker_color='goldenrod',name='Number of Releases',opacity=0.7,
    yaxis='y1' #By default, a figure has one y-axis, called y1.
))
fig.add_trace(go.Scatter(x=monthly_viewership.index,y=monthly_viewership.values,
    marker_color='red',yaxis='y2',name='viewership hours',mode='lines+markers'
    ))

fig.update_layout(title='Monthly Release Patterns and Viewership Hours (2023)',
title_x=0.5,xaxis_title='month',
xaxis=dict(tickmode='array',tickvals=list(range(1,13)),
ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']),
yaxis=dict( title='Number of Releases',
showgrid=False,side='left'),
yaxis2=dict( title='Number of Releases',
showgrid=False,side='right',
overlaying='y'), #overlaying='y' means Place (y2) on top of the first y-axis 
legend=dict(x=1.08, y=1,
orientation='v',xanchor='left')
, width=830)


fig.show()

In [28]:
netflix_data['Release day'] = netflix_data['Release Date'].dt.day_name()
weekday_releases=netflix_data['Release day'].value_counts().reindex(
    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])
weekday_viewership =netflix_data.groupby('Release day')['Hours Viewed'].sum().reindex(
    ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])

In [29]:
fig = go.Figure()
fig.add_trace(go.Bar(
    x=weekday_releases.index,
    y=weekday_releases.values,
    marker_color='blue',opacity=0.5,
    yaxis='y',name='Number of releases'
))
fig.add_trace(go.Scatter(
    x=weekday_viewership.index,
    y=weekday_viewership.values,
    line=dict(color='red'),
    marker=dict(color='red'),
    mode='lines+markers',
    name='Viewership hours',
    yaxis='y2'
))
fig.update_layout(
title='Weekly Release Patterns and Viewership Hours',
xaxis=dict(
        title='Day of the Week',
        categoryorder='array',
        categoryarray=['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday', 'Sunday']
),

yaxis =dict(
    title='Number of Releases',
    showgrid=False,side='left',
    showline=True,          
        linewidth=2,
        linecolor='black',
        ticks='outside',
        tickfont=dict(color='red')
),
yaxis2 =dict(
    title='Total Hours Viewed (in billions)',
    showgrid=False,side='right',
    overlaying='y',
    showline=True,         
        linewidth=2,
        linecolor='blue',
        ticks='outside',
        tickfont=dict(color='blue'),   
    )

,legend=dict(
    x=1.09,y=1
),width =800, height =600
)
fig.show()

### explore specific high-impact dates for Netflix MENA in Saudi Arabia

In [32]:
important_dates = [
    '2023-04-21',  # Eid al-Fitr 
    '2023-04-22',  # Eid al-Fitr Holiday 
    '2023-04-23',  # Eid al-Fitr Holiday
    '2023-06-28',  # Eid al-Adha – Saudi 
    '2023-06-29',  # Eid al-Adha Holiday
    '2023-06-30',  # Eid al-Adha Holiday 
    '2023-07-19',  # Islamic New Year 
    '2023-09-23'   # Saudi National Day 
]
# convert to datetime
important_dates=pd.to_datetime(important_dates)
important_dates=important_dates.tolist()

#Check how many hours people watched Netflix on these dates?
holiday_releases=netflix_data[netflix_data['Release Date'].isin(important_dates)]
holiday_viewership = holiday_releases.groupby('Release Date')['Hours Viewed'].sum().sort_values(ascending=False)
holiday_viewership 


Release Date
2023-06-29    422100000.0
2023-04-21    333300000.0
2023-06-30    269900000.0
2023-06-28     60100000.0
2023-07-19     54400000.0
2023-04-22      7700000.0
Name: Hours Viewed, dtype: float64

In [33]:
# check for content releases close to these significant holidays (within a 3-day window)
holiday_release = netflix_data[
    netflix_data['Release Date'].apply(
        lambda x :any ((x- date ).days in range (-3,4) for date in important_dates)
    )]
# aggregate viewership hours for releases near significant holidays
holiday_viewership = holiday_release.groupby('Release Date')['Hours Viewed'].sum()

holiday_release[['Title', 'Release Date', 'Hours Viewed']]

Unnamed: 0,Title,Release Date,Hours Viewed
18,The Diplomat: Season 1,2023-04-20,214100000.0
31,The Marked Heart: Season 2 // Pálpito: Tempora...,2023-04-19,174300000.0
48,The Good Bad Mother: Limited Series // 나쁜엄마: 리...,2023-04-26,148600000.0
84,Welcome to Eden: Season 2 // Bienvenidos a Edé...,2023-04-21,104500000.0
161,Rough Diamonds: Season 1,2023-04-21,72800000.0
...,...,...,...
21367,"Ada Twist, Scientist: Season 4",2023-04-22,2800000.0
21759,Hard Broken: Limited Series // كسرة قلب: Limit...,2023-09-20,1900000.0
22170,Love After Music: Season 1 // El amor después ...,2023-04-26,1600000.0
22828,Tooth Pari: When Love Bites: Limited Series //...,2023-04-20,1200000.0
