In [1]:
# Prompt for a CSV upload through the Colab file picker
from google.colab import files
uploaded = files.upload()

# Libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import io

# Use the "plotly_white" template as the default for figures created below
pio.templates.default = "plotly_white"

# Load the first uploaded file into a DataFrame
filename = list(uploaded)[0]
csv_bytes = uploaded[filename]
netflix_data = pd.read_csv(io.BytesIO(csv_bytes))

# Preview the first rows
netflix_data.head()


Saving netflix_content_2023.csv to netflix_content_2023.csv


Unnamed: 0,Title,Available Globally?,Release Date,Hours Viewed,Language Indicator,Content Type
0,The Night Agent: Season 1,Yes,2023-03-23,812100000,English,Show
1,Ginny & Georgia: Season 2,Yes,2023-01-05,665100000,English,Show
2,The Glory: Season 1 // 더 글로리: 시즌 1,Yes,2022-12-30,622800000,Korean,Show
3,Wednesday: Season 1,Yes,2022-11-23,507700000,English,Show
4,Queen Charlotte: A Bridgerton Story,Yes,2023-05-04,503000000,English,Movie


In [2]:
# Normalise column headers: trim whitespace, lower-case, spaces -> underscores
normalised_columns = netflix_data.columns.str.strip()
normalised_columns = normalised_columns.str.lower()
netflix_data.columns = normalised_columns.str.replace(' ', '_')

# Remove commas from 'hours_viewed' and store the column as float
hours_without_commas = netflix_data['hours_viewed'].replace(',', '', regex=True)
netflix_data['hours_viewed'] = hours_without_commas.astype(float)

# Parse release dates (unparseable values become NaT because of errors='coerce'),
# then derive the month number and the weekday name from the parsed dates
release_dates = pd.to_datetime(netflix_data['release_date'], errors='coerce')
netflix_data['release_date'] = release_dates
netflix_data['release_month'] = release_dates.dt.month
netflix_data['release_day'] = release_dates.dt.day_name()


In [3]:
# Total hours viewed for each content type
hours_by_content_type = netflix_data.groupby('content_type')['hours_viewed']
content_type_viewership = hours_by_content_type.sum()

# One bar per content type; the colour list supplies two explicit bar colours
content_type_bar = go.Bar(
    x=content_type_viewership.index,
    y=content_type_viewership.values,
    marker_color=['skyblue', 'salmon'],
)
fig = go.Figure(data=[content_type_bar])

content_type_layout = dict(
    title='Total Viewership Hours by Content Type (2023)',
    xaxis_title='Content Type',
    yaxis_title='Total Hours Viewed',
    height=500,
    width=800,
)
fig.update_layout(**content_type_layout)

fig.show()


In [4]:
# Total hours viewed per language, largest first
language_viewership = (
    netflix_data
    .groupby('language_indicator')['hours_viewed']
    .sum()
    .sort_values(ascending=False)
)

language_bar = go.Bar(
    x=language_viewership.index,
    y=language_viewership.values,
    marker_color='lightcoral',
)
fig = go.Figure(data=[language_bar])

language_layout = dict(
    title='Total Viewership Hours by Language (2023)',
    xaxis_title='Language',
    yaxis_title='Total Hours Viewed',
    xaxis_tickangle=45,
    height=600,
    width=1000,
)
fig.update_layout(**language_layout)

fig.show()


In [5]:
# Total hours viewed, grouped by the month number of the release date
hours_by_release_month = netflix_data.groupby('release_month')['hours_viewed']
monthly_viewership = hours_by_release_month.sum()

monthly_trend_line = go.Scatter(
    x=monthly_viewership.index,
    y=monthly_viewership.values,
    mode='lines+markers',
    marker=dict(color='blue'),
    line=dict(color='blue'),
)
fig = go.Figure(data=[monthly_trend_line])

# Label the numeric month ticks 1..12 with abbreviated month names
monthly_trend_layout = dict(
    title='Monthly Viewership Trend (2023)',
    xaxis_title='Month',
    yaxis_title='Total Hours Viewed',
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(1, 13)),
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    ),
    height=600,
    width=1000,
)
fig.update_layout(**monthly_trend_layout)

fig.show()


In [6]:
# The five rows with the largest 'hours_viewed', shown with a subset of columns
top_5_titles = netflix_data.nlargest(n=5, columns='hours_viewed')
top_5_display_columns = ['title', 'hours_viewed', 'language_indicator', 'content_type', 'release_date']
top_5_titles[top_5_display_columns]


Unnamed: 0,title,hours_viewed,language_indicator,content_type,release_date
0,The Night Agent: Season 1,812100000.0,English,Show,2023-03-23
1,Ginny & Georgia: Season 2,665100000.0,English,Show,2023-01-05
18227,King the Land: Limited Series // 킹더랜드: 리미티드 시리즈,630200000.0,Korean,Movie,2023-06-17
2,The Glory: Season 1 // 더 글로리: 시즌 1,622800000.0,Korean,Show,2022-12-30
18214,ONE PIECE: Season 1,541900000.0,English,Show,2023-08-31


In [7]:
# Rows: release month, columns: content type, cells: summed hours viewed
pivot = netflix_data.pivot_table(
    index='release_month',
    columns='content_type',
    values='hours_viewed',
    aggfunc='sum',
)

# One line per content type, added in the pivot's column order
content_type_lines = [
    go.Scatter(x=pivot.index, y=pivot[col], mode='lines+markers', name=col)
    for col in pivot.columns
]
fig = go.Figure(data=content_type_lines)

# Label the numeric month ticks 1..12 with abbreviated month names
monthly_by_type_layout = dict(
    title='Monthly Viewership by Content Type',
    xaxis_title='Month',
    yaxis_title='Hours Viewed',
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(1, 13)),
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    ),
    height=600,
    width=1000,
)
fig.update_layout(**monthly_by_type_layout)

fig.show()


In [8]:
def get_season(month):
    """Map a calendar month number to a season name.

    Parameters
    ----------
    month : int or float
        Month number, 1-12. Float values such as 3.0 are accepted because a
        month column that contains missing values is stored as float.

    Returns
    -------
    str or None
        'Winter' (12, 1, 2), 'Spring' (3-5), 'Summer' (6-8) or 'Fall' (9-11).
        None when `month` is missing (None / NaN / NaT) or outside 1-12, so
        that rows without a usable month are not silently counted as 'Fall'.
    """
    # A missing month must not fall through to a real season.
    if pd.isna(month):
        return None
    if month in (12, 1, 2):
        return 'Winter'
    if month in (3, 4, 5):
        return 'Spring'
    if month in (6, 7, 8):
        return 'Summer'
    if month in (9, 10, 11):
        return 'Fall'
    # Anything else is not a valid month number.
    return None

# Tag each row with the season returned by get_season for its release month
netflix_data['release_season'] = netflix_data['release_month'].apply(get_season)

# Sum hours per season and present the seasons in a fixed order
season_order = ['Winter', 'Spring', 'Summer', 'Fall']
seasonal_viewership = (
    netflix_data
    .groupby('release_season')['hours_viewed']
    .sum()
    .reindex(season_order)
)

seasonal_bar = go.Bar(
    x=seasonal_viewership.index,
    y=seasonal_viewership.values,
    marker_color='orange',
)
fig = go.Figure([seasonal_bar])

seasonal_layout = dict(
    title='Seasonal Viewership (2023)',
    xaxis_title='Season',
    yaxis_title='Total Hours Viewed',
    height=500,
    width=800,
)
fig.update_layout(**seasonal_layout)

fig.show()


In [9]:
# Number of titles per release month (ordered by month) and summed hours per release month
release_month_counts = netflix_data['release_month'].value_counts()
monthly_releases = release_month_counts.sort_index()
monthly_viewership = netflix_data.groupby('release_month')['hours_viewed'].sum()

# Bars: release counts (left axis). Line: viewership hours (right axis).
monthly_releases_bar = go.Bar(
    x=monthly_releases.index,
    y=monthly_releases.values,
    name='Number of Releases',
    marker_color='goldenrod',
    opacity=0.7,
    yaxis='y1',
)
monthly_viewership_line = go.Scatter(
    x=monthly_viewership.index,
    y=monthly_viewership.values,
    name='Viewership Hours',
    mode='lines+markers',
    marker=dict(color='red'),
    yaxis='y2',
)
fig = go.Figure(data=[monthly_releases_bar, monthly_viewership_line])

# The second y-axis overlays the first and is drawn on the right; grids are hidden on both
monthly_dual_axis_layout = dict(
    title='Monthly Releases vs Viewership',
    xaxis=dict(
        tickmode='array',
        tickvals=list(range(1, 13)),
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    ),
    yaxis=dict(title='Number of Releases', side='left', showgrid=False),
    yaxis2=dict(title='Hours Viewed', overlaying='y', side='right', showgrid=False),
    height=600,
    width=1000,
)
fig.update_layout(**monthly_dual_axis_layout)

fig.show()


In [10]:
# Fixed Monday-to-Sunday ordering shared by both weekday series
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Number of titles released on each weekday
weekday_releases = netflix_data['release_day'].value_counts().reindex(weekday_order)

# Summed hours viewed for titles released on each weekday
hours_by_weekday = netflix_data.groupby('release_day')['hours_viewed'].sum()
weekday_viewership = hours_by_weekday.reindex(weekday_order)

# Bars: release counts (left axis). Line: viewership hours (right axis).
weekday_releases_bar = go.Bar(
    x=weekday_releases.index,
    y=weekday_releases.values,
    name='Number of Releases',
    marker_color='blue',
    opacity=0.6,
    yaxis='y1',
)
weekday_viewership_line = go.Scatter(
    x=weekday_viewership.index,
    y=weekday_viewership.values,
    name='Viewership Hours',
    mode='lines+markers',
    marker=dict(color='red'),
    yaxis='y2',
)
fig = go.Figure(data=[weekday_releases_bar, weekday_viewership_line])

weekday_dual_axis_layout = dict(
    title='Weekly Release Patterns vs Viewership',
    xaxis=dict(title='Day of the Week'),
    yaxis=dict(title='Number of Releases', side='left'),
    yaxis2=dict(title='Hours Viewed', overlaying='y', side='right'),
    height=600,
    width=1000,
)
fig.update_layout(**weekday_dual_axis_layout)

fig.show()


In [11]:
# Dates around which releases are inspected
important_dates = pd.to_datetime([
    '2023-01-01', '2023-02-14', '2023-07-04', '2023-10-31', '2023-12-25'
])

# Vectorised window test: a row qualifies when its release date lies within
# 3 days (inclusive, before or after) of any important date. This replaces a
# row-wise .apply and selects the same rows. Rows whose release_date is NaT
# give NaN day offsets, which .between() treats as False.
near_important_date = pd.Series(False, index=netflix_data.index)
for important_date in important_dates:
    days_from_date = (netflix_data['release_date'] - important_date).dt.days
    near_important_date |= days_from_date.between(-3, 3)

holiday_releases = netflix_data[near_important_date]

# Ten most-viewed titles released inside any of those windows
holiday_releases[['title', 'release_date', 'hours_viewed']].sort_values('hours_viewed', ascending=False).head(10)


Unnamed: 0,title,release_date,hours_viewed
2,The Glory: Season 1 // 더 글로리: 시즌 1,2022-12-30,622800000.0
6,La Reina del Sur: Season 3,2022-12-30,429600000.0
18223,The Lincoln Lawyer: Season 2,2023-07-06,292300000.0
11,Kaleidoscope: Limited Series,2023-01-01,252500000.0
29,Perfect Match: Season 1,2023-02-14,176800000.0
18263,Fatal Seduction: Season 1,2023-07-07,132100000.0
18225,All the Light We Cannot See: Limited Series,2023-11-02,129900000.0
18383,Daily Dose of Sunshine: Season 1 // 정신병동에도 아침이...,2023-11-03,115900000.0
18441,Castaway Diva: Limited Series // 무인도의 디바: 리미티드...,2023-10-28,107900000.0
18335,Selling Sunset: Season 7,2023-11-03,89600000.0
