In [None]:
# 08_content_explosion_ott.ipynb
# Project: The Golden Age Myth – IMDb Analysis
# Author: Prateek Chandra

# This notebook demonstrates the CONTENT EXPLOSION effect,
# especially in the OTT era, and how increased volume increases
# rating variance.

In [1]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
from pathlib import Path

# Make "plotly_white" the default Plotly template.
pio.templates.default = "plotly_white"

# ---
# Cell 2: Load Feature Data
# DATA_DIR is a relative path, so it is resolved against the current working directory.
DATA_DIR = Path("../data/processed")
movies = pd.read_csv(DATA_DIR.joinpath("movies_features.csv"))

# ---   
# Cell 3: Type Safety & Cleaning
# Build the cleaned frame in a single chained expression rather than assigning
# columns one statement at a time: writing a column into the result of dropna()
# can emit SettingWithCopyWarning on pandas versions without copy-on-write, and
# the chain never mutates the frame that was loaded from disk.
movies = (
    movies
    .assign(
        # errors="coerce" turns values that cannot be parsed as numbers into NaN.
        decade=lambda frame: pd.to_numeric(frame["decade"], errors="coerce"),
        averageRating=lambda frame: pd.to_numeric(frame["averageRating"], errors="coerce"),
    )
    # Remove rows where either column is missing or failed to parse.
    .dropna(subset=["decade", "averageRating"])
    # Only valid after the NaN rows are gone: NaN cannot be cast to int.
    .astype({"decade": int})
)

In [2]:
# ---
# Cell 4: Titles Released per Decade
# Count the rows that fall into each decade, then flatten the result into a
# two-column frame (decade, count) for plotting.
titles_per_decade = movies.groupby("decade").size()
count_by_decade = titles_per_decade.rename("count").reset_index()

bar_axis_labels = {"decade": "Decade", "count": "Number of Movies"}
fig = px.bar(
    count_by_decade,
    x="decade",
    y="count",
    title="Content Explosion: Movies Released per Decade",
    labels=bar_axis_labels,
)
fig.show()

In [3]:
# ---
# Cell 5: Rating Variance per Decade
# The "var" aggregation is the sample variance (ddof=1), so a decade that holds
# a single title yields NaN. Aggregate the title count together with the
# variance and keep only decades where the variance is defined; otherwise the
# NaN rows leave gaps in the plotted line.
MIN_TITLES_FOR_VARIANCE = 2

decade_stats = movies.groupby("decade")["averageRating"].agg(
    variance="var",
    n_titles="count",
)
variance_by_decade = decade_stats[
    decade_stats["n_titles"] >= MIN_TITLES_FOR_VARIANCE
].reset_index()

fig = px.line(
    variance_by_decade,
    x="decade",
    y="variance",
    markers=True,
    # Show the sample size on hover: a variance computed from very few titles is noisy.
    hover_data=["n_titles"],
    title="Rating Variance Explosion in the OTT Era",
    labels={
        "decade": "Decade",
        "variance": "Rating Variance",
        "n_titles": "Number of Movies",
    },
)
fig.show()