In [None]:
# 07_genre_shift_analysis.ipynb
# Project: The Golden Age Myth – IMDb Analysis
# Author: Prateek Chandra

# This notebook analyzes GENRE SHIFT across decades and shows
# how changing genre composition affects perceived ratings.

In [3]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
from pathlib import Path

# Use the "plotly_white" template as the default for figures created below.
pio.templates.default = "plotly_white"

# ---
# Cell 2: Load Genre-Exploded Data
# Processed-data folder, given relative to the current working directory.
DATA_DIR = Path("../data/processed")

# presumably one row per (movie, genre) pair — confirm against the export step
genres_csv_path = DATA_DIR / "movies_genres.csv"
movies_genres = pd.read_csv(genres_csv_path)

# ---
# Cell 3: Type Safety & Cleaning
# Coerce the numeric columns; anything that cannot be parsed becomes NaN
# (errors="coerce") instead of raising.
for numeric_col in ("decade", "averageRating"):
    movies_genres[numeric_col] = pd.to_numeric(movies_genres[numeric_col], errors="coerce")

# Drop rows missing any field the analysis below relies on, then store the
# decade as an integer (the cast is done only after the NaN rows are gone).
required_cols = ["decade", "averageRating", "genres"]
movies_genres = movies_genres.dropna(subset=required_cols)
movies_genres["decade"] = movies_genres["decade"].astype(int)

# ---
# Cell 4: Focus on Major Genres Only
TOP_GENRES = 8  # number of most frequent genres to keep

# value_counts() orders labels from most to least frequent, so the first
# TOP_GENRES index entries are the genres with the most rows.
genre_frequency = movies_genres["genres"].value_counts()
top_genres = genre_frequency.head(TOP_GENRES).index

# Keep only the rows whose genre is one of the selected top genres.
in_top_genres = movies_genres["genres"].isin(top_genres)
movies_genres = movies_genres[in_top_genres]

In [4]:
# ---
# Cell 5: Genre Frequency by Decade
# Number of rows in each (decade, genre) group, as a Series indexed by both keys.
rows_per_decade_genre = movies_genres.groupby(["decade", "genres"]).size()

# Flatten to a tidy frame with a "count" column, ordered by decade for plotting.
genre_counts = (
    rows_per_decade_genre
    .reset_index(name="count")
    .sort_values("decade")
)

# Bar chart of per-decade counts, one colour per genre.
bar_labels = {"decade": "Decade", "count": "Number of Movies", "genres": "Genre"}
fig = px.bar(
    genre_counts,
    x="decade",
    y="count",
    color="genres",
    title="Genre Distribution by Decade (Top Genres)",
    labels=bar_labels,
)
fig.show()

In [5]:
# ---
# Cell 6: Average Rating per Genre Over Time
# Unweighted mean of averageRating within each (decade, genre) group.
mean_rating_by_group = movies_genres.groupby(["decade", "genres"])["averageRating"].mean()

# Turn the two group keys back into ordinary columns for plotting.
genre_rating = mean_rating_by_group.reset_index()

# One line per genre, with a marker at each decade that has data.
line_labels = {"decade": "Decade", "averageRating": "Average Rating", "genres": "Genre"}
fig = px.line(
    genre_rating,
    x="decade",
    y="averageRating",
    color="genres",
    markers=True,
    title="Average Rating Trend by Genre",
    labels=line_labels,
)
fig.show()