In [0]:
%sql
-- Make sure the gold-layer schema is present before any table is written to it.
-- Safe to re-run: nothing happens when the schema already exists.
CREATE SCHEMA IF NOT EXISTS gold;

In [0]:
%sql
-- Rebuild gold.monthly_trends: one row per calendar month of question creation,
-- carrying the question count, mean views, mean answers and answered-question count.
CREATE OR REPLACE TABLE gold.monthly_trends AS
WITH questions_by_month AS (
    -- Tag every question with the first instant of its creation month.
    SELECT
        DATE_TRUNC('month', creation_timestamp) AS month_start,
        question_id,
        view_count,
        answer_count,
        is_answered
    FROM silver.questions
)
SELECT
    YEAR(month_start)            AS year,
    MONTH(month_start)           AS month,
    month_start,
    COUNT(question_id)           AS total_questions,
    AVG(view_count)              AS avg_views,
    AVG(answer_count)            AS avg_answers,
    -- A NULL is_answered falls through to 0, i.e. it is counted as unanswered.
    SUM(IF(is_answered, 1, 0))   AS answered_questions
FROM questions_by_month
GROUP BY
    month_start
ORDER BY
    year, month;


In [0]:
%sql
-- Preview the first ten months of the gold table.
-- LIMIT without ORDER BY may return any ten rows, so sort explicitly to keep
-- this preview reproducible across re-runs.
SELECT *
FROM gold.monthly_trends
ORDER BY month_start
LIMIT 10

In [0]:
import matplotlib.pyplot as plt

# Read the gold aggregate through Spark, then move it into pandas for plotting.
gold_monthly_trends = spark.table("gold.monthly_trends")

# Sort chronologically so the line is drawn left to right in time order.
pdf = gold_monthly_trends.toPandas().sort_values("month_start")

# Line chart of the number of questions created in each month.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(
    pdf["month_start"],
    pdf["total_questions"],
    marker="o",
    label="Total Questions",
)
ax.set_title("question trends per month")
ax.set_xlabel("month")
ax.set_ylabel("count")
ax.legend()
ax.grid(True)
plt.show()
