In [None]:
# === Cell 1: imports & paths ===
import pandas as pd
import numpy as np
from pathlib import Path

# Input locations, resolved relative to the notebook's working directory.
DATA_DIR = Path("../data")
PATH_TOPICS = DATA_DIR.joinpath("df_corpus_with_topics.parquet")

# Read the parquet file at PATH_TOPICS into the working frame.
df = pd.read_parquet(PATH_TOPICS)

# Final TRL: pseudo labels plus real ones, when available.
# (If df_corpus_with_topics has columns such as pseudo_trl, they can be merged in here.)
df.head(n=5)


In [None]:
# === Cell 2: timestamp and basic filter ===
# Cast the calendar columns to int on a new frame (astype returns a copy),
# so the previously bound frame object is not modified in place.
df = df.astype({"year": int, "month": int})

# One timestamp per row, pinned to the first day of its (year, month).
df["date"] = pd.to_datetime({"year": df["year"], "month": df["month"], "day": 1})

# Keep only the records whose TRL is known.
df_trl = df.loc[df["trl"].notna()].copy()
len(df_trl)


In [None]:
# === Cell 3: per-topic, per-month TRL summaries ===
# For every (topic_id, topic_name, date) combination: mean TRL and number of TRL values.
# NOTE(review): groupby drops rows whose key is NaN by default, so records with a
# missing topic_name would be excluded here; confirm that is intended.
g = df_trl.groupby(["topic_id", "topic_name", "date"])

# Named aggregation produces the final column names directly (no rename pass).
df_topic_month = g["trl"].agg(trl_mean="mean", n_docs="count").reset_index()

df_topic_month.head()


In [None]:
# === Cell 4: TRL momentum via EWMA (example: span of 6 monthly rows) ===
# Order rows by topic first, then chronologically within each topic.
df_topic_month = df_topic_month.sort_values(by=["topic_id", "date"])

def add_ewma(group, span=6):
    """Return ``group`` sorted by date with EWMA-smoothed TRL and volume columns.

    Args:
        group: DataFrame with ``date``, ``trl_mean`` and ``n_docs`` columns.
        span: EWMA span, counted in rows; also used as the suffix of the
            generated column names.

    Returns:
        A date-sorted copy of ``group`` with two extra columns,
        ``trl_ewma_<span>`` and ``n_docs_ewma_<span>``. The input frame is
        left untouched because ``sort_values`` returns a new frame.
    """
    ordered = group.sort_values("date")
    # (source column, prefix of the smoothed column); the order fixes column order.
    for source, prefix in (("trl_mean", "trl"), ("n_docs", "n_docs")):
        # adjust=False selects the recursive form: y_t = (1 - a) * y_{t-1} + a * x_t.
        ordered[f"{prefix}_ewma_{span}"] = ordered[source].ewm(span=span, adjust=False).mean()
    return ordered

# Smooth every topic's series independently.
# The groups are iterated explicitly instead of using groupby(...).apply(add_ewma):
# apply() handing the grouping column to the function is deprecated in pandas 2.2
# and removed in pandas 3.0, where "topic_id" would vanish from the result
# (group_keys=False) and break the cells that filter on it. Iterating a groupby
# always yields the full rows, grouping column included.
# The span stays at 6 because later cells read the "*_ewma_6" columns by name.
topic_frames = [
    add_ewma(topic_rows, span=6)
    for _, topic_rows in df_topic_month.groupby("topic_id")
]

# pd.concat raises on an empty list; with no groups, still add the (empty) EWMA columns.
df_topic_month = (
    pd.concat(topic_frames) if topic_frames else add_ewma(df_topic_month, span=6)
)

df_topic_month.head()


In [None]:
# === Cell 5: simple hype-stage label (example rules) ===
# Purely heuristic; change the rules and thresholds as needed.
def assign_hype_stage(row, span=6):
    """Label one topic-month row with a coarse hype stage.

    Rules are checked top to bottom and the first match wins:

    * ``"Discovery"``: trl_mean < 3, smoothed volume > 2, smoothed TRL < 3
    * ``"Early_Development"``: 3 <= trl_mean < 5, smoothed volume > 3
    * ``"Industrialization"``: trl_mean >= 5, smoothed volume > 5
    * ``"Maturity"``: trl_mean >= 7, smoothed volume <= 3
    * ``"Unclear"``: anything else

    Args:
        row: Mapping-like row (e.g. a pandas Series from ``df.apply(axis=1)``)
            with keys ``trl_mean``, ``trl_ewma_<span>`` and
            ``n_docs_ewma_<span>``.
        span: Suffix of the EWMA columns to read. Defaults to 6, which
            matches the ``trl_ewma_6`` / ``n_docs_ewma_6`` column names.

    Returns:
        The stage name as a string.

    Raises:
        KeyError: If one of the required keys is missing from ``row``.
    """
    trl = row["trl_mean"]
    # These are EWMA-smoothed levels, not rates of change.
    trl_smooth = row[f"trl_ewma_{span}"]
    vol_smooth = row[f"n_docs_ewma_{span}"]

    # Very rough rules; float NaN fails every comparison and ends up "Unclear".
    if trl < 3 and vol_smooth > 2 and trl_smooth < 3:
        return "Discovery"
    if 3 <= trl < 5 and vol_smooth > 3:
        return "Early_Development"
    # Checked before "Maturity", so a high-TRL topic with high volume lands here.
    if trl >= 5 and vol_smooth > 5:
        return "Industrialization"
    if trl >= 7 and vol_smooth <= 3:
        return "Maturity"
    return "Unclear"

# Evaluate the rule set once per row and store the resulting label.
df_topic_month["hype_stage"] = df_topic_month.apply(assign_hype_stage, axis="columns")

# Preview the first 20 labelled rows.
df_topic_month.head(n=20)


In [None]:
# === Cell 6: plot for one example topic (optional) ===
import matplotlib.pyplot as plt

# Take the first topic id present and pull its rows in chronological order.
sample_topic = df_topic_month["topic_id"].dropna().unique()[0]
tmp = df_topic_month[df_topic_month["topic_id"] == sample_topic].sort_values("date")

# Explicit figure/axes objects instead of the implicit pyplot state, plus a
# title and axis labels so the chart can be read on its own.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(tmp["date"], tmp["trl_mean"], label="TRL mean")
ax.plot(tmp["date"], tmp["trl_ewma_6"], label="TRL EWMA(6)")
ax.set(title=f"TRL over time (topic {sample_topic})", xlabel="Month", ylabel="TRL")
ax.legend()
ax.tick_params(axis="x", labelrotation=45)
fig.tight_layout()
plt.show()

# Last 12 rows of the plotted series, to read the values off directly.
tmp[["date", "trl_mean", "trl_ewma_6", "n_docs", "hype_stage"]].tail(12)


In [None]:
# === Cell 7: save the final output ===
# Build the destination once so the write and the log line cannot drift apart.
PATH_TRL_TIMESERIES = DATA_DIR / "topic_trl_timeseries.parquet"
df_topic_month.to_parquet(PATH_TRL_TIMESERIES, index=False)
print("Saved:", PATH_TRL_TIMESERIES)
