# In [None]:
# ------------------------------------------------------------
# 📍 0. Notebook Title & Purpose
# Employee Sentiment Analysis · Task 5: Flight Risk Detection
# ------------------------------------------------------------

# ------------------------------------------------------------
# 📍 1. Imports
import pandas as pd
from pathlib import Path
from datetime import timedelta

# ------------------------------------------------------------
# 📍 2. Load Data
df = pd.read_csv("../data/processed/labeled_messages.csv")
# Unparseable dates become NaT (errors="coerce") so they can be filtered out below.
df["date_parsed"] = pd.to_datetime(df["date_parsed"], errors="coerce")

# Restrict to negative-sentiment rows that carry a usable date, then order
# them by sender and time.
is_negative = df["Sentiment"] == "Negative"
has_date = df["date_parsed"].notna()
df_neg = df.loc[is_negative & has_date].sort_values(["from", "date_parsed"])

# ------------------------------------------------------------
# 📍 3. Identify Rolling 30-day Negative Message Clusters
def flag_risk(group, window_days=30, min_messages=4):
    """Flag a group whose dated messages cluster inside a rolling window.

    The group is at risk when at least ``min_messages`` of its
    ``date_parsed`` values fall within ``window_days`` whole days of the
    earliest message in that cluster.

    Args:
        group: DataFrame with a ``date_parsed`` datetime column (one group
            as passed by ``DataFrame.groupby(...).apply``).
        window_days: Maximum whole-day gap between the first and last
            message of a cluster. Defaults to 30.
        min_messages: Minimum number of messages that make up a cluster.
            Defaults to 4.

    Returns:
        A one-element ``pd.Series`` with the boolean entry ``"At_Risk"``.

    Raises:
        ValueError: If ``min_messages`` is smaller than 1.
    """
    if min_messages < 1:
        raise ValueError("min_messages must be at least 1")

    # Sort (and drop missing dates) here rather than trusting the caller:
    # the neighbour check below is only valid on ascending dates.
    dates = group["date_parsed"].dropna().sort_values().tolist()

    # On sorted dates, a window starting at dates[i] holds `min_messages`
    # messages exactly when the message `min_messages - 1` positions later
    # is still inside the window, so one comparison per start suffices.
    # `.days` compares whole days (partial days are truncated), which
    # matches the original comparison.
    span = min_messages - 1
    at_risk = any(
        (last - first).days <= window_days
        for first, last in zip(dates, dates[span:])
    )
    return pd.Series({"At_Risk": at_risk})

# One row per sender: evaluate flag_risk on each sender's messages, move the
# sender key out of the index, and expose it under the "Employee" column.
risk_flags = (
    df_neg.groupby("from")
    .apply(flag_risk)
    .reset_index()
    .rename(columns={"from": "Employee"})
)

# ------------------------------------------------------------
# 📍 4. Output & Save
# "At_Risk" holds booleans, so it serves directly as the row mask
# (no `== True` comparison needed). Show only the flagged employees...
print(risk_flags[risk_flags["At_Risk"]])
# ...but persist the full table, flagged or not.
risk_flags.to_csv("../data/processed/flight_risk_employees.csv", index=False)
