In [2]:
# ------------------------------------------------------------
# 📍 0. Notebook Title & Purpose
# Employee Sentiment Analysis · Task 4: Ranking Employees
# ------------------------------------------------------------

# ------------------------------------------------------------
# 📍 1. Imports
import pandas as pd
from pathlib import Path

# ------------------------------------------------------------
# 📍 2. Load Monthly Scores
# Read the monthly sentiment scores and turn the "Month" column into
# month-frequency periods in the same step.
df = pd.read_csv("../data/processed/monthly_sentiment_scores.csv").assign(
    Month=lambda scores: pd.PeriodIndex(scores["Month"], freq="M")
)

# ------------------------------------------------------------
# 📍 3. Define Ranking Logic
def get_rankings(group, n=3):
    """Label the highest- and lowest-scoring rows of one group of scores.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to rank. Must contain a ``Score`` and an ``Employee`` column.
    n : int, default 3
        Number of rows to keep at each end of the ranking (expected to be
        non-negative). The default reproduces the original top-3 / bottom-3
        behaviour.

    Returns
    -------
    pandas.DataFrame
        The ``n`` highest-scoring rows tagged ``Rank_Type="Top Positive"``
        followed by the ``n`` lowest-scoring rows tagged
        ``Rank_Type="Top Negative"``. Index labels of ``group`` are kept.

    Notes
    -----
    Both selections are taken from the same ``group``, so when it holds fewer
    than ``2 * n`` rows the two selections overlap and a row can appear under
    both labels.
    """
    sort_columns = ["Score", "Employee"]

    # Highest scores first; equal scores are ordered alphabetically by Employee.
    top = (
        group.sort_values(by=sort_columns, ascending=[False, True])
             .head(n)
             .assign(Rank_Type="Top Positive")
    )
    # Lowest scores first; the tie-break is again alphabetical by Employee.
    bottom = (
        group.sort_values(by=sort_columns, ascending=[True, True])
             .head(n)
             .assign(Rank_Type="Top Negative")
    )
    return pd.concat([top, bottom])

# ------------------------------------------------------------
# 📍 4. Apply Ranking Per Month
# Rank every month separately by iterating over the groups explicitly.
# `groupby(...).apply(...)` is avoided on purpose: having `apply` hand the
# grouping column to the function is deprecated in recent pandas releases, and
# once grouping columns are excluded the "Month" column would be missing from
# the saved rankings. Iterating keeps each month's full rows, in the same
# sorted month order that `apply` produced.
monthly_frames = [get_rankings(month_df) for _, month_df in df.groupby("Month")]

if monthly_frames:
    rankings = pd.concat(monthly_frames, ignore_index=True)
else:
    # No months to rank: `pd.concat` raises on an empty list, so build an
    # empty frame with the expected columns instead.
    rankings = get_rankings(df.iloc[0:0]).reset_index(drop=True)

# Preview
print(rankings.head(10))

# ------------------------------------------------------------
# 📍 5. Save Rankings to File
rankings.to_csv("../data/processed/monthly_employee_rankings.csv", index=False)

# Next step (following cell): derive the overall top positive and negative employees across all months.


                      Employee    Month  Score     Rank_Type
0      kayne.coulter@enron.com  2010-01      5  Top Positive
1     patti.thompson@enron.com  2010-01      5  Top Positive
2       don.baughman@enron.com  2010-01      4  Top Positive
3      rhonda.denton@enron.com  2010-01      0  Top Negative
4      johnny.palmer@enron.com  2010-01      1  Top Negative
5  bobette.riner@ipgdirect.com  2010-01      2  Top Negative
6  bobette.riner@ipgdirect.com  2010-02      7  Top Positive
7        john.arnold@enron.com  2010-02      7  Top Positive
8       don.baughman@enron.com  2010-02      6  Top Positive
9      lydia.delgado@enron.com  2010-02      1  Top Negative


  rankings = df.groupby("Month", group_keys=False).apply(get_rankings).reset_index(drop=True)


In [3]:
# 📍 6. Global Top‑3 Positive & Negative Employees (across months)
# Reload the per-month rankings from disk.
rankings = pd.read_csv("../data/processed/monthly_employee_rankings.csv")

# Every "Top Positive" appearance is worth +1, every "Top Negative" one -1.
rank_points = {"Top Positive": 1, "Top Negative": -1}
rankings["point"] = rankings["Rank_Type"].map(rank_points)

# Net points per employee, summed over all months.
overall_scores = (
    rankings.groupby("Employee")["point"]
    .sum()
    .rename("Overall_Score")
    .reset_index()
)

# Both global lists order equal scores alphabetically by Employee.
sort_keys = ["Overall_Score", "Employee"]

# Global Top Positive: the three highest net scores.
highest_first = overall_scores.sort_values(by=sort_keys, ascending=[False, True])
top_global_positive = highest_first.iloc[:3].assign(Global_Rank="Top Positive")

# Global Top Negative: the three lowest net scores.
lowest_first = overall_scores.sort_values(by=sort_keys, ascending=[True, True])
top_global_negative = lowest_first.iloc[:3].assign(Global_Rank="Top Negative")

global_top3 = pd.concat([top_global_positive, top_global_negative])
print(global_top3)

# ------------------------------------------------------------
# 📍 7. Save Global Rankings
global_top3.to_csv("../data/processed/global_top3_employees.csv", index=False)


                      Employee  Overall_Score   Global_Rank
6      lydia.delgado@enron.com              5  Top Positive
3        john.arnold@enron.com              4  Top Positive
2          eric.bass@enron.com              2  Top Positive
8      rhonda.denton@enron.com             -7  Top Negative
4      johnny.palmer@enron.com             -4  Top Negative
0  bobette.riner@ipgdirect.com             -3  Top Negative
