In [14]:
import pandas as pd
import numpy as np
from textblob import TextBlob
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# 1. Load the historical records that the model below is trained on.
#    Columns noted by the original author: record_id, reason_and_factors, cessation_year
data = pd.read_csv(filepath_or_buffer='combined_employee_data.csv')

# Clean and preprocess
def extract_year(date_str):
    """Return the first run of four digits found in ``date_str`` as an int.

    The argument is passed through ``str()`` before searching, so non-string
    values (numbers, ``None``, NaN) are accepted. When no four-digit run is
    present the function returns ``np.nan``.
    """
    import re  # local import, as in the original cell

    hit = re.search(r'(\d{4})', str(date_str))
    if hit is None:
        # Nothing that looks like a year: signal "missing" to pandas.
        return np.nan
    return int(hit.group(1))

# Reduce each cessation value to a bare year (NaN when no year is found),
# then score every free-text reason with TextBlob's polarity.
data['cessation_year'] = data['cessation_year'].map(extract_year)
data['sentiment_score'] = data['reason_and_factors'].map(
    lambda reason_text: TextBlob(str(reason_text)).sentiment.polarity
)

def label_attrition_risk(reason):
    """Flag a cessation reason as at-risk (1) or not (0).

    The reason is stringified and lower-cased; it counts as at-risk when it
    contains either 'resignation' or 'ill health' as a substring.
    """
    text = str(reason).lower()
    risk_markers = ('resignation', 'ill health')
    # int(True) == 1 -> at risk, int(False) == 0 -> not at risk
    return int(any(marker in text for marker in risk_markers))

# Binary target: 1 when the reason text mentions resignation / ill health, else 0
data['attrition_risk'] = data['reason_and_factors'].map(label_attrition_risk)

# Integer-encode the raw reason strings
le = LabelEncoder()
data['reason_encoded'] = le.fit_transform(data['reason_and_factors'])

# Feature matrix and target vector
# NOTE(review): 'attrition_risk' is computed from 'reason_and_factors', and
# 'reason_encoded' encodes that same column, so the target is recoverable from
# the features. Treat any score measured on this split with caution.
X = data.loc[:, ['reason_encoded', 'cessation_year', 'sentiment_score']]
y = data.loc[:, 'attrition_risk']

# 60/40 train/test split with a fixed seed, then fit the forest on the training part
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# 2. Rule-based attrition screening for a single piece of feedback
# Ordered (keywords, strategy) rules: the first rule with a matching keyword wins.
_ENGAGEMENT_RULES = (
    (("workload", "overwhelming"),
     "Review workloads; promote work-life balance; offer support."),
    (("recognition", "unnoticed"),
     "Increase recognition and manager feedback; implement regular check-ins."),
    (("career", "advancement"),
     "Create clear career pathways; offer mentorship and growth opportunities."),
    (("communication",),
     "Improve leadership communication; increase transparency and consistency."),
    (("resources", "technology"),
     "Invest in updated tools; provide resource support and training."),
)
_DEFAULT_AT_RISK_STRATEGY = "Conduct stay interviews to understand concerns."
_LOW_RISK_STRATEGY = "Maintain current engagement and recognition programs."


def predict_attrition_and_recommend(feedback_text, risk_threshold=0.1):
    """Score one piece of feedback and suggest an engagement strategy.

    This function is purely rule-based: it looks only at the TextBlob polarity
    of the text and at keyword matches; it does not use any trained model.

    Parameters
    ----------
    feedback_text : str or any
        The feedback to analyse. Non-string values are converted with ``str()``
        before analysis, mirroring how the training text is handled.
    risk_threshold : float, default 0.1
        Polarity at or below this value is treated as "at risk" (so negative
        and neutral feedback is flagged).

    Returns
    -------
    dict
        ``feedback`` (the value passed in, unchanged), ``sentiment_score``
        (polarity rounded to 2 decimals), ``predicted_attrition_risk`` (bool)
        and ``engagement_strategy`` (str).
    """
    text = str(feedback_text)
    sentiment_score = TextBlob(text).sentiment.polarity

    # The comparison uses the unrounded score; rounding is only for display.
    predicted_risk = bool(sentiment_score <= risk_threshold)

    if predicted_risk:
        # Match keywords case-insensitively so "Workload" / "WORKLOAD" are
        # routed the same way as "workload".
        lowered = text.lower()
        strategy = next(
            (
                advice
                for keywords, advice in _ENGAGEMENT_RULES
                if any(keyword in lowered for keyword in keywords)
            ),
            _DEFAULT_AT_RISK_STRATEGY,
        )
    else:
        strategy = _LOW_RISK_STRATEGY

    return {
        "feedback": feedback_text,
        "sentiment_score": round(sentiment_score, 2),
        "predicted_attrition_risk": predicted_risk,
        "engagement_strategy": strategy
    }



In [15]:
# 3. Example usage: run the rule-based screen over a handful of sample comments
sample_feedbacks = [
    "I feel like my hard work goes unnoticed, and my manager rarely gives feedback.",
    "I am excited about the new training programs and the support from my team.",
    "The workload is overwhelming and I often have to stay late to finish my tasks.",
    "I have great flexibility in my schedule and feel trusted by my supervisor.",
    "There are no clear opportunities for career advancement here.",
    "My colleagues are supportive and we celebrate our achievements together.",
    "The communication from leadership is inconsistent and confusing.",
    "I appreciate the recognition I receive for my contributions.",
    "I'm frustrated by the lack of resources and outdated technology.",
    "I feel valued and see a future for myself at this company.",
]

# One four-line report per comment, each followed by a blank line
for result in map(predict_attrition_and_recommend, sample_feedbacks):
    print(
        f"Feedback: {result['feedback']}\n"
        f"Sentiment score: {result['sentiment_score']}\n"
        f"Predicted Attrition Risk: {result['predicted_attrition_risk']}\n"
        f"Engagement Strategy: {result['engagement_strategy']}\n"
    )

Feedback: I feel like my hard work goes unnoticed, and my manager rarely gives feedback.
Sentiment score: -0.06
Predicted Attrition Risk: True
Engagement Strategy: Increase recognition and manager feedback; implement regular check-ins.

Feedback: I am excited about the new training programs and the support from my team.
Sentiment score: 0.26
Predicted Attrition Risk: False
Engagement Strategy: Maintain current engagement and recognition programs.

Feedback: The workload is overwhelming and I often have to stay late to finish my tasks.
Sentiment score: 0.1
Predicted Attrition Risk: True
Engagement Strategy: Review workloads; promote work-life balance; offer support.

Feedback: I have great flexibility in my schedule and feel trusted by my supervisor.
Sentiment score: 0.8
Predicted Attrition Risk: False
Engagement Strategy: Maintain current engagement and recognition programs.

Feedback: There are no clear opportunities for career advancement here.
Sentiment score: -0.05
Predicted Attrit