# =========================
# Notebook 5: Streamlit Dashboard
# =========================

In [13]:
# Step 1: Import libraries
import streamlit as st
import pandas as pd
import joblib

In [14]:

# Step 2: Load trained pipeline (scaler + model together)
pipeline = joblib.load("models/attrition_pipeline.pkl")

st.set_page_config(page_title="Employee Attrition Prediction", layout="wide")

st.title("🚀 Employee Attrition Prediction with Sentiment Analysis")

st.write("""
This app predicts whether an employee is likely to leave the company 
and also provides synthetic sentiment analysis based on job satisfaction features.
""")



In [18]:
# Step 3: Sidebar input
st.sidebar.header("Enter Employee Details")

def user_input():
    age = st.sidebar.slider("Age", 18, 60, 30)
    gender = st.sidebar.selectbox("Gender", ["Male", "Female"])
    marital_status = st.sidebar.selectbox("Marital Status", ["Single", "Married", "Divorced"])
    department = st.sidebar.selectbox("Department", ["HR", "IT", "Sales", "Marketing"])
    job_role = st.sidebar.selectbox("Job Role", ["Manager", "Analyst", "Executive", "Engineer", "Consultant"])
    job_level = st.sidebar.slider("Job Level", 1, 5, 2)
    monthly_income = st.sidebar.number_input("Monthly Income", 1000, 50000, 5000)
    overtime = st.sidebar.selectbox("Overtime", ["Yes", "No"])
    job_satisfaction = st.sidebar.slider("Job Satisfaction (1-5)", 1, 5, 3)
    work_life_balance = st.sidebar.slider("Work Life Balance (1-5)", 1, 5, 3)
    work_env_sat = st.sidebar.slider("Work Environment Satisfaction (1-5)", 1, 5, 3)
    relationship_manager = st.sidebar.slider("Relationship with Manager (1-5)", 1, 5, 3)

    data = {
        "Age": age,
        "Gender": gender,   # keep as string, pipeline will encode
        "Marital_Status": marital_status,
        "Department": department,
        "Job_Role": job_role,
        "Job_Level": job_level,
        "Monthly_Income": monthly_income,
        "Overtime": overtime,
        "Job_Satisfaction": job_satisfaction,
        "Work_Life_Balance": work_life_balance,
        "Work_Environment_Satisfaction": work_env_sat,
        "Relationship_with_Manager": relationship_manager,
    }
    return pd.DataFrame([data])


input_df = user_input()



In [19]:
# Step 4: Sentiment logic
input_df["sentiment_score"] = (
    input_df["JobSatisfaction"] +
    input_df["WorkLifeBalance"] +
    input_df["WorkEnvironmentSatisfaction"] +
    input_df["RelationshipWithManager"]
) / 4

def map_sentiment(score):
    if score >= 3:
        return "Positive"
    elif score == 2:
        return "Neutral"
    else:
        return "Negative"

input_df["sentiment_label"] = input_df["sentiment_score"].apply(map_sentiment)

KeyError: 'JobSatisfaction'

In [20]:
# Step 4: Sentiment logic
input_df["sentiment_score"] = (
    input_df["Job_Satisfaction"] +
    input_df["Work_Life_Balance"] +
    input_df["Work_Environment_Satisfaction"] +
    input_df["Relationship_with_Manager"]
) / 4

def map_sentiment(score):
    if score >= 3:
        return "Positive"
    elif score == 2:
        return "Neutral"
    else:
        return "Negative"

input_df["sentiment_label"] = input_df["sentiment_score"].apply(map_sentiment)


In [21]:
# Step 5: Predict attrition (pipeline does scaling + prediction)
prediction = pipeline.predict(input_df.drop(columns=["sentiment_label"]))[0]
probability = pipeline.predict_proba(input_df.drop(columns=["sentiment_label"]))[0][1]



ValueError: could not convert string to float: 'Male'