NameError: name 'employee_analysis_results' is not defined

In [2]:
import shap
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression  # Or your model
import numpy as np  # Ensure numpy is imported

# --- Load and Preprocess Data (Use your function) --

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    df['Attrition'] = df['Attrition'].map({'No': 0, 'Yes': 1})
    categorical_cols = [col for col in df.columns if df[col].dtype == 'object']
    df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
    X = df.drop('Attrition', axis=1)
    y = df['Attrition']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    for col in X_train.columns:
        if isinstance(X_train[col], pd.Series):  # Add this check
            X_train[col] = pd.to_numeric(X_train[col], errors='coerce').fillna(0)
        else:
            print(f"WARNING: Column '{col}' in X_train is not a Series. Skipping to_numeric.")
        if isinstance(X_test[col], pd.Series):   # And this check
            X_test[col] = pd.to_numeric(X_test[col], errors='coerce').fillna(0)
        else:
            print(f"WARNING: Column '{col}' in X_test is not a Series. Skipping to_numeric.")
    bool_cols = X_train.select_dtypes(include='bool').columns
    X_train[bool_cols] = X_train[bool_cols].astype(int)
    X_test[bool_cols] = X_test[bool_cols].astype(int)
    return X_train, X_test, y_train, y_test

file_path = "/content/drive/MyDrive/HR-Employee-Attrition.csv"  # Replace with your actual file path
X_train, X_test, y_train, y_test = load_and_preprocess_data(file_path)

# --- Train Your Model (Use your model) ---
model = LogisticRegression(random_state=42, solver='liblinear')  # Or RandomForest
model.fit(X_train, y_train)

# --- Create SHAP Explainer ---
explainer = shap.LinearExplainer(model, X_train)  # Or TreeExplainer

# --- Employee Selection ---
# Select employees for analysis (modify this selection logic)
num_employees_to_analyze = 5  # Adjust as needed

# 1. Get predicted probabilities to help with selection
predicted_probabilities = model.predict_proba(X_test)[:, 1]  # Prob of attrition

# 2. Select diverse employees
import numpy as np

# Sort employees by predicted probability
indices_sorted = np.argsort(predicted_probabilities)

# Select a mix (e.g., lowest, highest, and middle)
selected_indices = np.concatenate([
    indices_sorted[:num_employees_to_analyze // 3],  # Lowest risk
    indices_sorted[num_employees_to_analyze // 3: 2 * num_employees_to_analyze // 3], # Middle risk
    indices_sorted[- (num_employees_to_analyze - 2 * num_employees_to_analyze // 3):] # Highest risk
])

selected_employees = X_test.iloc[selected_indices]

# --- Employee Analysis ---

employee_analysis_results = []  # This is where the list is initialized

for i in range(len(selected_employees)):
    employee_data = selected_employees.iloc[[i]]
    shap_values_individual = explainer.shap_values(employee_data)

    # Generate force plot (and save it)
    shap.force_plot(explainer.expected_value, shap_values_individual, employee_data, show=False, matplotlib=True)
    plt.savefig(f"shap_employee_{selected_indices[i]}.png")  # Unique filename
    plt.close()

    # --- Analyze and Document ---
    analysis = {
        "Employee Index": selected_indices[i],
        "Predicted Attrition Probability": predicted_probabilities[selected_indices[i]],
        "Factors Increasing Risk": [],
        "Factors Decreasing Risk": [],
        "Summary": ""
    }

    # Get feature names
    feature_names = X_test.columns

    # Get SHAP values for this employee
    shap_values = shap_values_individual  # This is a 2D array

    # Get feature values for this employee
    feature_values = employee_data.values.flatten()

    # Sort features by absolute SHAP value (most important first)
    abs_shap_values = np.abs(shap_values)
    important_feature_indices = np.argsort(abs_shap_values)[::-1]

    # Analyze top contributing features (adjust the number as needed)
    num_top_features = 5
    for idx in important_feature_indices[:num_top_features]:  # Correct loop
        print("\n--- Debugging Information ---")
        print(f"  - Employee Index: {selected_indices[i]}")
        print(f"  - Feature Index (idx): {idx}")
        print(f"  - shap_values shape: {shap_values.shape}")
        print(f"  - feature_names shape: {feature_names.shape}")
        print(f"  - feature_values shape: {feature_values.shape}")

        feature_name = feature_names[idx]
        feature_value = feature_values[idx]

        # --- ULTIMATE SCALAR CONVERSION ---
        try:
            feature_value = feature_value.item()
        except (ValueError, AttributeError):
            try:
                feature_value = float(feature_value)
            except (TypeError, ValueError):
                try:
                    feature_value = float(str(feature_value))
                except (TypeError, ValueError):
                    print(f"  - WARNING: Could not convert feature value for '{feature_name}' to float. Using NaN.")
                    feature_value = np.nan

        sv = shap_values[0, idx]
        print(f"  - type(sv): {type(sv)}")
        if isinstance(sv, np.ndarray):
            print(f"  - sv shape: {sv.shape}")
            print(f"  - sv dtype: {sv.dtype}")
        else:
            print(f"  - sv value: {sv}")

        try:
            shap_value = float(shap_values[0, idx].item())  # Corrected and robust
        except ValueError as e:
            print(f"  - ValueError: {e}")
            shap_value = np.nan  # Or some other default value

        print(f"  - feature_name: {feature_name}")
        print(f"  - feature_value: {feature_value}")
        print(f"  - shap_value: {shap_value}")

        if shap_value > 0:
            analysis["Factors Increasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )
        else:
            analysis["Factors Decreasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )

    employee_analysis_results.append(analysis)

    # Print Analysis (for your immediate review)
    print(f"\n--- Employee {selected_indices[i]} Analysis ---")
    for key, value in analysis.items():
        if key not in ["Factors Increasing Risk", "Factors Decreasing Risk"]:
            print(f"  {key}: {value}")
        else:
            print(f"  {key}:")
            for item in value:
                print(f"    - {item}")

# --- Overall Pattern Synthesis ---
risk_factor_counts = {}
protective_factor_counts = {}

for employee_analysis in employee_analysis_results:
    for risk_factor_string in employee_analysis.get("Factors Increasing Risk", []):
        # Extract the feature name (part before " - ")
        feature_name = risk_factor_string.split(" - ")[0].replace("Feature: ", "")
        risk_factor_counts[feature_name] = risk_factor_counts.get(feature_name, 0) + 1

    for protective_factor_string in employee_analysis.get("Factors Decreasing Risk", []):
        # Extract the feature name
        feature_name = protective_factor_string.split(" - ")[0].replace("Feature: ", "")
        protective_factor_counts[feature_name] = protective_factor_counts.get(feature_name, 0) + 1

print("\n--- Overall Risk Factor Counts ---")
for feature, count in sorted(risk_factor_counts.items(), key=lambda item: item[1], reverse=True):
    print(f"{feature}: {count}")

print("\n--- Overall Protective Factor Counts ---")
for feature, count in sorted(protective_factor_counts.items(), key=lambda item: item[1], reverse=True):
    print(f"{feature}: {count}")

# --- PDF Outline (Skeleton - You'll need to add PDF generation code) ---
print("\n--- PDF Outline ---")
print("  Executive Summary: (To be written)")
print("  Analysis Summary: (Draft paragraph)")
print("  Actionable Recommendations: (Initial list of actions based on counts above)")


--- Debugging Information ---
  - Employee Index: 80
  - Feature Index (idx): [ 4 17 33 40 39 36 15 31 38  7 42  5 34 43 27 44 12 41 22  3 37 14 13  2
 30 29 18 16 10  1  8 21 28 32 26 35 45 24 46  0 20 25  6 23  9 11 19]
  - shap_values shape: (1, 47)
  - feature_names shape: (47,)
  - feature_values shape: (47,)
       'JobRole_Research Director', 'JobRole_Manufacturing Director',
       'JobRole_Human Resources', 'PerformanceRating',
       'EducationField_Marketing', 'JobRole_Manager', 'HourlyRate',
       'JobRole_Sales Executive', 'EmployeeNumber',
       'EducationField_Technical Degree', 'JobRole_Sales Representative',
       'BusinessTravel_Travel_Rarely', 'MaritalStatus_Married', 'MonthlyRate',
       'JobRole_Research Scientist', 'YearsAtCompany', 'Education',
       'JobRole_Laboratory Technician', 'PercentSalaryHike',
       'NumCompaniesWorked', 'DistanceFromHome',
       'EducationField_Life Sciences', 'Department_Sales', 'StockOptionLevel',
       'RelationshipSatisfac

In [3]:
num_top_features = 5
    for i_top in range(min(num_top_features, len(important_feature_indices))):
        idx = important_feature_indices[i_top]
        print("\n--- Debugging Information ---")
        print(f"  - Employee Index: {selected_indices[i]}")
        print(f"  - Top Feature Index (i_top): {i_top}")
        print(f"  - Original Feature Index (idx): {idx}")
        print(f"  - shap_values shape: {shap_values.shape}")
        print(f"  - feature_names shape: {feature_names.shape}")
        print(f"  - feature_values shape: {feature_values.shape}")

        feature_name = feature_names[idx]
        feature_value = feature_values[idx]

        # --- ULTIMATE SCALAR CONVERSION ---
        try:
            feature_value = feature_value.item()
        except (ValueError, AttributeError):
            try:
                feature_value = float(feature_value)
            except (TypeError, ValueError):
                try:
                    feature_value = float(str(feature_value))
                except (TypeError, ValueError):
                    print(f"  - WARNING: Could not convert feature value for '{feature_name}' to float. Using NaN.")
                    feature_value = np.nan

        sv = shap_values[0, idx]
        print(f"  - type(sv): {type(sv)}")
        if isinstance(sv, np.ndarray):
            print(f"  - sv shape: {sv.shape}")
            print(f"  - sv dtype: {sv.dtype}")
        else:
            print(f"  - sv value: {sv}")

        try:
            shap_value = float(shap_values[0, idx].item())  # Corrected and robust
        except ValueError as e:
            print(f"  - ValueError: {e}")
            shap_value = np.nan  # Or some other default value

        print(f"  - feature_name: {feature_name}")
        print(f"  - feature_value: {feature_value}")
        print(f"  - shap_value: {shap_value}")

        if shap_value > 0:
            analysis["Factors Increasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )
        else:
            analysis["Factors Decreasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )

IndentationError: unexpected indent (<ipython-input-3-5ac1f9a69bce>, line 2)

In [5]:
num_top_features = 5
 for i_top in range(min(num_top_features, len(important_feature_indices))):
        idx = important_feature_indices[i_top]
        print("\n--- Debugging Information ---")
        print(f"  - Employee Index: {selected_indices[i]}")
        print(f"  - Top Feature Index (i_top): {i_top}")
        print(f"  - Original Feature Index (idx): {idx}")
        print(f"  - shap_values shape: {shap_values.shape}")
        print(f"  - feature_names shape: {feature_names.shape}")
        print(f"  - feature_values shape: {feature_values.shape}")

        feature_name = feature_names[idx]
        feature_value = feature_values[idx]

        # --- ULTIMATE SCALAR CONVERSION ---
        try:
            feature_value = feature_value.item()
        except (ValueError, AttributeError):
            try:
                feature_value = float(feature_value)
            except (TypeError, ValueError):
                try:
                    feature_value = float(str(feature_value))
                except (TypeError, ValueError):
                    print(f"  - WARNING: Could not convert feature value for '{feature_name}' to float. Using NaN.")
                    feature_value = np.nan

        sv = shap_values[0, idx]
        print(f"  - type(sv): {type(sv)}")
        if isinstance(sv, np.ndarray):
            print(f"  - sv shape: {sv.shape}")
            print(f"  - sv dtype: {sv.dtype}")
        else:
            print(f"  - sv value: {sv}")

        try:
            shap_value = float(shap_values[0, idx].item())  # Corrected and robust
        except ValueError as e:
            print(f"  - ValueError: {e}")
            shap_value = np.nan  # Or some other default value

        print(f"  - feature_name: {feature_name}")
        print(f"  - feature_value: {feature_value}")
        print(f"  - shap_value: {shap_value}")

        if shap_value > 0:
            analysis["Factors Increasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )
        else:
            analysis["Factors Decreasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )

IndentationError: unexpected indent (<ipython-input-5-840d15f025fd>, line 2)

In [6]:
for i in range(len(selected_employees)):
    employee_data = selected_employees.iloc[[i]]
    shap_values_individual = explainer.shap_values(employee_data)

    # ... (code for force plot) ...

    analysis = { ... }
    feature_names = X_test.columns
    shap_values = shap_values_individual
    feature_values = employee_data.values.flatten()
    abs_shap_values = np.abs(shap_values)
    important_feature_indices = np.argsort(abs_shap_values)[::-1]

    num_top_features = 5
    for i_top in range(min(num_top_features, len(important_feature_indices))):
        idx = important_feature_indices[i_top]
        print("\n--- Debugging Information ---")
        # ... (all the print statements and logic for analyzing top features) ...
        if shap_value > 0:
            analysis["Factors Increasing Risk"].append(...)
        else:
            analysis["Factors Decreasing Risk"].append(...)

    employee_analysis_results.append(analysis)
    print(f"\n--- Employee {selected_indices[i]} Analysis ---")
    # ... (printing of employee analysis) ...

# --- Overall Pattern Synthesis ---
# ... (rest of the code for counting risk and protective factors) ...


--- Debugging Information ---


TypeError: 'set' object is not subscriptable

In [7]:
analysis = {
        "Employee Index": selected_indices[i],
        "Predicted Attrition Probability": predicted_probabilities[selected_indices[i]],
        "Factors Increasing Risk": [],
        "Factors Decreasing Risk": [],
        "Summary": ""
    }

    # ... (rest of the code) ...

    if shap_value > 0:
        analysis["Factors Increasing Risk"].append(
            f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
        )
    else:
        analysis["Factors Decreasing Risk"].append(
            f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
        )

IndentationError: unexpected indent (<ipython-input-7-fad0102595ed>, line 11)

In [9]:
if shap_value > 0:
            analysis["Factors Increasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )
        else:
            print(f"\n--- Debugging Type Check ---")
            print(f"Type of analysis['Factors Decreasing Risk']: {type(analysis.get('Factors Decreasing Risk'))}")
            analysis["Factors Decreasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 5)

In [10]:
num_top_features = 5
    for i_top in range(min(num_top_features, len(important_feature_indices))):
        idx = important_feature_indices[i_top]
        print("\n--- Debugging Information ---")
        print(f"  - Employee Index: {selected_indices[i]}")
        print(f"  - Top Feature Index (i_top): {i_top}")
        print(f"  - Original Feature Index (idx): {idx}")
        print(f"  - shap_values shape: {shap_values.shape}")
        print(f"  - feature_names shape: {feature_names.shape}")
        print(f"  - feature_values shape: {feature_values.shape}")

        feature_name = feature_names[idx]
        feature_value = feature_values[idx]

        # --- ULTIMATE SCALAR CONVERSION ---
        try:
            feature_value = feature_value.item()
        except (ValueError, AttributeError):
            try:
                feature_value = float(feature_value)
            except (TypeError, ValueError):
                try:
                    feature_value = float(str(feature_value))
                except (TypeError, ValueError):
                    print(f"  - WARNING: Could not convert feature value for '{feature_name}' to float. Using NaN.")
                    feature_value = np.nan

        sv = shap_values[0, idx]
        print(f"  - type(sv): {type(sv)}")
        if isinstance(sv, np.ndarray):
            print(f"  - sv shape: {sv.shape}")
            print(f"  - sv dtype: {sv.dtype}")
        else:
            print(f"  - sv value: {sv}")

        try:
            shap_value = float(shap_values[0, idx].item())  # Corrected and robust
        except ValueError as e:
            print(f"  - ValueError: {e}")
            shap_value = np.nan  # Or some other default value

        print(f"  - feature_name: {feature_name}")
        print(f"  - feature_value: {feature_value}")
        print(f"  - shap_value: {shap_value}")

        if shap_value > 0:  # Correct indentation
            analysis["Factors Increasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )
        else:  # Correct indentation
            print(f"\n--- Debugging Type Check ---")
            print(f"Type of analysis['Factors Decreasing Risk']: {type(analysis.get('Factors Decreasing Risk'))}")
            analysis["Factors Decreasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )

IndentationError: unexpected indent (<ipython-input-10-a45ebd004b6b>, line 2)

In [11]:
for i in range(len(selected_employees)):
    employee_data = selected_employees.iloc[[i]]
    shap_values_individual = explainer.shap_values(employee_data)

    # Generate force plot (and save it)
    shap.force_plot(explainer.expected_value, shap_values_individual, employee_data, show=False, matplotlib=True)
    plt.savefig(f"shap_employee_{selected_indices[i]}.png")  # Unique filename
    plt.close()

    # --- Analyze and Document ---
    analysis = {
        "Employee Index": selected_indices[i],
        "Predicted Attrition Probability": predicted_probabilities[selected_indices[i]],
        "Factors Increasing Risk": [],  # Explicitly re-initialize as a list
        "Factors Decreasing Risk": [],  # Explicitly re-initialize as a list
        "Summary": ""
    }

    # Get feature names
    feature_names = X_test.columns

    # Get SHAP values for this employee
    shap_values = shap_values_individual  # This is a 2D array

    # Get feature values for this employee
    feature_values = employee_data.values.flatten()

    # Sort features by absolute SHAP value (most important first)
    abs_shap_values = np.abs(shap_values)
    important_feature_indices = np.argsort(abs_shap_values)[::-1]

    # Analyze top contributing features (adjust the number as needed)
    num_top_features = 5
    for i_top in range(min(num_top_features, len(important_feature_indices))):
        idx = important_feature_indices[i_top]
        print("\n--- Debugging Information ---")
        print(f"  - Employee Index: {selected_indices[i]}")
        print(f"  - Top Feature Index (i_top): {i_top}")
        print(f"  - Original Feature Index (idx): {idx}")
        print(f"  - shap_values shape: {shap_values.shape}")
        print(f"  - feature_names shape: {feature_names.shape}")
        print(f"  - feature_values shape: {feature_values.shape}")

        feature_name = feature_names[idx]
        feature_value = feature_values[idx]

        # --- ULTIMATE SCALAR CONVERSION ---
        try:
            feature_value = feature_value.item()
        except (ValueError, AttributeError):
            try:
                feature_value = float(feature_value)
            except (TypeError, ValueError):
                try:
                    feature_value = float(str(feature_value))
                except (TypeError, ValueError):
                    print(f"  - WARNING: Could not convert feature value for '{feature_name}' to float. Using NaN.")
                    feature_value = np.nan

        sv = shap_values[0, idx]
        print(f"  - type(sv): {type(sv)}")
        if isinstance(sv, np.ndarray):
            print(f"  - sv shape: {sv.shape}")
            print(f"  - sv dtype: {sv.dtype}")
        else:
            print(f"  - sv value: {sv}")

        try:
            shap_value = float(shap_values[0, idx].item())  # Corrected and robust
        except ValueError as e:
            print(f"  - ValueError: {e}")
            shap_value = np.nan  # Or some other default value

        print(f"  - feature_name: {feature_name}")
        print(f"  - feature_value: {feature_value}")
        print(f"  - shap_value: {shap_value}")

        if shap_value > 0:
            analysis["Factors Increasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )
        else:
            print(f"\n--- Debugging Type Check ---")
            print(f"Type of analysis['Factors Decreasing Risk']: {type(analysis.get('Factors Decreasing Risk'))}")
            analysis["Factors Decreasing Risk"].append(
                f"Feature: {feature_name} - Value: {feature_value:.2f}, SHAP: {shap_value:.2f}"
            )

    employee_analysis_results.append(analysis)

    # Print Analysis (for your immediate review)
    print(f"\n--- Employee {selected_indices[i]} Analysis ---")
    for key, value in analysis.items():
        if key not in ["Factors Increasing Risk", "Factors Decreasing Risk"]:
            print(f"  {key}: {value}")
        else:
            print(f"  {key}:")
            for item in value:
                print(f"    - {item}")

# --- Overall Pattern Synthesis ---
risk_factor_counts = {}
protective_factor_counts = {}

for employee_analysis in employee_analysis_results:
    for risk_factor_string in employee_analysis.get("Factors Increasing Risk", []):
        # Extract the feature name (part before " - ")
        feature_name = risk_factor_string.split(" - ")[0].replace("Feature: ", "")
        risk_factor_counts[feature_name] = risk_factor_counts.get(feature_name, 0) + 1

    for protective_factor_string in employee_analysis.get("Factors Decreasing Risk", []):
        # Extract the feature name
        feature_name = protective_factor_string.split(" - ")[0].replace("Feature: ", "")
        protective_factor_counts[feature_name] = protective_factor_counts.get(feature_name, 0) + 1

print("\n--- Overall Risk Factor Counts ---")
for feature, count in sorted(risk_factor_counts.items(), key=lambda item: item[1], reverse=True):
    print(f"{feature}: {count}")

print("\n--- Overall Protective Factor Counts ---")
for feature, count in sorted(protective_factor_counts.items(), key=lambda item: item[1], reverse=True):
    print(f"{feature}: {count}")

# --- PDF Outline (Skeleton - You'll need to add PDF generation code) ---
print("\n--- PDF Outline ---")
print("  Executive Summary: (To be written)")
print("  Analysis Summary: (Draft paragraph)")
print("  Actionable Recommendations: (Initial list of actions based on counts above)")


--- Debugging Information ---
  - Employee Index: 80
  - Top Feature Index (i_top): 0
  - Original Feature Index (idx): [ 4 17 33 40 39 36 15 31 38  7 42  5 34 43 27 44 12 41 22  3 37 14 13  2
 30 29 18 16 10  1  8 21 28 32 26 35 45 24 46  0 20 25  6 23  9 11 19]
  - shap_values shape: (1, 47)
  - feature_names shape: (47,)
  - feature_values shape: (47,)
       'JobRole_Research Director', 'JobRole_Manufacturing Director',
       'JobRole_Human Resources', 'PerformanceRating',
       'EducationField_Marketing', 'JobRole_Manager', 'HourlyRate',
       'JobRole_Sales Executive', 'EmployeeNumber',
       'EducationField_Technical Degree', 'JobRole_Sales Representative',
       'BusinessTravel_Travel_Rarely', 'MaritalStatus_Married', 'MonthlyRate',
       'JobRole_Research Scientist', 'YearsAtCompany', 'Education',
       'JobRole_Laboratory Technician', 'PercentSalaryHike',
       'NumCompaniesWorked', 'DistanceFromHome',
       'EducationField_Life Sciences', 'Department_Sales', 'Stock

In [13]:
pip install reportlab

Collecting reportlab
  Downloading reportlab-4.4.1-py3-none-any.whl.metadata (1.8 kB)
Downloading reportlab-4.4.1-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reportlab
Successfully installed reportlab-4.4.1


In [14]:
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import inch

def generate_attrition_report(filename="attrition_prevention_report.pdf"):
    doc = SimpleDocTemplate(filename, pagesize=letter)
    styles = getSampleStyleSheet()
    story = []

    # Title
    title = Paragraph("Employee Attrition Prevention Suggestions", styles['h1'])
    story.append(title)
    story.append(Spacer(1, 0.2*inch))

    # Executive Summary
    summary_heading = Paragraph("Executive Summary", styles['h2'])
    story.append(summary_heading)
    summary_text = "This report outlines key strategies to proactively address and reduce employee attrition within the organization. The recommendations focus on creating a more attractive and supportive work environment by emphasizing competitive compensation and benefits, fostering a healthy work-life balance, enhancing job satisfaction, providing opportunities for career development, nurturing a positive company culture, and ensuring a comfortable work environment."
    summary = Paragraph(summary_text, styles['Normal'])
    story.append(summary)
    story.append(Spacer(1, 0.2*inch))

    # Introduction
    intro_heading = Paragraph("Introduction", styles['h2'])
    story.append(intro_heading)
    intro_text = "Employee attrition poses significant challenges to organizations, including increased recruitment and training costs, loss of valuable knowledge and experience, and potential negative impacts on team morale and productivity. This report provides actionable recommendations, drawing upon common drivers of attrition and considering typical factors within an HR dataset, to help [Your Company Name] in Hyderabad build a more engaged and stable workforce."
    intro = Paragraph(intro_text, styles['Normal'])
    story.append(intro)
    story.append(Spacer(1, 0.2*inch))

    # Attrition Prevention Suggestions
    suggestions_heading = Paragraph("Attrition Prevention Suggestions", styles['h2'])
    story.append(suggestions_heading)
    story.append(Spacer(1, 0.1*inch))

    # Compensation and Benefits
    comp_heading = Paragraph("1. Compensation and Benefits", styles['h3'])
    story.append(comp_heading)
    comp_suggestion_1 = Paragraph("<b>1.1 Proactive Review and Enhancement:</b> Regularly conduct market research specific to Hyderabad and the tech industry to benchmark salaries for various roles and experience levels within your company. Ensure that your compensation packages are at least competitive, if not slightly above average, to attract and retain talent. Beyond base salary, explore enhancing benefits such as comprehensive health insurance that includes mental wellness support, flexible retirement plans with employer contributions that vest reasonably quickly, and performance-based bonuses or profit-sharing schemes that reward employee contributions to the company's success. Consider non-monetary benefits like subsidized gym memberships, wellness programs, or childcare assistance.", styles['Normal'])
    story.append(comp_suggestion_1)
    story.append(Spacer(1, 0.1*inch))

    # Work-Life Balance
    wlb_heading = Paragraph("2. Work-Life Balance", styles['h3'])
    story.append(wlb_heading)
    wlb_suggestion_1 = Paragraph("<b>2.1 Fostering a Sustainable Work Culture:</b> Implement clear guidelines around working hours and actively discourage excessive overtime. Utilize project management tools to distribute workload effectively and realistically. Promote the use of vacation time by setting minimum vacation days or offering incentives for taking time off. Explore and implement flexible work arrangements such as remote work options (full-time, hybrid), compressed workweeks, or flextime policies where job roles allow. This can significantly improve employee morale and reduce stress associated with rigid schedules and long commutes, especially in a city like Hyderabad with its traffic challenges.", styles['Normal'])
    story.append(wlb_suggestion_1)
    story.append(Spacer(1, 0.1*inch))

    # Job Satisfaction
    js_heading = Paragraph("3. Job Satisfaction", styles['h3'])
    story.append(js_heading)
    js_suggestion_1 = Paragraph("<b>3.1 Cultivating Engagement and Purpose:</b> Conduct regular, anonymous employee engagement surveys to gather honest feedback on job satisfaction, role clarity, workload, and relationships with managers and colleagues. Act on the feedback received and communicate the changes made as a result. Ensure that job roles are well-defined, provide employees with a sense of ownership and autonomy, and offer opportunities to utilize their skills and contribute meaningfully to the company's goals. Foster a culture of recognition through both formal programs (e.g., employee of the month) and informal appreciation (e.g., team shout-outs, thank-you notes). Invest in team-building activities to strengthen relationships and create a more cohesive work environment.", styles['Normal'])
    story.append(js_suggestion_1)
    story.append(Spacer(1, 0.1*inch))

    # Career Development
    cd_heading = Paragraph("4. Career Development", styles['h3'])
    story.append(cd_heading)
    cd_suggestion_1 = Paragraph("<b>4.1 Investing in Employee Growth:</b> Create transparent career paths for different roles within the organization, outlining the skills, experience, and training required for advancement. Offer a variety of learning and development opportunities, including technical training, leadership workshops, mentorship programs pairing junior and senior employees, and tuition reimbursement for relevant external courses. Regularly discuss career goals with employees during performance reviews and work collaboratively to create development plans. Prioritize internal candidates for promotions to demonstrate the company's commitment to employee growth and provide clear incentives for staying with the organization long-term.", styles['Normal'])
    story.append(cd_suggestion_1)
    story.append(Spacer(1, 0.1*inch))

    # Company Culture and Management
    cc_heading = Paragraph("5. Company Culture and Management", styles['h3'])
    story.append(cc_heading)
    cc_suggestion_1 = Paragraph("<b>5.1 Fostering a Positive and Supportive Environment:</b> Promote a culture of open and honest communication between employees and management. Provide leadership training to managers to equip them with the skills to effectively lead, motivate, and support their teams. Establish clear and fair policies and procedures, and ensure consistent application across the organization. Promote a sense of community and belonging through team-building activities and social events.", styles['Normal'])
    story.append(cc_suggestion_1)
    story.append(Spacer(1, 0.1*inch))

    # Work Environment
    we_heading = Paragraph("6. Work Environment", styles['h3'])
    story.append(we_heading)
    we_suggestion_1 = Paragraph("<b>6.1 Ensuring a Comfortable and Productive Workspace:</b> Ensure a comfortable and productive physical workspace. If 'DistanceFromHome' is a significant factor, explore options such as transportation assistance or subsidies, or support for relocation where applicable.", styles['Normal'])
    story.append(we_suggestion_1)
    story.append(Spacer(1, 0.2*inch))

    # Implementation Considerations
    impl_heading = Paragraph("Implementation Considerations", styles['h2'])
    story.append(impl_heading)
    impl_text = "To effectively reduce attrition, [Your Company Name] should adopt a data-driven approach, tracking key HR metrics to monitor the impact of implemented initiatives. A phased implementation, prioritizing high-impact and easily achievable actions, is recommended. Continuous communication with employees throughout the process is crucial for building trust and ensuring the success of these strategies."
    impl = Paragraph(impl_text, styles['Normal'])
    story.append(impl)
    story.append(Spacer(1, 0.2*inch))

    # Conclusion
    conclusion_heading = Paragraph("Conclusion", styles['h2'])
    story.append(conclusion_heading)
    conclusion_text = "Proactively addressing employee attrition is essential for the long-term success and sustainability of [Your Company Name] in Hyderabad. By prioritizing the well-being and professional growth of employees through competitive compensation, work-life balance initiatives, enhanced job satisfaction, career development opportunities, a positive company culture, and a comfortable work environment, the organization can foster a more engaged, loyal, and productive workforce."
    conclusion = Paragraph(conclusion_text, styles['Normal'])
    story.append(conclusion)

    doc.build(story)
    print(f"PDF report '{filename}' generated successfully.")

# Call the function to generate the PDF
generate_attrition_report()

PDF report 'attrition_prevention_report.pdf' generated successfully.
