#### Practical 2: Implement a basic AI prototype using TensorFlow/PyTorch for an application and Train the model with relevant datasets and evaluate its performance.
#### Name: Pawar Sneha Sachin
#### Roll No: 24207148
#### Class: TY-A  Batch: C

#### Employee attrition (employees leaving an organization) is a major challenge for companies.The goal of this AI prototype is to predict whether an employee will leave the company or not based on HR data.

In [3]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report

In [4]:
data = pd.read_csv("WA_Fn-UseC_-HR-Employee-Attrition.csv")
data

Unnamed: 0.1,Unnamed: 0,Age,Attrition,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EmployeeCount,...,YearsSinceLastPromotion,YearsWithCurrManager,DailyRateGroups,HourlyRateGroups,MonthlyIncomeGroups,MonthlyRateGroups,TotalWorkingYearsGroups,YearsAtCompanyGroups,AgeGroups,YearsBeforeCompanyGroups
0,0,41,Yes,Travel_Rarely,1102,Sales,1,College,Life Sciences,1,...,0,5,K - 1100 => 1200,I - 90 => 100,E - 5000 => 6000,S - 19000 => 20000,D - 6 => 10 Years,D - 6 => 10 Years,E - 41 => 50 Years,B - 2 => 3 Years
1,1,49,No,Travel_Frequently,279,Research & Development,8,Below College,Life Sciences,1,...,1,7,B - 200 => 300,F - 60 => 70,E - 5000 => 6000,X - 24000 => 25000,D - 6 => 10 Years,D - 6 => 10 Years,E - 41 => 50 Years,B - 2 => 3 Years
2,2,37,Yes,Travel_Rarely,1373,Research & Development,2,College,Other,1,...,0,0,M - 1300 => 1400,I - 90 => 100,B - 2000 => 3000,B - 2000 => 3000,D - 6 => 10 Years,A - 0 => 1 Year,D - 31 => 40 Years,B - 2 => 3 Years
3,3,33,No,Travel_Frequently,1392,Research & Development,3,Master,Life Sciences,1,...,3,0,M - 1300 => 1400,E - 50 => 60,B - 2000 => 3000,W - 23000 => 24000,D - 6 => 10 Years,D - 6 => 10 Years,D - 31 => 40 Years,B - 2 => 3 Years
4,4,27,No,Travel_Rarely,591,Research & Development,2,Below College,Medical,1,...,2,2,E - 500 => 600,C - 30 => 40,C - 3000 => 4000,P - 16000 => 17000,D - 6 => 10 Years,B - 2 => 3 Years,C - 26 => 30 Years,B - 2 => 3 Years
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1465,1465,36,No,Travel_Frequently,884,Research & Development,23,College,Medical,1,...,0,3,H - 800 => 900,D - 40 => 50,B - 2000 => 3000,L - 12000 => 13000,F - 16 => 20 Years,C - 4 => 5 Years,D - 31 => 40 Years,B - 2 => 3 Years
1466,1466,39,No,Travel_Rarely,613,Research & Development,6,Below College,Medical,1,...,1,7,F - 600 => 700,D - 40 => 50,I - 9000 => 10000,U - 21000 => 22000,D - 6 => 10 Years,D - 6 => 10 Years,D - 31 => 40 Years,B - 2 => 3 Years
1467,1467,27,No,Travel_Rarely,155,Research & Development,4,Bachelor,Life Sciences,1,...,0,3,A - 100 => 200,H - 80 => 90,F - 6000 => 7000,E - 5000 => 6000,D - 6 => 10 Years,D - 6 => 10 Years,C - 26 => 30 Years,B - 2 => 3 Years
1468,1468,49,No,Travel_Frequently,1023,Sales,2,Bachelor,Medical,1,...,0,8,J - 1000 => 1100,F - 60 => 70,E - 5000 => 6000,M - 13000 => 14000,F - 16 => 20 Years,D - 6 => 10 Years,E - 41 => 50 Years,B - 2 => 3 Years


In [5]:
# columns do not help in prediction
data.drop(["EmployeeCount",
           "EmployeeNumber",
           "Over18",
           "StandardHours"], axis=1, inplace=True)

In [6]:
# Convert categorical columns to numeric values
label_encoder = LabelEncoder()

for column in data.select_dtypes(include="object").columns:
    data[column] = label_encoder.fit_transform(data[column])

In [7]:
# input features
X = data.drop("Attrition", axis=1).values

# target output
y = data["Attrition"].values

In [8]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,     # 80% training, 20% testing
    random_state=42
)

In [9]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

In [11]:
class AttritionModel(nn.Module):
    def __init__(self, input_size):
        super(AttritionModel, self).__init__()

        # First hidden layer
        self.fc1 = nn.Linear(input_size, 64)

        # Second hidden layer
        self.fc2 = nn.Linear(64, 32)

        # Output layer
        self.fc3 = nn.Linear(32, 1)

        # Activation functions
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

In [12]:
# Create model object
model = AttritionModel(X_train.shape[1])

# Binary Cross Entropy Loss
criterion = nn.BCELoss()

# Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [13]:
epochs = 20

for epoch in range(epochs):
    # Clear old gradients
    optimizer.zero_grad()

    # Forward pass
    outputs = model(X_train)

    # Calculate loss
    loss = criterion(outputs, y_train)

    # Backpropagation
    loss.backward()

    # Update weights
    optimizer.step()

    if (epoch + 1) % 5 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

Epoch [5/20], Loss: 0.6033
Epoch [10/20], Loss: 0.5649
Epoch [15/20], Loss: 0.5258
Epoch [20/20], Loss: 0.4880


In [14]:
# Disable gradient calculation
with torch.no_grad():
    predictions = model(X_test)
    predicted_classes = (predictions > 0.5).int()

# Accuracy
accuracy = accuracy_score(y_test, predicted_classes)

print("Model Accuracy:", accuracy)
print("\nClassification Report:\n")
print(classification_report(y_test, predicted_classes))

Model Accuracy: 0.8673469387755102

Classification Report:

              precision    recall  f1-score   support

         0.0       0.87      1.00      0.93       255
         1.0       0.00      0.00      0.00        39

    accuracy                           0.87       294
   macro avg       0.43      0.50      0.46       294
weighted avg       0.75      0.87      0.81       294



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
# Predict attrition for all employees
with torch.no_grad():
    all_predictions = model(torch.tensor(
        scaler.transform(X), dtype=torch.float32
    ))
# 1 = Leaving, 0 = Not Leaving
all_predictions_class = (all_predictions > 0.5).int().numpy()

In [23]:
data["Prediction"] = all_predictions_class

data["Prediction"] = data["Prediction"].map({
    1: "Employee Will Leave",
    0: "Employee Will Not Leave"
})

In [25]:
for index, row in data.iterrows():
    print(f"Employee {index + 1}: {row['Prediction']}")

Employee 1: Employee Will Not Leave
Employee 2: Employee Will Not Leave
Employee 3: Employee Will Not Leave
Employee 4: Employee Will Not Leave
Employee 5: Employee Will Not Leave
Employee 6: Employee Will Not Leave
Employee 7: Employee Will Not Leave
Employee 8: Employee Will Not Leave
Employee 9: Employee Will Not Leave
Employee 10: Employee Will Not Leave
Employee 11: Employee Will Not Leave
Employee 12: Employee Will Not Leave
Employee 13: Employee Will Not Leave
Employee 14: Employee Will Not Leave
Employee 15: Employee Will Not Leave
Employee 16: Employee Will Not Leave
Employee 17: Employee Will Not Leave
Employee 18: Employee Will Not Leave
Employee 19: Employee Will Not Leave
Employee 20: Employee Will Not Leave
Employee 21: Employee Will Not Leave
Employee 22: Employee Will Not Leave
Employee 23: Employee Will Not Leave
Employee 24: Employee Will Not Leave
Employee 25: Employee Will Not Leave
Employee 26: Employee Will Not Leave
Employee 27: Employee Will Not Leave
Employee 2