Step 1: Load and Explore the Dataset

In [2]:
import pandas as pd
df = pd.read_csv('/content/HR_Engagement_Survey_Data_with_Question_Details.csv')
print(df.head())
print(df.info())
print(df.isnull().sum())

  df = pd.read_csv('/content/HR_Engagement_Survey_Data_with_Question_Details.csv')


       Year    Status   Role      Department Director Manager  Supervisor  \
0  2020 Jun  Complete  Staff  Human Services    False   False       False   
1  2020 Jun  Complete  Staff  District Court    False   False       False   
2  2020 Jun  Complete  Staff  Superior Court    False   False       False   
3  2021 May  Complete   Lead  Human Services    False   False       False   
4  2019 May   Partial    NaN             NaN    False   False       False   

    Lead  Staff                                    Question  Answer_Numeric  \
0  False   True     07. At work, my opinions seem to count.               3   
1  False   True     07. At work, my opinions seem to count.               3   
2  False   True     07. At work, my opinions seem to count.               3   
3   True  False     07. At work, my opinions seem to count.               3   
4  False  False  01. I know what is expected of me at work.               0   

  Answer_Text  
0       Agree  
1       Agree  
2       Agree 

Step 2: Data Cleaning and Preprocessing

In [3]:
df['Status'] = df['Status'].map({'Complete': 0, 'Partial': 1})
df['Lead'] = df['Lead'].fillna(False).astype(bool)
df['Staff'] = df['Staff'].fillna(False).astype(bool)
df['Supervisor'] = df['Supervisor'].astype(bool)
categorical_cols = ['Role', 'Department', 'Director', 'Manager']

for col in categorical_cols:
    df[col] = df[col].fillna('Unknown')
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df['Answer_Numeric'] = scaler.fit_transform(df[['Answer_Numeric']])
features = ['Lead', 'Staff', 'Supervisor', 'Answer_Numeric'] + \
           [col for col in df.columns if col.startswith(tuple(categorical_cols))]

X = df[features]
y = df['Status']

X = X.copy()
for col in ['Lead', 'Staff', 'Supervisor']:
    X[col] = X[col].astype(int)

print(X.head())
print(y.head())


  df['Lead'] = df['Lead'].fillna(False).astype(bool)
  df['Staff'] = df['Staff'].fillna(False).astype(bool)


   Lead  Staff  Supervisor  Answer_Numeric  Role_Lead  Role_Manager  \
0     0      1           0            0.75      False         False   
1     0      1           0            0.75      False         False   
2     0      1           0            0.75      False         False   
3     1      0           0            0.75       True         False   
4     0      0           0            0.00      False         False   

   Role_Staff  Role_Staff Member  Role_Supervisor  Role_Unknown  ...  \
0        True              False            False         False  ...   
1        True              False            False         False  ...   
2        True              False            False         False  ...   
3       False              False            False         False  ...   
4       False              False            False          True  ...   

   Department_Parks and Recreation  Department_Planning and Public Works  \
0                            False                              

Step 3: Convert Data to PyTorch Tensors

In [4]:
import numpy as np
import torch
X_np = np.array(X.values, dtype=np.float32)
y_np = np.array(y.values, dtype=np.float32)
X_tensor = torch.from_numpy(X_np)
y_tensor = torch.tensor(y_np).unsqueeze(1)

Step 4: Create DataLoader and Split Dataset

In [5]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)


Step 5: Define the MLP Model

In [6]:
import torch.nn as nn
class MLP(nn.Module):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.layers(x)

model = MLP(X.shape[1])

Step 6: Train the Model

In [7]:
import torch.optim as optim

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 10

for epoch in range(epochs):
    model.train()
    train_loss = 0
    for features, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * features.size(0)

    train_loss /= len(train_loader.dataset)

    model.eval()
    val_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for features, labels in val_loader:
            outputs = model(features)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * features.size(0)
            preds = (outputs >= 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    val_loss /= len(val_loader.dataset)
    val_acc = correct / total

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}")


Epoch 1/10, Train Loss: 0.0311, Val Loss: 0.0156, Val Accuracy: 0.9973
Epoch 2/10, Train Loss: 0.0123, Val Loss: 0.0145, Val Accuracy: 0.9973
Epoch 3/10, Train Loss: 0.0120, Val Loss: 0.0145, Val Accuracy: 0.9967
Epoch 4/10, Train Loss: 0.0116, Val Loss: 0.0136, Val Accuracy: 0.9973
Epoch 5/10, Train Loss: 0.0114, Val Loss: 0.0144, Val Accuracy: 0.9974
Epoch 6/10, Train Loss: 0.0114, Val Loss: 0.0140, Val Accuracy: 0.9974
Epoch 7/10, Train Loss: 0.0112, Val Loss: 0.0142, Val Accuracy: 0.9973
Epoch 8/10, Train Loss: 0.0111, Val Loss: 0.0130, Val Accuracy: 0.9974
Epoch 9/10, Train Loss: 0.0110, Val Loss: 0.0138, Val Accuracy: 0.9973
Epoch 10/10, Train Loss: 0.0110, Val Loss: 0.0128, Val Accuracy: 0.9975


Step 7: Evaluate with Precision, Recall, and F1-Score

In [8]:
from sklearn.metrics import precision_score, recall_score, f1_score

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for features, labels in val_loader:
        outputs = model(features)
        preds = (outputs >= 0.5).int().squeeze().tolist()
        all_preds.extend(preds if isinstance(preds, list) else [preds])
        all_labels.extend(labels.int().squeeze().tolist() if isinstance(labels.int().squeeze().tolist(), list) else [labels.int().squeeze().tolist()])

precision = precision_score(all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)

print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-score:  {f1:.4f}")


Precision: 0.9921
Recall:    0.6614
F1-score:  0.7937
