In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler

In [25]:
# Generate 1000 rows with realistic patterns
np.random.seed(7)  

data = []
# Generate 400 Best Workers (3)
for _ in range(400):
    age = np.random.randint(25, 45)
    bmi = np.random.normal(23, 2)
    spo2 = np.random.normal(97, 1.5)
    alcohol = np.random.choice(['Never', 'Occasionally'], p=[0.9, 0.1])
    satisfaction = np.random.randint(7, 11)
    data.append([age, bmi, spo2, alcohol, satisfaction, 3])

# Generate 350 Moderate Workers (2)
for _ in range(350):
    age = np.random.randint(35, 58)
    bmi = np.random.normal(26, 2.5)
    spo2 = np.random.normal(94, 2)
    alcohol = np.random.choice(['Never', 'Occasionally', 'Frequently'], p=[0.2, 0.7, 0.1])
    satisfaction = np.random.randint(4, 9)
    data.append([age, bmi, spo2, alcohol, satisfaction, 2])

# Generate 250 Limited Workers (1)
for _ in range(250):
    age = np.random.choice([
        np.random.randint(18, 25),
        np.random.randint(56, 66)
    ])
    bmi = np.random.normal(28, 4)
    spo2 = np.random.normal(91, 3)
    alcohol = np.random.choice(['Never', 'Occasionally', 'Frequently'], p=[0.1, 0.3, 0.6])
    satisfaction = np.random.randint(1, 7)
    data.append([age, bmi, spo2, alcohol, satisfaction, 1])


In [26]:
data

[[40, 21.559711137238814, 95.37299626405147, 'Occasionally', 10, 3],
 [39, 22.22906298505086, 96.66962283789448, 'Never', 7, 3],
 [32, 25.190249377272245, 97.12366780935832, 'Never', 7, 3],
 [36, 25.37415472571879, 98.8014186404763, 'Never', 10, 3],
 [30, 22.735951321536877, 97.35828327089091, 'Never', 8, 3],
 [25, 22.439428979743163, 97.45312295124418, 'Never', 9, 3],
 [34, 18.709760760836858, 95.57250485302133, 'Never', 8, 3],
 [25, 22.917915702864182, 95.58342615473049, 'Never', 7, 3],
 [32, 20.398455405078604, 95.67955393326646, 'Never', 10, 3],
 [25, 22.271391086667112, 94.40485358271812, 'Never', 10, 3],
 [26, 21.626462496103176, 93.73507797665673, 'Never', 9, 3],
 [30, 21.798156114863215, 98.68875300412476, 'Never', 10, 3],
 [43, 26.01494375926364, 98.77551711585217, 'Never', 10, 3],
 [41, 21.616799452433526, 99.86810172635728, 'Never', 10, 3],
 [39, 22.845658787581247, 99.5901590862356, 'Occasionally', 10, 3],
 [28, 24.53504328051013, 99.99019981088433, 'Never', 9, 3],
 [36, 26

In [27]:
# Converting this data to a DataFrame
df = pd.DataFrame(data, columns=['Age', 'BMI', 'SpO2', 'Alcohol_Consumption', 'Job_Satisfaction', 'Worker_Capability'])


In [28]:
print(df)

     Age        BMI       SpO2 Alcohol_Consumption  Job_Satisfaction  \
0     40  21.559711  95.372996        Occasionally                10   
1     39  22.229063  96.669623               Never                 7   
2     32  25.190249  97.123668               Never                 7   
3     36  25.374155  98.801419               Never                10   
4     30  22.735951  97.358283               Never                 8   
..   ...        ...        ...                 ...               ...   
995   59  41.077066  86.592906        Occasionally                 2   
996   58  30.270574  87.884788        Occasionally                 2   
997   23  29.099125  90.250600          Frequently                 4   
998   57  22.329439  90.142852          Frequently                 6   
999   24  26.122971  92.548986          Frequently                 3   

     Worker_Capability  
0                    3  
1                    3  
2                    3  
3                    3  
4         

In [29]:
# Clean up any unrealistic values
df['BMI'] = df['BMI'].clip(16.5, 35)
df['SpO2'] = df['SpO2'].clip(85, 100)

In [30]:
print(df)

     Age        BMI       SpO2 Alcohol_Consumption  Job_Satisfaction  \
0     40  21.559711  95.372996        Occasionally                10   
1     39  22.229063  96.669623               Never                 7   
2     32  25.190249  97.123668               Never                 7   
3     36  25.374155  98.801419               Never                10   
4     30  22.735951  97.358283               Never                 8   
..   ...        ...        ...                 ...               ...   
995   59  35.000000  86.592906        Occasionally                 2   
996   58  30.270574  87.884788        Occasionally                 2   
997   23  29.099125  90.250600          Frequently                 4   
998   57  22.329439  90.142852          Frequently                 6   
999   24  26.122971  92.548986          Frequently                 3   

     Worker_Capability  
0                    3  
1                    3  
2                    3  
3                    3  
4         

In [31]:
# Convert alcohol consumption to numeric
alcohol_map = {'Never': 0, 'Occasionally': 1, 'Frequently': 2}
df['Alcohol_Consumption'] = df['Alcohol_Consumption'].map(alcohol_map)

In [32]:
print(df)

     Age        BMI       SpO2  Alcohol_Consumption  Job_Satisfaction  \
0     40  21.559711  95.372996                    1                10   
1     39  22.229063  96.669623                    0                 7   
2     32  25.190249  97.123668                    0                 7   
3     36  25.374155  98.801419                    0                10   
4     30  22.735951  97.358283                    0                 8   
..   ...        ...        ...                  ...               ...   
995   59  35.000000  86.592906                    1                 2   
996   58  30.270574  87.884788                    1                 2   
997   23  29.099125  90.250600                    2                 4   
998   57  22.329439  90.142852                    2                 6   
999   24  26.122971  92.548986                    2                 3   

     Worker_Capability  
0                    3  
1                    3  
2                    3  
3                    3 

In [33]:
# Split features and target
X = df.drop('Worker_Capability', axis=1)
y = df['Worker_Capability'] - 1

In [34]:
y.unique()

array([2, 1, 0], dtype=int64)

In [35]:
X

Unnamed: 0,Age,BMI,SpO2,Alcohol_Consumption,Job_Satisfaction
0,40,21.559711,95.372996,1,10
1,39,22.229063,96.669623,0,7
2,32,25.190249,97.123668,0,7
3,36,25.374155,98.801419,0,10
4,30,22.735951,97.358283,0,8
...,...,...,...,...,...
995,59,35.000000,86.592906,1,2
996,58,30.270574,87.884788,1,2
997,23,29.099125,90.250600,2,4
998,57,22.329439,90.142852,2,6


In [36]:
# Split into train/validation/test
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.15, random_state=7)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.18, random_state=7)


In [37]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)


In [38]:
X_train_scaled

array([[ 1.06520029,  0.52019957, -0.16286593,  0.35397251,  0.25847433],
       [ 0.07834437, -0.2952932 ,  0.97233589, -0.97964284,  0.67744319],
       [ 1.31191427,  2.12611805, -0.65811736,  1.68758786, -0.99843225],
       ...,
       [ 0.57177233, -0.28501944, -0.37975697,  0.35397251, -0.16049453],
       [-1.5664155 ,  0.02880111, -2.17484437,  0.35397251, -1.41740111],
       [ 1.55862825,  0.21913534, -0.11207027,  0.35397251, -0.57946339]])

In [39]:
#  neural net parameters
model = Sequential([
    Dense(32, activation='relu', input_shape=(5,)),
    Dense(16, activation='relu'),
    Dense(3, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [40]:
# Train
history = model.fit(X_train_scaled, y_train,
                    validation_data=(X_val_scaled, y_val),
                    epochs=50,
                    batch_size=32,
                    verbose=1)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [41]:
# Evaluate
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"\nTest Accuracy: {test_accuracy*100:.2f}%")



Test Accuracy: 96.00%


In [60]:
# Try checking with your example case
label_map = {0: 'Limited', 1: 'Moderate', 2: 'Best'}
example_case = [[59, 22, 97, 0, 2]]

example_scaled = scaler.transform(example_case)
predictions = model.predict(example_scaled)

predicted_label = label_map[np.argmax(predictions)]
print(f"Predicted Worker Capability: {predicted_label}")

Predicted Worker Capability: Moderate


