In [None]:
#Import the packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from tensorflow.keras import layers

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras import models

In [None]:
# Load the dataset; the relative path is resolved against the current working directory
csv_path = 'garments_worker_productivity.csv'
df = pd.read_csv(csv_path)
df

In [None]:
# Transform actual productivity into a categorical label.
# Rows strictly above the threshold become 'Satisfactory'; every other row
# (including missing values, for which the comparison is False) becomes
# 'Unsatisfactory'.
# NOTE(review): the threshold looks like the mean of actual_productivity -- confirm.
PRODUCTIVITY_THRESHOLD = 0.735091097

# Only threshold while the column is numeric. Once it holds the string labels,
# comparing it with a float raises a TypeError, so re-running this cell must
# leave the already-labelled column alone.
if pd.api.types.is_numeric_dtype(df['actual_productivity']):
    tmp_condition = df['actual_productivity'] > PRODUCTIVITY_THRESHOLD
    # Replace the whole column in one assignment rather than writing strings
    # into a float column through .loc, which pandas deprecates as an
    # incompatible-dtype setitem.
    df['actual_productivity'] = np.where(tmp_condition, 'Satisfactory', 'Unsatisfactory')

In [None]:
# Convert the categorical data to numerical data.
# Each entry maps a column name to its {label: integer code} table.
CATEGORY_CODES = {
    'quarter': {'Quarter1': 1, 'Quarter2': 2, 'Quarter3': 3, 'Quarter4': 4, 'Quarter5': 5},
    'day': {'Monday': 1, 'Tuesday': 2, 'Wednesday': 3, 'Thursday': 4,
            'Friday': 5, 'Saturday': 6, 'Sunday': 7},
    'actual_productivity': {'Satisfactory': 1, 'Unsatisfactory': 0},
}

for column, codes in CATEGORY_CODES.items():
    # Assign the encoded column back to the frame. Calling
    # replace(..., inplace=True) on df[column] operates on an intermediate
    # Series and does not update df when pandas Copy-on-Write is active.
    # Values without an entry in `codes` pass through unchanged, so running
    # this cell again on already-encoded data is harmless.
    df[column] = df[column].map(lambda value, codes=codes: codes.get(value, value))

df

In [None]:
# Build the model inputs by leaving out the columns that are not used as
# features (the target itself is excluded as well), then pick the target.
excluded_columns = [
    'department',
    'date',
    'wip',
    'incentive',
    'idle_time',
    'idle_men',
    'no_of_style_change',
    'actual_productivity',
]
df_input = df.drop(columns=excluded_columns)
df_output = df['actual_productivity']

In [None]:
# Display the feature frame (df with the excluded columns dropped)
df_input

In [None]:
# Display the target series (the actual_productivity column of df)
df_output

# ANN with ML Libraries

In [None]:
# Split the data: 70% train / 30% test, with a fixed seed for a repeatable split
x_train, x_test, y_train, y_test = train_test_split(df_input, df_output, test_size=0.3, random_state=42)

# Aliases for the full (unsplit) inputs and target; not used in this cell
x = df_input
y = df_output

# Scale the data: fit the scaler on the training split only and reuse its
# statistics for the test split, so no test information leaks into training
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable across Restart & Run All
keras.utils.set_random_seed(42)

# Define the model: one hidden layer of 16 ReLU units and a single linear output.
# The input width is taken from the scaled training matrix instead of being
# hard-coded, so changing the dropped columns does not break the model.
# Sequential/Dense come from the same `keras` package; mixing `keras.Sequential`
# with `tensorflow.keras.layers` fails on installs where those are different packages.
# NOTE(review): the target is a 0/1 label, yet the network is trained as a
# regressor (linear output, MSE loss). A sigmoid output with binary
# cross-entropy may be more appropriate -- confirm the intended task.
n_features = x_train_scaled.shape[1]
model = Sequential([
    keras.Input(shape=(n_features,)),
    Dense(16, activation="relu"),
    Dense(1),
])

# Compile the model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_absolute_error', 'mean_squared_error'])

# Train the model. The target is handed to Keras as a float32 array so the
# fit does not depend on the pandas dtype of the encoded column. The last 20%
# of the training rows are held out for validation.
history = model.fit(
    x_train_scaled,
    np.asarray(y_train, dtype="float32"),
    epochs=100,
    validation_split=0.2,
)

In [None]:
# Evaluation: predictions of the trained network on both splits
y_train_pred = model.predict(x_train_scaled)
y_test_pred = model.predict(x_test_scaled)

# Calculate metrics (MAE, MSE, R²) for each split, in that order
metric_functions = (mean_absolute_error, mean_squared_error, r2_score)

train_mae, train_mse, train_r2 = [
    metric(y_train, y_train_pred) for metric in metric_functions
]
test_mae, test_mse, test_r2 = [
    metric(y_test, y_test_pred) for metric in metric_functions
]

# Report training figures first, then test figures
for caption, score in (
    ("Training MAE: ", train_mae),
    ("Training MSE: ", train_mse),
    ("Training R²: ", train_r2),
    ("Test MAE: ", test_mae),
    ("Test MSE: ", test_mse),
    ("Test R²: ", test_r2),
):
    print(caption, score)

In [None]:
# Plotting the training history: per-epoch MAE on the training and validation data
fig = plt.figure(figsize=(8, 8))
for history_key, legend_label in (
    ('mean_absolute_error', 'Train MAE'),
    ('val_mean_absolute_error', 'Validation MAE'),
):
    plt.plot(history.history[history_key], label=legend_label)
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Mean Absolute Error', fontsize=16)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.legend(loc='upper right', fontsize=16)
plt.show()

In [None]:
# Plot training & validation loss values recorded for each epoch
plt.figure(figsize=(12, 6))
for history_key, legend_label in (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
):
    plt.plot(history.history[history_key], label=legend_label)
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.grid(True)
plt.legend(loc='upper right')
plt.show()

# Random Forest Architecture

In [None]:
# Define the Random Forest model (100 trees, fixed seed)
rf_model = RandomForestRegressor(random_state=42, n_estimators=100)

# Train the model on the scaled training features
rf_model.fit(x_train_scaled, y_train)

# Evaluation: predictions on both splits
y_train_pred_rf = rf_model.predict(x_train_scaled)
y_test_pred_rf = rf_model.predict(x_test_scaled)

# Calculate metrics (MAE, MSE, R²) for each split, in that order
rf_metric_functions = (mean_absolute_error, mean_squared_error, r2_score)

train_mae_rf, train_mse_rf, train_r2_rf = [
    metric(y_train, y_train_pred_rf) for metric in rf_metric_functions
]
test_mae_rf, test_mse_rf, test_r2_rf = [
    metric(y_test, y_test_pred_rf) for metric in rf_metric_functions
]

# Report training figures first, then test figures
for caption, score in (
    ("Random Forest - Training MAE: ", train_mae_rf),
    ("Random Forest - Training MSE: ", train_mse_rf),
    ("Random Forest - Training R²: ", train_r2_rf),
    ("Random Forest - Test MAE: ", test_mae_rf),
    ("Random Forest - Test MSE: ", test_mse_rf),
    ("Random Forest - Test R²: ", test_r2_rf),
):
    print(caption, score)

# Plotting feature importances, ordered from most to least important
importances = rf_model.feature_importances_
indices = np.argsort(importances)[::-1]
names = [df_input.columns[position] for position in indices]

bar_positions = range(x_train.shape[1])
plt.figure(figsize=(10, 6))
plt.title("Feature Importance")
plt.bar(bar_positions, importances[indices])
plt.xticks(bar_positions, names, rotation=90)
plt.show()