Here we will run a neural network on the dataset

In [62]:
#import relevant modules
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import pandas as pd
import datetime

#reading in the data
data = pd.read_csv('transformed_data.csv')

X = data[['team', 'idle_men', 'incentive', 'over_time', 'no_of_workers', 'no_of_style_change', 'wip', 'smv', 'targeted_productivity']] #all integer data points
y = data[['actual_productivity']] #target value

#standardizing data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

#Train-Test Split of 70-30
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [63]:
# Testing different optimizers
optimizers_to_try = ['Adam', 'SGD', 'Adagrad']
optimizer_test_losses = []

for optimizer_name in optimizers_to_try:
    optimizer = getattr(tf.keras.optimizers, optimizer_name)()
    
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='sigmoid', input_shape=(9,)),
        tf.keras.layers.Dense(32, activation='linear'),
    ])

    model.compile(optimizer=optimizer, loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

    test_loss = model.evaluate(X_test, y_test)
    optimizer_test_losses.append(test_loss)
    print(f'Test loss with {optimizer_name}: {test_loss}')
print(optimizer_test_losses)

#adam found to be best optimizer

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss with Adam: 0.02681141346693039
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss with SGD: 0.044732559472322464
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/

In [64]:
#Testing different Learning Rates
learning_rates = [0.1, 0.2, 0.01, 0.02, 0.001, 0.0001]
rates_test_losses = []
for learning_rate in learning_rates:
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='sigmoid', input_shape=(9,)),
        tf.keras.layers.Dense(32, activation='linear'),
    ])

    model.compile(optimizer=optimizer, loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

    test_loss = model.evaluate(X_test, y_test)
    rates_test_losses.append(test_loss)
    print(f'Test loss with learning rate {learning_rate}: {test_loss}')
print(rates_test_losses)

#0.02 found to be the best learning rate

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss with learning rate 0.1: 0.025188252329826355
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss with learning rate 0.2: 0.023030243813991547
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch

In [65]:
# Testing different node sizes
node_sizes_to_try = [(32, 16), (64, 32), (128, 64), (32, 32)]
node_test_losses = []
for hidden_layer_size, output_layer_size in node_sizes_to_try:
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(hidden_layer_size, activation='sigmoid', input_shape=(9,)),
        tf.keras.layers.Dense(output_layer_size, activation='linear')
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.02)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

    test_loss = model.evaluate(X_test, y_test)
    node_test_losses.append(test_loss)
    print(f'Test loss with hidden layer size {hidden_layer_size} and output layer size {output_layer_size}: {test_loss}')
print(node_test_losses)

#64 and 32 is found best


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss with hidden layer size 32 and output layer size 16: 0.024822600185871124
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss with hidden layer size 64 and output layer size 32: 0.02335420995950699
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/3

In [66]:
# Testing different numbers of epochs
epochs_to_try = [10, 20, 30, 40]
epochs_test_losses = []
for num_epochs in epochs_to_try:
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='sigmoid', input_shape=(9,)),
        tf.keras.layers.Dense(32, activation='linear'),
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.02)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=num_epochs, batch_size=32, validation_data=(X_test, y_test))

    test_loss = model.evaluate(X_test, y_test)
    epochs_test_losses.append(test_loss)
    print(f'Test loss with {num_epochs} epochs: {test_loss}')
print(epochs_test_losses)

#30 is found best


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss with 10 epochs: 0.02731589414179325
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test loss with 20 epochs: 0.028812099248170853
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss with 30 epochs: 0.029844505712389946
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15

In [69]:
#Final Model

#visualization
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='sigmoid', input_shape=(9,)),
    tf.keras.layers.Dense(32, activation='linear'),
])
optimizer = tf.keras.optimizers.Adam(learning_rate=0.02)
model.compile(optimizer=optimizer, loss='mean_squared_error')
model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test), callbacks=[tensorboard_callback])

test_loss = model.evaluate(X_test, y_test)
predictions = model.predict(X_test)
print(f'Test loss: {test_loss}')

# for true_value, predicted_value in zip(y_test['actual_productivity'].values, predictions):
#     print(f'True Value: {true_value}, Predicted Value: {predicted_value[0]}')

print(len(y_test['actual_productivity'].values))
print(len(predictions))
# r2 = r2_score(y_test['actual_productivity'].values, predictions)
# print('R-squared:', r2)
    

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Test loss: 0.022297987714409828
306
306
