## Imports

In [63]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



### Load the data

In [3]:
# Open the processed dataset archive. The bare name on the last line makes the
# notebook display a summary of the archive.
DATA_PATH = 'processed_data.npz'
data = np.load(DATA_PATH)
data

NpzFile 'processed_data.npz' with keys: x, y

In [4]:
# Pull the "x" and "y" arrays out of the loaded archive.
X, y = data["x"], data["y"]

# Report the dimensions of both arrays.
for label, array in (("X shape: ", X), ("y shape: ", y)):
    print(label, array.shape)

X shape:  (5572, 8038)
y shape:  (5572,)


In [5]:
# Two-stage split, both stages seeded with the same value:
#   1. hold out 20% of all samples as the test set;
#   2. hold out 10% of the remaining training samples as the validation set.
SPLIT_SEED = 365
TEST_FRACTION = 0.2
VALIDATION_FRACTION = 0.1

x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)
x_train, x_validation, y_train, y_validation = train_test_split(
    x_train, y_train, test_size=VALIDATION_FRACTION, random_state=SPLIT_SEED
)

In [6]:
# Print the shape of every split: features first, then labels.
split_report = (
    ("x_train shape: ", x_train),
    ("x_test shape: ", x_test),
    ("x_validation shape: ", x_validation),
    ("y_train shape: ", y_train),
    ("y_test shape: ", y_test),
    ("y_validation shape: ", y_validation),
)
for label, split in split_report:
    print(label, split.shape)

x_train shape:  (4011, 8038)
x_test shape:  (1115, 8038)
x_validation shape:  (446, 8038)
y_train shape:  (4011,)
y_test shape:  (1115,)
y_validation shape:  (446,)


In [7]:
## Model
## Objective Function (mean squared error)
## Optimization Algorithm (SGD)

In [25]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and the shuffle order are repeatable after a kernel restart.
TF_SEED = 365
tf.keras.utils.set_random_seed(TF_SEED)

# Network dimensions. The input width is read from the training matrix so the
# model keeps matching the data if the number of features changes.
input_size = x_train.shape[1]
output_size = 1
hidden_layer_size = 250

# Two hidden layers (ReLU, then tanh) followed by a single sigmoid unit whose
# output lies in (0, 1).
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(input_size,)),
    tf.keras.layers.Flatten(),  # leaves an already 2-D (batch, features) input unchanged
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='tanh'),
    tf.keras.layers.Dense(output_size, activation='sigmoid')
])

NUM_OF_EPOCHS = 100
BATCH_SIZE = 32

# Training dataset. The shuffle buffer spans the whole training set: a buffer
# smaller than the dataset only shuffles within a sliding window of samples.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=len(x_train)).batch(BATCH_SIZE)

# Validation dataset (batched, not shuffled).
val_dataset = tf.data.Dataset.from_tensor_slices((x_validation, y_validation))
val_dataset = val_dataset.batch(BATCH_SIZE)

# Test dataset (batched, not shuffled, so predictions stay aligned with y_test).
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_dataset = test_dataset.batch(BATCH_SIZE)

# NOTE(review): mean squared error on a sigmoid output trains, but if y is a
# 0/1 label (TODO confirm) binary cross-entropy is the usual choice — consider
# switching once the stated objective is revisited.
model.compile(optimizer="sgd", loss='mean_squared_error', metrics=["accuracy"])

## Train model

In [27]:

# Fit on the batched, shuffled training dataset for NUM_OF_EPOCHS epochs and
# score the validation dataset after each epoch; verbose=2 logs one line per
# epoch.
model.fit(
    train_dataset,
    epochs=NUM_OF_EPOCHS,
    validation_data=val_dataset,
    verbose=2,
)

Epoch 1/100
126/126 - 2s - 13ms/step - accuracy: 0.8340 - loss: 0.2017 - val_accuracy: 0.8946 - val_loss: 0.1557
Epoch 2/100
126/126 - 1s - 9ms/step - accuracy: 0.8644 - loss: 0.1471 - val_accuracy: 0.8946 - val_loss: 0.1210
Epoch 3/100
126/126 - 1s - 8ms/step - accuracy: 0.8644 - loss: 0.1277 - val_accuracy: 0.8946 - val_loss: 0.1068
Epoch 4/100
126/126 - 1s - 8ms/step - accuracy: 0.8644 - loss: 0.1193 - val_accuracy: 0.8946 - val_loss: 0.0998
Epoch 5/100
126/126 - 1s - 8ms/step - accuracy: 0.8644 - loss: 0.1148 - val_accuracy: 0.8946 - val_loss: 0.0956
Epoch 6/100
126/126 - 1s - 8ms/step - accuracy: 0.8644 - loss: 0.1118 - val_accuracy: 0.8946 - val_loss: 0.0928
Epoch 7/100
126/126 - 1s - 8ms/step - accuracy: 0.8644 - loss: 0.1095 - val_accuracy: 0.8946 - val_loss: 0.0906
Epoch 8/100
126/126 - 1s - 9ms/step - accuracy: 0.8644 - loss: 0.1072 - val_accuracy: 0.8946 - val_loss: 0.0886
Epoch 9/100
126/126 - 1s - 9ms/step - accuracy: 0.8644 - loss: 0.1050 - val_accuracy: 0.8946 - val_loss

<keras.src.callbacks.history.History at 0x13825a8a900>

## Test model

In [29]:
# Run the trained network over the batched test dataset; the result holds the
# sigmoid output for every test sample.
y_pred = model.predict(x=test_dataset)

35/35 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step


In [99]:
# predict() returns the sigmoid output for each sample. Label a sample 1 when
# that output exceeds the decision threshold and 0 otherwise.
# reshape(-1) always yields a 1-D vector, whereas squeeze would collapse a
# single-sample prediction to a 0-d scalar.
DECISION_THRESHOLD = 0.5
y_pred = (y_pred > DECISION_THRESHOLD).astype(np.int64).reshape(-1)

In [117]:
# scikit-learn's signature is accuracy_score(y_true, y_pred): ground truth
# first, predictions second.
print("Model Test Accuracy: ", accuracy_score(y_test, y_pred))

Model Test Accuracy:  0.9811659192825112


In [115]:
# evaluate() returns the compiled loss followed by the compiled metric
# (accuracy); the accuracy is printed as a percentage.
evaluation = model.evaluate(test_dataset)
test_loss, test_accuracy = evaluation
summary_template = 'Test loss: {0:.2f}, Test Accuracy {1:.2f}'
print(summary_template.format(test_loss, test_accuracy * 100))

35/35 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.9792 - loss: 0.0182
Test loss: 0.02, Test Accuracy 98.12
