In [1]:
import load_data
import numpy as np
import pandas as pd
import random

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, ParameterGrid
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from keras.utils import to_categorical

import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, ReLU, Flatten, Dense, Reshape, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.losses import categorical_crossentropy, sparse_categorical_crossentropy
from tensorflow.keras.regularizers import l2

In [2]:
data = load_data.read_data_sets()

# get train data
train_x = data.train.data

# get train labels
train_labels = data.train.labels

# get test data
test_x = data.test.data

# get test labels
test_labels = data.test.labels

# get sample number
n_samples = data.train.num_examples

# Print the first 10 examples of training data and labels
print("First 10 examples of training data:")
print(train_x[:10])
print()

print("Corresponding labels for the training data:")
print(train_labels[:10])
print()

# Print the first 10 examples of test data and labels
print("First 10 examples of test data:")
print(test_x[:10])
print()

print("Corresponding labels for the test data:")
print(test_labels[:10])
print()

# Print the total number of samples in the dataset
print(f"Total number of samples in the dataset: {n_samples}")

First 10 examples of training data:
[[26.94933313 26.94942352 26.86116345 ... 16.22032493 17.53295088
  17.53075379]
 [26.95540644 26.95561349 26.86818211 ... 16.22077216 17.53405894
  17.53186146]
 [26.96394591 26.96354065 26.87468671 ... 16.22162589 17.53570343
  17.53347345]
 ...
 [26.95091461 26.95146416 26.85930528 ... 16.21242109 17.53291899
  17.53075099]
 [26.93989902 26.94184343 26.84918537 ... 16.21115102 17.5320002
  17.52995518]
 [26.92817907 26.9306008  26.83702679 ... 16.21079389 17.53139138
  17.52946743]]

Corresponding labels for the training data:
[2 2 2 2 2 2 2 2 2 2]

First 10 examples of test data:
[[26.88684078 26.36006069 26.2375236  ... 16.32757215 16.90493887
  16.7821357 ]
 [26.88612478 26.35698842 26.23444401 ... 16.33011158 16.90430262
  16.78066285]
 [26.88125864 26.3487917  26.22622124 ... 16.33296166 16.90424512
  16.77954978]
 ...
 [26.83513111 26.27179449 26.16101443 ... 16.31236921 16.87338631
  16.74132307]
 [26.82404048 26.25446281 26.14994268 ... 16

In [3]:
# Count the unique labels in the training set
unique_train, counts_train = np.unique(train_labels, return_counts=True)
label_counts_train = dict(zip(unique_train, counts_train))

# Count the unique labels in the test set
unique_test, counts_test = np.unique(test_labels, return_counts=True)
label_counts_test = dict(zip(unique_test, counts_test))

print("Training label counts:", label_counts_train)
print("Test label counts:", label_counts_test)

Training label counts: {0: 28602, 1: 26628, 2: 29190}
Test label counts: {0: 18438, 1: 19740, 2: 19950}


In [4]:
# Check if dataset has missing values
missing_rows_count = np.isnan(train_x).any(axis=1).sum()
print(f"Number of rows with missing values: {missing_rows_count}")

Number of rows with missing values: 0


In [5]:
# Balance the training data using SMOTE
smote = SMOTE(random_state=42)
balanced_train_x, balanced_train_labels = smote.fit_resample(train_x, train_labels)

unique_train1, counts_train1 = np.unique(balanced_train_labels, return_counts=True)
label_counts_train1 = dict(zip(unique_train1, counts_train1))

print("Training label counts after balancing:", label_counts_train1)

Training label counts after balancing: {0: 29190, 1: 29190, 2: 29190}


In [6]:
# Shuffle indices
indices = np.arange(balanced_train_x.shape[0])
np.random.shuffle(indices)

# Use shuffled indices to shuffle train_x and train_labels
balanced_train_x_shuffled = balanced_train_x[indices]
balanced_train_labels_shuffled = balanced_train_labels[indices]

# Print the first 10 examples of shuffled training labels
print("First 10 shuffled training labels")
print(balanced_train_labels_shuffled[:10])

First 10 shuffled training labels
[0 0 2 0 0 1 1 1 0 0]


In [7]:
balanced_train_labels_shuffled_encoded = to_categorical(balanced_train_labels_shuffled, num_classes=3)
test_labels_encoded = to_categorical(test_labels, num_classes=3)

print(balanced_train_labels_shuffled_encoded[:10])
print(test_labels_encoded[:10])

[[1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]
[[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]


In [8]:
# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(balanced_train_x_shuffled)
X_test = scaler.transform(test_x)

y_train = balanced_train_labels_shuffled_encoded
y_test = test_labels_encoded

y_train1 = balanced_train_labels_shuffled
y_test1 = test_labels

print("First 10 examples of training data:")
print(X_train[:10])
print()

print("Corresponding labels for the training data:")
print(y_train[:10])
print()

print("One-Hot encoded labels for the training data:")
print(y_train1[:10])
print()

print("First 10 examples of test data:")
print(X_test[:10])
print()

print("Corresponding labels for the test data:")
print(y_test[:10])
print()

print("One-Hot encoded labels for the test data:")
print(y_test1[:10])
print()

First 10 examples of training data:
[[ 1.91514221  1.9268096   1.04813218 ...  0.29733114  0.16230917
   0.35768258]
 [ 0.91635726  0.66241251  0.97348119 ...  0.75976295  0.06117537
  -0.25740715]
 [ 0.95691734  1.06908794  0.9078273  ... -0.02796822  0.26752087
   0.1176113 ]
 ...
 [-0.52247861 -0.83248713 -1.12225529 ... -0.39899161 -0.56656156
  -0.70546815]
 [-0.36918854 -0.33864369 -0.10770834 ... -0.05882406  1.08139682
   0.80224198]
 [-1.00283028 -1.0909438  -1.40705465 ... -0.90723556 -0.47301159
   0.02493582]]

Corresponding labels for the training data:
[[1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]

One-Hot encoded labels for the training data:
[0 0 2 0 0 1 1 1 0 0]

First 10 examples of test data:
[[ 0.44138796 -0.00202854 -0.1746563  ... -0.41053994 -0.00116352
  -0.22655811]
 [ 0.4407388  -0.00470152 -0.17757255 ... -0.40779565 -0.0018294
  -0.22787341]
 [ 0.43632692 -0.01183294 -0.18535919 ... -

In [15]:
# Set random seeds for reproducibility
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)

input_shape = 310
num_classes = 3  

# Function to create the model
def create_dcnet_GS(input_shape, num_classes, optimizer, learning_rate, regularization):
    inputs = Input(shape=(input_shape,))
    
    # Reshape the input to match the initial input shape for the deconvolution phase
    x = Reshape((1, 1, input_shape))(inputs)

    # Deconvolution Phase
    x = Conv2DTranspose(512, (2, 2), strides=(1, 1))(x)
    x = ReLU()(x)
    x = Conv2DTranspose(256, (3, 3), strides=(1, 1))(x)
    x = ReLU()(x)
    x = Conv2DTranspose(128, (5, 5), strides=(1, 1))(x)
    x = ReLU()(x)
    x = Conv2DTranspose(64, (9, 9), strides=(1, 1))(x)
    x = ReLU()(x)
    x = Conv2DTranspose(3, (9, 9), strides=(1, 1))(x)
    x = ReLU()(x)
    x = Conv2DTranspose(1, (9, 9), strides=(1, 1))(x)
    x = ReLU()(x)

    # Convolution Phase with regularization
    x = Conv2D(3, (1, 1), strides=(1, 1), kernel_regularizer=l2(regularization))(x)
    x = ReLU()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(128, (1, 1), strides=(1, 1), kernel_regularizer=l2(regularization))(x)
    x = ReLU()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(256, (1, 1), strides=(1, 1), kernel_regularizer=l2(regularization))(x)
    x = ReLU()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(512, (1, 1), strides=(1, 1), kernel_regularizer=l2(regularization))(x)
    x = ReLU()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Fully-connected layer with regularization
    x = Flatten()(x)
    x = Dense(3, activation='softmax', kernel_regularizer=l2(regularization))(x)

    model = Model(inputs, x)
    
    # Select optimizer and compile the model
    if optimizer == 'Adam':
        opt = Adam(learning_rate=learning_rate)
    elif optimizer == 'SGD':
        opt = SGD(learning_rate=learning_rate)
    elif optimizer == 'RMSprop':
        opt = RMSprop(learning_rate=learning_rate)
    else:
        raise ValueError(f"Unsupported optimizer: {optimizer}")
    
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

# Define grid search parameters
param_grid_dcn = {
    'optimizer': ['Adam', 'SGD', 'RMSprop'],
    'learning_rate': [0.001, 0.01, 0.1],
    'regularization': [0.01, 0.001],
    'epochs': [10, 15, 20, 30]
}

# Initialize best accuracy and hyperparameters
best_accuracy = 0
best_params = {}
best_model_path = 'Best_DCNet_model.h5'

# Loop over parameter combinations
for params in ParameterGrid(param_grid_dcn):
    print(f"Training with params: {params}")
    
    # Create model
    model_GS = create_dcnet_GS(input_shape, num_classes, params['optimizer'], params['learning_rate'], params['regularization'])
    
    # Train the model
    history_GS = model_GS.fit(X_train, y_train, epochs=params['epochs'], batch_size=64, validation_split=0.1, verbose=0)
    
    # Evaluate the model
    loss, accuracy = model_GS.evaluate(X_test, y_test, verbose=0)
    print(f"Test accuracy: {accuracy:.4%}")
    
    # Check if this model is the best so far
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = params

        # Save the model
        model_GS.save(best_model_path)
        print(f"Saved the best model with accuracy: {accuracy:.4%}")

    print()

print(f"Best accuracy: {best_accuracy:.4%} with params: {best_params}")

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'Adam', 'regularization': 0.01}
Test accuracy: 78.9740%
Saved the best model with accuracy: 78.9740%

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'Adam', 'regularization': 0.001}
Test accuracy: 76.9010%

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'SGD', 'regularization': 0.01}
Test accuracy: 38.6647%

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'SGD', 'regularization': 0.001}
Test accuracy: 34.3208%

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'RMSprop', 'regularization': 0.01}
Test accuracy: 77.9211%

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'RMSprop', 'regularization': 0.001}
Test accuracy: 31.7197%

Training with params: {'epochs': 10, 'learning_rate': 0.01, 'optimizer': 'Adam', 'regularization': 0.01}
Test accuracy: 34.3208%

Training with params: {'epochs':

In [16]:
best_model_path = 'Best_DCNet_model.h5'
Best_Model_DCNet = tf.keras.models.load_model(best_model_path)

loss, accuracy = Best_Model_DCNet.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy:.4%}")

Test accuracy: 80.3623%


In [10]:
# Set random seeds for reproducibility
np.random.seed(43)
random.seed(43)
tf.random.set_seed(43)

def create_nn(input_shape, num_classes, optimizer, learning_rate, regularization):
    model = Sequential([
        Dense(620, activation='relu', input_shape=(input_shape,), kernel_regularizer=l2(regularization)),
        Dense(310, activation='relu', kernel_regularizer=l2(regularization)),
        Dense(155, activation='relu', kernel_regularizer=l2(regularization)),
        Dense(72, activation='relu', kernel_regularizer=l2(regularization)),
        Dense(12, activation='relu', kernel_regularizer=l2(regularization)),
        Dense(3, activation='softmax', kernel_regularizer=l2(regularization))
    ])

    # Select optimizer and compile the model
    if optimizer == 'Adam':
        opt = Adam(learning_rate=learning_rate)
    elif optimizer == 'SGD':
        opt = SGD(learning_rate=learning_rate)
    elif optimizer == 'RMSprop':
        opt = RMSprop(learning_rate=learning_rate)
    else:
        raise ValueError(f"Unsupported optimizer: {optimizer}")
    
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

In [17]:
# Define grid search parameters
param_grid_nn = {
    'optimizer': ['Adam', 'SGD', 'RMSprop'],
    'learning_rate': [0.001, 0.01, 0.1],
    'regularization': [0.01, 0.001],
    'epochs': [10, 20, 30, 50]
}

# Initialize best accuracy and hyperparameters
best_accuracy_nn = 0
best_params_nn = {}
best_model_path = 'Best_NN_model.h5'

# Loop over parameter combinations
for params in ParameterGrid(param_grid_nn):
    print(f"Training with params: {params}")
    
    # Create model
    model_NN = create_nn(input_shape, num_classes, params['optimizer'], params['learning_rate'], params['regularization'])
    
    # Train the model
    history_NN = model_NN.fit(X_train, y_train, epochs=params['epochs'], batch_size=64, validation_split=0.1, verbose=0)
    
    # Evaluate the model
    loss, accuracy = model_NN.evaluate(X_test, y_test)
    print(f"Test accuracy: {accuracy:.4%}")
    print()
    
    # Check if this model is the best so far
    if accuracy > best_accuracy_nn:
        best_accuracy_nn = accuracy
        best_params_nn = params
        
        # Save the model
        model_NN.save(best_model_path)
        print(f"Saved the best model with accuracy: {accuracy:.4%}")

print(f"Best accuracy of Neural Network: {best_accuracy_nn:.4%} with params: {best_params_nn}")

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'Adam', 'regularization': 0.01}
Test accuracy: 75.5006%

Saved the best model with accuracy: 75.5006%
Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'Adam', 'regularization': 0.001}
Test accuracy: 78.1930%

Saved the best model with accuracy: 78.1930%
Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'SGD', 'regularization': 0.01}
Test accuracy: 75.5436%

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'SGD', 'regularization': 0.001}
Test accuracy: 76.4244%

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'RMSprop', 'regularization': 0.01}
Test accuracy: 74.6714%

Training with params: {'epochs': 10, 'learning_rate': 0.001, 'optimizer': 'RMSprop', 'regularization': 0.001}
Test accuracy: 77.4429%

Training with params: {'epochs': 10, 'learning_rate': 0.01, 'optimizer': 'Adam', 'regularization': 0.01}
Test accurac

In [18]:
best_model_path = 'Best_NN_model.h5'
Best_Model_NN = tf.keras.models.load_model(best_model_path)

loss, accuracy = Best_Model_NN.evaluate(X_test, y_test)
print(f"Test accuracy: {accuracy:.4%}")

Test accuracy: 78.5886%
