In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import h5py
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import tensorflow as tf
from keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, concatenate, BatchNormalization,LSTM,Dropout,Reshape
from keras.models import Model, Sequential
from keras.regularizers import l2

In [2]:
# set seed for random state - group #
seed = 23

In [3]:
def load_data(file_name, csv_file):

    # read the csv file with labels
    df = pd.read_csv(csv_file)

    # get the number of samples
    num_samples = min(df.shape[0], 20000)

    # get the labels and encode them
    y = LabelEncoder().fit_transform(df["trace_category"])[:num_samples]
    # y = y.reshape(len(y),1)

    # read data into numpy arrays
    data = []
    dtfl = h5py.File(file_name, 'r')
    for i in range(num_samples):
        dataset = dtfl.get("STEAD_data/{}".format(i))
        data.append(np.array(dataset))
    X = np.array(data)
    
    # train test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)

    # no preprocessing required, so return the splits
    return X_train, X_test, y_train, y_test

In [4]:
# dataset file name here - hdf5 file
file_name = r"C:\Users\krish\OneDrive\Documents\spring23\DM\CSE572-Project\CSE572-Project\STEAD_data.hdf5"

# name of the csv file with labels 
csv_file = r"C:\Users\krish\OneDrive\Documents\spring23\DM\CSE572-Project\CSE572-Project\STEAD_labels.csv"

# load the data
X_train, X_test, y_train, y_test = load_data(file_name=file_name, csv_file=csv_file)

In [6]:
print(X_train.shape)
print(y_train.shape)

(13042, 6000, 3)
(13042,)


In [5]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
# # reshape data for nn
# X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
# X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [10]:
import tensorflow as tf
from tensorflow.keras import layers
from keras import regularizers


class InceptionBlock(tf.keras.Model):
    def __init__(self, n_filters):
        super(InceptionBlock, self).__init__()
        self.conv1 = layers.Conv1D(filters=n_filters//4, kernel_size=1, activation='relu')
        self.conv2 = layers.Conv1D(filters=n_filters//4, kernel_size=2, padding='same', activation='relu')
        self.conv3 = layers.Conv1D(filters=n_filters//4, kernel_size=3, padding='same', activation='relu')
        self.maxpool = layers.MaxPooling1D(pool_size=3, strides=1, padding='same')
        self.concat = layers.Concatenate(axis=-1)
        
    def call(self, inputs):
        x1 = self.conv1(inputs)
        x2 = self.conv2(inputs)
        x3 = self.conv3(inputs)
        x4 = self.maxpool(inputs)
        x4 = self.conv1(x4)
        x = self.concat([x1, x2, x3, x4])
        return x


def build_inception_time(input_shape, num_classes):
    inputs = layers.Input(shape=input_shape)
    
    x = inputs
    x = layers.Conv1D(filters=16, kernel_size=1, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    
    # Inception Blocks
    x = InceptionBlock(n_filters=16)(x)
    x = InceptionBlock(n_filters=16)(x)
    x = layers.Dropout(0.5)(x)
    
    x = InceptionBlock(n_filters=32)(x)
    x = InceptionBlock(n_filters=32)(x)
    x = layers.Dropout(0.5)(x)
    
    x = InceptionBlock(n_filters=64)(x)
    x = InceptionBlock(n_filters=64)(x)
    x = layers.Dropout(0.5)(x)
    
    x = InceptionBlock(n_filters=128)(x)
    x = InceptionBlock(n_filters=128)(x)
    x = layers.Dropout(0.5)(x)
    
    x = layers.GlobalAveragePooling1D()(x)
    
    # Append LSTM layers with dropout and kernel regularization
    x = layers.Reshape((-1, 1))(x)
    x = layers.LSTM(32, return_sequences=True, kernel_regularizer=regularizers.L2(0.01))(x)
    x = layers.LSTM(16, kernel_regularizer=regularizers.L2(0.01))(x)
    # Dense layers with kernel regularization
    x = layers.Dense(8, activation='relu', kernel_regularizer=regularizers.L2(0.01))(x)
    x = layers.Dense(1, activation='sigmoid', kernel_regularizer=regularizers.L2(0.01))(x)

    outputs = x

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    return model

model = build_inception_time((6000,3),2)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 6000, 3)]         0         
                                                                 
 conv1d_50 (Conv1D)          (None, 6000, 16)          64        
                                                                 
 batch_normalization_2 (Batc  (None, 6000, 16)         64        
 hNormalization)                                                 
                                                                 
 inception_block_16 (Incepti  (None, 6000, 16)         396       
 onBlock)                                                        
                                                                 
 inception_block_17 (Incepti  (None, 6000, 16)         396       
 onBlock)                                                        
                                                           

In [None]:
history = model.fit(X_train, y_train, epochs=50, batch_size=128, validation_split=0.2, callbacks=[callback])

Epoch 1/50
17/82 [=====>........................] - ETA: 10:01 - loss: 0.6945 - accuracy: 0.6006

In [13]:
y_pred_cnn = list(map(lambda y: round(y), model.predict(X_test).flatten()))



In [14]:
print("Classification report for CNN: ")
print(classification_report(y_test, y_pred_cnn))

Classification report for CNN: 
              precision    recall  f1-score   support

           0       0.50      1.00      0.67      2786
           1       0.00      0.00      0.00      2804

    accuracy                           0.50      5590
   macro avg       0.25      0.50      0.33      5590
weighted avg       0.25      0.50      0.33      5590



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
