# Models for Multi-class classfication

## Import libs and read data

In [10]:
import numpy as np
import pandas as pd 
# import matplotlib as mpl   
# import matplotlib.pyplot as plt
# import seaborn as sns
# from scipy.signal import find_peaks, savgol_filter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from keras import layers, models
from keras.models import Sequential
from keras.layers import Dense, Conv1D, Flatten, LeakyReLU, BatchNormalization, MaxPooling1D, Dropout
# from keras.utils import to_categorical
# import glob

In [2]:
data = pd.read_csv("../input/data.csv")

In [5]:
display(data.head().T, data.shape)

Unnamed: 0,0,1,2,3,4
320,-1.388896,16.722093,0.329687,-10.598846,10.015236
321,0.05821,-1.186429,-0.747993,-5.978379,12.575455
322,5.898464,-10.186022,-1.588051,1.822014,13.012607
323,5.849397,-3.832706,0.152671,12.90642,13.932454
324,2.37903,12.73007,5.357345,21.049637,13.677982
...,...,...,...,...,...
1648,1.384356,-4.633642,-5.380405,10.001486,35.561525
1649,16.59277,-6.431111,-6.852814,1.22302,37.348663
1650,8.760738,25.847542,-2.682321,1.914469,11.53166
Diagnostic,Streptavidin,Streptavidin,Streptavidin,S,S


(35, 1333)

## Data prep

In [29]:
def preprocess_data(data):

    X = data.drop(['Diagnostic', 'COVID-19_Status'], axis=1)
    X.columns = X.columns.astype(int)
    y = data['Diagnostic'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

    label_encoder = OneHotEncoder(sparse_output=False)
    y_train_encoded = label_encoder.fit_transform(y_train.reshape(-1, 1))
    y_test_encoded = label_encoder.transform(y_test.reshape(-1, 1))

    return X_train, X_test, y_train_encoded, y_test_encoded

## Simple NN

In [55]:
def simple_nn(data):

    X_train, X_test, y_train, y_test = preprocess_data(data)

    num_classes = y_train.shape[1]
    num_features = X_train.shape[1]

    model = Sequential([
    Dense(2048, activation='relu', input_shape=(num_features,)),
    Dense(1024, activation='tanh'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    # model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=10, batch_size=1, verbose=1)
    # model.fit(X_train, y_train_encoded, epochs=50, batch_size=8, validation_split=0.2)

    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f'Test loss: {loss:.2f}, Test accuracy: {accuracy*100:.2f}%')

    return model

In [56]:
simple_model = simple_nn(data)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 7.89, Test accuracy: 14.29%


## LeCun architecture CNN model

In [60]:
def lecun_cnn(data):
    
    X_train, X_test, y_train, y_test = preprocess_data(data)

    spectral_length = X_train.shape[1]
    num_classes = y_train.shape[1]

    model = models.Sequential()

    # First Layer
    model.add(Conv1D(16, 21, input_shape=(spectral_length, 1)))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(MaxPooling1D(2))

    # Second Layer
    model.add(Conv1D(32, 11))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(MaxPooling1D(2))

    # Third Layer
    model.add(Conv1D(64, 5))
    model.add(BatchNormalization())
    model.add(LeakyReLU())
    model.add(MaxPooling1D(2))

    # Concatenate and Flatten
    model.add(Flatten())

    # Dense Layers
    model.add(Dense(2048, activation='tanh'))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(2))
    model.add(Dropout(0.5))

    # Output Layer
    model.add(Dense(num_classes, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test), verbose=0)

    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f'Test Loss: {test_loss*100:.2f}% - Test Accuracy: {test_accuracy*100:.2f}%')

    return model

In [61]:
lecun_cnn_model = lecun_cnn(data)

ValueError: Input 0 of layer "max_pooling1d_5" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 2048)

## Report