# Import libraries

In [None]:
# Fix randomness and hide warnings
# A single seed value is reused for every random number generator seeded below.
seed = 42

import os
# These environment variables are set before TensorFlow and matplotlib are
# imported further down in this cell.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up; confirm
# that setting it here has the intended effect on the running kernel.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
# NOTE(review): the second filter ignores the base `Warning` category, which
# includes FutureWarning, so every Python warning is silenced from here on.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
np.random.seed(seed)

import logging

import random
random.seed(seed)

# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
# Reduce TensorFlow log output to errors only, then seed TensorFlow's RNG
# through both the current and the compat.v1 API.
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
print(tf.__version__)

# Import other libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Default font size for every figure drawn in this notebook
plt.rc('font', size=16)
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split
# NOTE(review): OneHotEncoder is not used in the cells visible here.
from sklearn.preprocessing import OneHotEncoder

### Load and display data

In [None]:
# Location of the pre-cleaned dataset archive; change this single constant when
# running outside the Kaggle environment.
DATA_PATH = '/kaggle/input/input-data/clean_data.npz'

# SECURITY NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code when the archive comes from an untrusted source. Only
# load archives you trust.
# NOTE(review): presumably required because the labels are stored as an object
# array - confirm, and drop the flag if the archive loads without it.
dataset = np.load(DATA_PATH, allow_pickle=True)
data = dataset['data']      # samples (displayed as images later in the notebook)
labels = dataset['labels']  # one class name per sample ('healthy' / 'unhealthy')

Try displaying the very first image.

Display the first 10 images together with their labels.

Inspect the data representation: class balance and total number of samples.

In [None]:
# Class balance: tabulate how many samples carry each label value
label_frame = pd.DataFrame(labels, columns=['state'])
class_counts = label_frame['state'].value_counts()
print('Counting occurrences of target classes:')
print(class_counts)

# The dataset size is the length of the first axis of the label array
setsize = labels.shape[0]
print('\nTotal number of samples : ', setsize)

In [None]:
# Encode the string labels as one-hot vectors:
#   'healthy' -> [1, 0], 'unhealthy' -> [0, 1]
labels_map = {'healthy': 0, 'unhealthy': 1}

# Build a fresh integer array instead of overwriting the loaded label array
# element by element, so the result does not depend on the dtype of `labels`
# and no earlier cell is needed for the sample count.
label_ids = np.array([labels_map[name] for name in labels], dtype='int64')

# Fix the number of classes explicitly so the one-hot width always equals
# len(labels_map), even if one class happens to be absent from the data.
labels = tfk.utils.to_categorical(label_ids, num_classes=len(labels_map))

# Normalize data to the range [0, 1] (assumes 8-bit pixel values in [0, 255])
X_train_val = (data / 255).astype('float32')

In [None]:
# @title Split data
seed = 42

# Hold out 10% of all samples as the test set. This first split stratifies on
# the one-hot label rows themselves.
# NOTE: X_train_val is both the input and an output of this call, so re-running
# this cell without re-running the normalization cell splits the already
# reduced array again.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_train_val,
    labels,
    test_size=0.1,
    random_state=seed,
    stratify=labels,
)

# Carve the validation set (10% of what is left) out of the train_val portion,
# this time stratifying on the class index recovered from the one-hot rows.
train_val_class_ids = np.argmax(y_train_val, axis=1)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.1,
    random_state=seed,
    stratify=train_val_class_ids,
)


In [None]:
# Key model / training parameters, derived from the training arrays
input_shape = X_train.shape[1:]   # shape of one sample (first axis dropped)
output_shape = y_train.shape[1]   # width of the one-hot label vectors
batch_size = 128                  # samples per batch ------> still to be tuned
epochs = 200                      # number of training epochs requested

# Report the chosen parameters in a fixed order
for param_name, param_value in (
    ("Epochs:", epochs),
    ("Batch Size:", batch_size),
    ("Input Shape:", input_shape),
    ("Output Shape:", output_shape),
):
    print(param_name, param_value)

In [None]:
def build_quasiVGG9(input_shape, output_shape, seed=seed):
    """Build and compile a small VGG-style convolutional classifier.

    The network has four stages of two 3x3, 'same'-padded ReLU convolutions
    with 32, 64, 128 and 256 filters. The first three stages end with max
    pooling; the last one is followed by global average pooling and a softmax
    output layer.

    Args:
        input_shape: shape of one input sample, passed to `tfkl.Input`.
        output_shape: number of units of the softmax output layer.
        seed: value passed to `tf.random.set_seed` before any layer is created.

    Returns:
        The compiled `tfk.Model`, named 'Convnet'.
    """
    tf.random.set_seed(seed)

    stage_filters = (32, 64, 128, 256)
    last_stage = len(stage_filters) - 1

    input_layer = tfkl.Input(shape=input_shape, name='Input')

    features = input_layer
    for stage, n_filters in enumerate(stage_filters):
        # Two stacked convolutions per stage, named conv<stage><position>
        for position in range(2):
            features = tfkl.Conv2D(
                filters=n_filters,
                kernel_size=3,
                padding='same',
                activation='relu',
                name=f'conv{stage}{position}',
            )(features)
        # Every stage but the last one is downsampled with max pooling
        if stage < last_stage:
            features = tfkl.MaxPooling2D(name=f'mp{stage}')(features)

    # The last stage is collapsed to one value per channel instead
    features = tfkl.GlobalAveragePooling2D(name='gap')(features)

    output_layer = tfkl.Dense(
        units=output_shape,
        activation='softmax',
        name='Output',
    )(features)

    # Connect input and output through the Model class, then compile
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='Convnet')
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(weight_decay=5e-4),
        metrics=['accuracy'],
    )

    return model

In [None]:
def denseblock(input_pool, iterations, level):
    """Stack `iterations` densely connected bottleneck units.

    Each unit applies a 1x1 ReLU convolution (128 filters) followed by a 3x3
    ReLU convolution (64 filters) to the running feature map, then concatenates
    the 64 new channels onto that feature map along the last axis. Every unit
    therefore receives the features produced by all units before it.

    Args:
        input_pool: feature-map tensor entering the block.
        iterations: number of bottleneck units to stack.
        level: block index, used only to build unique layer names.

    Returns:
        The feature map after the last concatenation, or `input_pool` itself
        when `iterations` is 0.
    """
    # Start from the block input so that a block with zero units is a no-op
    # instead of leaving the return value undefined.
    output_pool = input_pool

    for k in range(iterations):
        # Each unit must read the running concatenation, not the block input:
        # feeding `input_pool` to every unit would leave all but the last unit
        # disconnected from the returned tensor.
        x1 = tfkl.Conv2D(128,
            kernel_size=1,
            padding='same',
            activation='relu',
            name='conv_dense_1_'+str(k)+'_'+str(level))(output_pool)

        x2 = tfkl.Conv2D(64,
            kernel_size=(3,3),
            padding='same',
            activation='relu',
            name='conv_dense_2'+str(k)+'_'+str(level))(x1)

        # Grow the feature map by the 64 channels this unit produced
        output_pool = tfkl.Concatenate(
            axis=-1,
            name='concat'+str(k)+'_'+str(level))([output_pool, x2])

    return output_pool

In [None]:
def transferblock(input_transferblock, level):
    """Transition unit: 1x1 ReLU convolution (128 filters), then 2x2 average pooling.

    Args:
        input_transferblock: feature-map tensor to compress and downsample.
        level: index appended to the layer names to keep them unique.

    Returns:
        The average-pooled output of the 1x1 convolution.
    """
    level_tag = str(level)

    # Channel compression with a pointwise convolution
    pointwise = tfkl.Conv2D(
        128,
        kernel_size=1,
        padding='same',
        activation='relu',
        name='conv_transfer_1_1' + level_tag,
    )
    compressed = pointwise(input_transferblock)

    # Spatial downsampling; no strides are passed, so the layer default applies
    pooling = tfkl.AveragePooling2D((2, 2), name='ap_1' + level_tag)
    return pooling(compressed)


In [None]:
# @title Build model
def build_Inception(input_shape, output_shape, seed=seed):
    """Build and compile the DenseNet-style classifier.

    Despite the function name, the network assembled here is a stem
    convolution and max pooling, followed by four `denseblock` calls (with 6,
    12, 32 and 32 iterations) that are each followed by a `transferblock`,
    then global average pooling and a softmax output layer. The returned model
    is named 'DenseNet'.

    Args:
        input_shape: shape of one input sample, passed to `tfkl.Input`.
        output_shape: number of units of the output layer.
        seed: value passed to `tf.random.set_seed` before any layer is created.

    Returns:
        The compiled `tfk.Model`.
    """
    tf.random.set_seed(seed)

    # Build the neural network layer by layer
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Stem: 7x7 tanh convolution followed by 3x3 max pooling. No strides are
    # passed to either layer, so the Keras defaults apply.
    conv1 = tfkl.Conv2D(
        filters=64,
        kernel_size=(7,7),
        padding='same',
        activation='tanh',
        name='conv1',
    )(input_layer)

    pool1 = tfkl.MaxPooling2D(
        pool_size=(3,3),
        name='mp1'
    )(conv1)

    # First dense block and transition
    dense1 = denseblock(pool1, 6, 1)
    transfer1 = transferblock(dense1, 1)
    # Second
    dense2 = denseblock(transfer1, 12, 2)
    transfer2 = transferblock(dense2, 2)
    # Third
    dense3 = denseblock(transfer2, 32, 3)
    transfer3 = transferblock(dense3, 3)
    # Fourth
    dense4 = denseblock(transfer3, 32, 4)
    transfer4 = transferblock(dense4, 4)

    glo = tfkl.GlobalAveragePooling2D()(transfer4)

    # Softmax (not sigmoid): the targets are one-hot vectors of mutually
    # exclusive classes and the loss is categorical cross-entropy, so the
    # outputs should form a probability distribution over the classes. This
    # also matches the output layer of build_quasiVGG9.
    output_layer = tfkl.Dense(
        units=output_shape,
        activation='softmax',
        name='Output'
    )(glo)

    # Connect input and output through the Model class
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='DenseNet')

    # Compile the model
    model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.Adam(), metrics=['accuracy'])

    # Return the model
    return model

In [None]:
# Build the quasi-VGG9 model and display its summary
model = build_quasiVGG9(input_shape, output_shape)
model.summary()

In [None]:
# Early stopping: watch validation accuracy, give up after 10 epochs without
# improvement and restore the weights of the best epoch.
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=10,
    restore_best_weights=True,
)

# Fit on the training split while validating on the validation split
fit_result = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping],
)

# Keep only the recorded per-epoch metrics
history = fit_result.history

In [None]:
# Index of the epoch with the highest validation accuracy
best_epoch = np.argmax(history['val_accuracy'])

# Line styles shared by the loss and accuracy figures
train_style = dict(label='Training', alpha=0.8, color='#ff7f0e', linewidth=3)
val_style = dict(label='Validation', alpha=0.8, color='#4D61E2', linewidth=3)

# Figure 1: training and validation loss
loss_fig, loss_ax = plt.subplots(figsize=(20, 5))
loss_ax.plot(history['loss'], **train_style)
loss_ax.plot(history['val_loss'], **val_style)
loss_ax.legend(loc='upper left')
loss_ax.set_title('Categorical Crossentropy')
loss_ax.grid(alpha=0.3)

# Figure 2: training and validation accuracy, with a star on the best epoch
acc_fig, acc_ax = plt.subplots(figsize=(20, 5))
acc_ax.plot(history['accuracy'], **train_style)
acc_ax.plot(history['val_accuracy'], **val_style)
acc_ax.plot(
    best_epoch,
    history['val_accuracy'][best_epoch],
    marker='*',
    alpha=0.8,
    markersize=10,
    color='#4D61E2',
)
acc_ax.legend(loc='upper left')
acc_ax.set_title('Accuracy')
acc_ax.grid(alpha=0.3)

plt.show()

In [None]:
# Extract the activations of the first two convolutional layers for a subset
# of test images
num_images = 100
sample_batch = X_test[0:num_images]

# Pick the convolutional layers by type rather than by position in
# model.layers: the list also contains the input and pooling layers, so a
# positional slice such as model.layers[:4] ends at a pooling layer instead of
# at the second convolution.
conv_layers = [layer for layer in model.layers if isinstance(layer, tfkl.Conv2D)]

# Sub-model from the network input up to the first convolutional layer
first_conv = tfk.Model(inputs=model.input, outputs=conv_layers[0].output)
first_activations = first_conv(sample_batch)

# Sub-model from the network input up to the second convolutional layer
second_conv = tfk.Model(inputs=model.input, outputs=conv_layers[1].output)
second_activations = second_conv(sample_batch)

In [None]:
# Choose a random image for visualization. The upper bound is the number of
# images activations were actually computed for, which is smaller than
# num_images when the test set holds fewer samples.
image = np.random.randint(0, first_activations.shape[0])
re_label_map={0:'healthy', 1:'unhealthy'}

# Display the input image, true label, and predicted label
print('Input image')
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(18, 5)
# The inputs were scaled to [0, 1] by the normalization cell, so they can be
# shown as they are; remapping with (x + 1) / 2 is only correct for inputs in
# [-1, 1] and would wash the image out.
ax1.imshow(X_test[image])
print(X_test[image].shape)
ax1.set_title('True label: ' + re_label_map[np.argmax(y_test[image])])
prediction = model.predict(np.expand_dims(X_test[image], axis=0), verbose=0)
ax2.barh(list(re_label_map.values()), np.squeeze(prediction, axis=0), color=plt.get_cmap('Paired').colors)
ax2.set_title('Predicted label: ' + re_label_map[np.argmax(prediction)])
ax2.grid(alpha=0.3)
plt.show()

# Display the first feature maps of each extracted layer as a grid:
# (heading, activations, rows, columns, figure size)
activation_grids = (
    ('First convolutional activations', first_activations, 1, 6, (14, 12)),
    ('Second convolutional activations', second_activations, 2, 8, (14, 4)),
)
for heading, activations, num_row, num_col, figsize in activation_grids:
    print(heading)
    fig, axes = plt.subplots(num_row, num_col, figsize=figsize)
    # np.ravel flattens the axes row by row, whether plt.subplots returned a
    # 1-D array (single row) or a 2-D array, so channel i lands in the same
    # cell for both grids.
    for i, ax in enumerate(np.ravel(axes)):
        ax.imshow(activations[image, :, :, i], cmap='gray')
    plt.tight_layout()
    plt.show()

In [None]:
# Inference: predict class scores for every sample of the test set
predictions = model.predict(X_test, verbose=0)

# Report the shape of the prediction array
print(f"Predictions Shape: {predictions.shape}")

In [None]:
# Collapse the one-hot targets and the predicted scores to class indices once,
# instead of recomputing the argmax for every metric
y_true = np.argmax(y_test, axis=-1)
y_pred = np.argmax(predictions, axis=-1)

# Compute the confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(y_true, y_pred)

# Compute classification metrics (macro average over the classes)
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average='macro')

# Display the computed metrics. The built-in round() works for NumPy scalars
# and for plain Python floats alike, whereas the .round() method only exists
# on NumPy scalars and fails if scikit-learn returns a Python float.
print('Accuracy:', round(accuracy, 4))
print('Precision:', round(precision, 4))
print('Recall:', round(recall, 4))
print('F1:', round(f1, 4))

# Plot the confusion matrix, transposed so that true labels run along the
# x axis and predicted labels along the y axis
plt.figure(figsize=(5, 4))
sns.heatmap(cm.T, xticklabels=list(re_label_map.values()), yticklabels=list(re_label_map.values()), cmap='Blues')
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()