In [None]:
# Fix randomness and hide warnings
# Single seed value reused by every random number generator seeded in this cell
seed = 42

import os
# Quiet TensorFlow's native logging; set before TensorFlow is imported further down
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so assigning it
# here presumably only affects child processes, not this kernel -- confirm intent
os.environ['PYTHONHASHSEED'] = str(seed)
# Point Matplotlib's config directory at ./configs/ under the current working directory
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): `Warning` is the base class of all warning categories, so this filter
# silences every warning and makes the FutureWarning filter above redundant
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
# Seed NumPy's legacy global random generator
np.random.seed(seed)

import logging

import random
# Seed Python's built-in random module
random.seed(seed)

# Import tensorflow
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input
# Reduce TensorFlow's Python-side log output to errors only
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow through both the TF2 API and the TF1 compatibility API
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
from tensorflow.keras.optimizers import Adam

# Record the TensorFlow version in the notebook output
print(tf.__version__)

# Import other libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Default font size for every figure drawn in this notebook
plt.rc('font', size=16)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix


In [None]:
# Load the sample array and its class labels from the prepared .npz archive.
# SECURITY: allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code -- only load archives that come from a trusted source.
# The `with` block closes the underlying archive file once both arrays are read;
# the arrays themselves are materialised in memory and stay valid afterwards.
with np.load('/kaggle/input/input-data/clean_data.npz', allow_pickle=True) as dataset:
    data = dataset['data']
    labels = dataset['labels']

In [None]:
# Show how many samples fall into each target class, then the overall dataset size
print('Counting occurrences of target classes:')
label_frame = pd.DataFrame(labels, columns=['state'])
print(label_frame['state'].value_counts())

# Number of samples; reused by later cells
setsize = len(labels)
print('\nTotal number of samples : ', setsize)

In [None]:
# Map the string class names to integer ids, then one-hot encode them:
# 'healthy' -> [1, 0], 'unhealthy' -> [0, 1]
labels_map = {'healthy': 0, 'unhealthy': 1}

# Build a separate integer array instead of overwriting `labels` in place, so this
# cell can be re-run safely: the in-place version raised KeyError on a second run
# because the class names had already been replaced by their ids.
label_ids = np.array([labels_map[name] for name in labels], dtype='int64')

# num_classes is pinned to the size of the mapping so `y` always has one column per
# known class, even if a class happens to be absent from `labels`.
y = tfk.utils.to_categorical(label_ids, num_classes=len(labels_map))

# Cast the samples to uint8. No rescaling/normalisation happens here; the arrays are
# passed through `preprocess_input` at the point where they are fed to the model.
X = data.astype('uint8')

In [None]:
# Split data into train_val and test sets
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, random_state=seed, test_size=0.1, stratify=np.argmax(y,axis=1))

# Further split train_val into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, random_state=seed, test_size=0.1, stratify=np.argmax(y_train_val,axis=1))

In [None]:
# Key model / training parameters
input_shape = X_train.shape[1:]   # per-sample shape (everything after the sample axis)
output_shape = y_train.shape[1]   # width of the one-hot target vectors
batch_size = 128                  # training batch size -- still to be tuned
epochs = 200                      # upper bound on the number of training epochs

# Echo the chosen parameters
for caption, setting in (
    ("Epochs:", epochs),
    ("Batch Size:", batch_size),
    ("Input Shape:", input_shape),
    ("Output Shape:", output_shape),
):
    print(caption, setting)

In [None]:
# InceptionV3 backbone with ImageNet weights and without its classification top.
# The input shape is taken from `input_shape` (derived from the training data)
# instead of a hard-coded (96, 96, 3), so the backbone cannot silently disagree
# with the arrays that are later passed to `model.fit`.
base_model = InceptionV3(
    input_shape=input_shape,
    include_top=False,
    weights='imagenet'
)

# Freeze the whole backbone: setting `trainable` on the model applies to every
# layer it contains, so only the layers added on top will be trained.
base_model.trainable = False

In [None]:
# Classification head stacked on the frozen backbone's output feature maps
features = tfkl.Flatten()(base_model.output)

# NOTE(review): this hidden layer has only `output_shape` units, which is a very
# narrow bottleneck in front of the classifier -- confirm this width is intended.
hidden = tfkl.Dense(units=output_shape, activation='relu')(features)
hidden = tfkl.Dropout(0.2)(hidden)

# Softmax (not sigmoid): the targets are one-hot vectors and the loss is
# CategoricalCrossentropy, which expects the outputs to form a probability
# distribution over the classes.
outputs = tfkl.Dense(units=output_shape, activation='softmax')(hidden)

model = tfk.Model(base_model.input, outputs)
model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=Adam(),
    metrics=['accuracy']
)

In [None]:
# Stop once validation accuracy has failed to improve for 10 consecutive epochs,
# and roll the model back to the weights of its best epoch
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    patience=10,
    restore_best_weights=True,
)

# Fit on the preprocessed training set, validating on the preprocessed validation set
training_run = model.fit(
    x=preprocess_input(X_train),
    y=y_train,
    validation_data=(preprocess_input(X_val), y_val),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[early_stopping],
)

# Keep only the per-epoch metrics dictionary
history = training_run.history

In [None]:
# Index of the epoch at which validation accuracy peaked
best_epoch = np.argmax(history['val_accuracy'])

# Styling shared by both figures: one colour per split, same line look everywhere
train_color, val_color = '#ff7f0e', '#4D61E2'
curve_style = dict(alpha=0.8, linewidth=3)

# Figure 1: training vs. validation loss
loss_fig, loss_ax = plt.subplots(figsize=(20, 5))
loss_ax.plot(history['loss'], label='Training', color=train_color, **curve_style)
loss_ax.plot(history['val_loss'], label='Validation', color=val_color, **curve_style)
loss_ax.legend(loc='upper left')
loss_ax.set_title('Categorical Crossentropy')
loss_ax.grid(alpha=0.3)

# Figure 2: training vs. validation accuracy, with a star on the best epoch
acc_fig, acc_ax = plt.subplots(figsize=(20, 5))
acc_ax.plot(history['accuracy'], label='Training', color=train_color, **curve_style)
acc_ax.plot(history['val_accuracy'], label='Validation', color=val_color, **curve_style)
acc_ax.plot(
    best_epoch,
    history['val_accuracy'][best_epoch],
    marker='*',
    markersize=10,
    alpha=0.8,
    color=val_color,
)
acc_ax.legend(loc='upper left')
acc_ax.set_title('Accuracy')
acc_ax.grid(alpha=0.3)

plt.show()

In [None]:
# Inference: run the trained model over the whole (preprocessed) test set
X_test_ready = preprocess_input(X_test)
predictions = model.predict(X_test_ready, verbose=0)

# Report the shape of the prediction array
print("Predictions Shape:", predictions.shape)

In [None]:
# Collapse the one-hot targets and the predicted scores to integer class ids once,
# instead of repeating the argmax in every metric call
y_true = np.argmax(y_test, axis=-1)
y_pred = np.argmax(predictions, axis=-1)

# Compute the confusion matrix
cm = confusion_matrix(y_true, y_pred)

# Compute classification metrics (macro-averaged across classes)
accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average='macro')

# Display the computed metrics.
# The built-in round() is used rather than the `.round()` method: depending on the
# scikit-learn version the scores may be plain Python floats, which have no
# `.round()` method, while round() works for both Python floats and NumPy scalars.
print('Accuracy:', round(accuracy, 4))
print('Precision:', round(precision, 4))
print('Recall:', round(recall, 4))
print('F1:', round(f1, 4))

# Class names ordered by their integer id, used as tick labels. They are derived
# from `labels_map`; the previously referenced `re_label_map` is never defined in
# this notebook and raised NameError.
class_names = sorted(labels_map, key=labels_map.get)

# Plot the confusion matrix (transposed so rows are predictions, columns are truth)
plt.figure(figsize=(5, 4))
sns.heatmap(cm.T, xticklabels=class_names, yticklabels=class_names, cmap='Blues')
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()