In [64]:
# Ensure tensorflow is available in the notebook kernel
%pip install tensorflow -q

import os
# import pickle
# import random
# import imageio
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
# import matplotlib.cm as cm
# import matplotlib.pyplot as plt
# from sklearn.preprocessing import label_binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
# from sklearn.metrics import confusion_matrix, roc_curve, auc, accuracy_score, roc_auc_score, classification_report
# import tensorflow as tf
# from tensorflow import keras
from keras.utils import Sequence, to_categorical
from keras.optimizers import Adam
# from keras import optimizers, metrics, layers, models, applications
# from keras.callbacks import ReduceLROnPlateau
from keras.models import load_model, Model, Sequential
# # from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Dense, GlobalAveragePooling2D, PReLU, Dropout, BatchNormalization
# from keras.applications import ResNet50

Note: you may need to restart the kernel to use updated packages.


In [65]:
# in this section, i have iterated through all the images in the covid and normal folder
# the images have been labelled if they are covid or normal, covid = 1 and normal = 0
# converted those images to RGB, then resize them and make them consistent size 
# turned those images into numpy arrays and printed their shape 
folder_path = r'C:\_Dhruti\Projects\data-visualisation-learning\data\raw'
types = ['covid', 'normal']

data = [] 
labels = []

for t in types:
    path = os.path.join(folder_path, t)
    label = 1 if t == 'covid' else 0
    for img_name in os.listdir(path):
        img_path = os.path.join(path, img_name)
        # print (img_path) 
        img = Image.open(img_path).convert('RGB')           # ensures 3 channels because not everytime there wil be RGB 
        img = img.resize((224, 224))                        # resizing to make it consistent 
        img_array = np.array(img) / 255.0                   # converting to array and normalizing
        data.append(img_array)                              # saving the image data
        labels.append(label)                                # saving the label 

data = np.array(data)
labels = np.array(labels)
print (f"Data shape: {data.shape},\n Labels shape: {labels.shape}")

Data shape: (94, 224, 224, 3),
 Labels shape: (94,)


In [66]:
# in this section, i have broken the data into 2 sets - train set and test set using
# the train_test_split function from sklearn
# and then convert the labels to categorise into 2 classes - covid and normal using to_categorical function from keras 
X_train, X_test, Y_train, Y_test = train_test_split(data, labels, test_size=0.2, random_state=42, stratify=labels)  # previous line was to split the dataset into train and test sets 
print("training data shape: ", X_train.shape)
print("test data shape: ", X_test.shape)

y_train_category = to_categorical(Y_train, num_classes=2)
y_test_category = to_categorical(Y_test, num_classes=2)

print(y_train_category[:5])

training data shape:  (75, 224, 224, 3)
test data shape:  (19, 224, 224, 3)
[[0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]]


In [84]:
model = Sequential([
    
    # Conv2D is the convolutional block; there are 32 filters #applied 3x3 to scan he image to the input shape 224x2224x3
    # relu makes the convolutional layer output non-negative and non-linear
    # input shape is from teh shape of data/images i have
    # First convolutional block
    Conv2D(32, (3,3), activation='relu', input_shape=(224,224,3)),

    MaxPooling2D((2,2)),
    # MaxPooling2D is the pooling layer which reduces the spatial dimensions of the feature maps by half 
    # halving the dimensions helps to reduce the number of parameters and computation in the model, which can help prevent overfitting and improve generalization
    # overfitting model means that the model performs well on the training data but poorly on unseen data
    # first convolutional block is the first layer of the model which extracts low-level features from the input images, such as edges and textures.

    # second and third convolutional blocks are deeper layers that extract more complex features, 
    # such as shapes and patterns, which are important for distinguishing between covid and normal images.
    # Second convolutional block
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    
    # Third convolutional block
    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D((2,2)),
    
    # Flatten and fully connected layers
    Flatten(),
    # flatten function will convert the 3D feature maps into a 1D vector, which can be fed into the fully connected layers for classification.
    Dense(128, activation='relu'),
    Dropout(0.5),  # prevents overfitting

    # Dense and Dropout 
    # Dense means fully connected to flatten features - 128 neurons 
    # Dropout means randomly disables 50% of neurons during training -- prevents overfitting 
    # memorizing the training data instead of learning general patterns, which can lead to poor performance on unseen data.
    
    Dense(1, activation='sigmoid')  # binary classification
    # this dense has 1 layer which means it will output single value (binary classification) 
    # and sigmoid is outputting 1 or 0 which determines COVID or NORMAL 
    # sigmoid --> >= 0.5 then classified as COVID else NORMAL
])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [73]:
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',   # for 2-class problems
    metrics=['accuracy']
)




In [74]:
history = model.fit(
    X_train, Y_train,           # training data
    validation_data=(X_test, Y_test),
    epochs=10,                  # start small; increase later
    batch_size=16               # adjust based on memory
)



Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 143ms/step - accuracy: 0.6133 - loss: 0.6970 - val_accuracy: 0.7368 - val_loss: 0.5589
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 119ms/step - accuracy: 0.7333 - loss: 0.5710 - val_accuracy: 0.7368 - val_loss: 0.5264
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 125ms/step - accuracy: 0.7600 - loss: 0.5180 - val_accuracy: 0.7368 - val_loss: 0.4745
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 116ms/step - accuracy: 0.7333 - loss: 0.4608 - val_accuracy: 0.7368 - val_loss: 0.4304
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 117ms/step - accuracy: 0.7467 - loss: 0.4819 - val_accuracy: 0.7368 - val_loss: 0.3658
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 116ms/step - accuracy: 0.8133 - loss: 0.4018 - val_accuracy: 0.8947 - val_loss: 0.3031
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━

In [75]:
loss, accuracy = model.evaluate(X_test, Y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 1.0000 - loss: 0.1005
Test Accuracy: 100.00%
