In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Importing Libraries   

In [2]:
import os
import numpy as np

from tensorflow.keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization, ReLU, GlobalAveragePooling2D
from keras.regularizers import l2

from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.optimizers import Adam

# Configuration

In [3]:
COVID_DIR = "/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/COVID"
NORMAL_DIR = "/kaggle/input/covid19-radiography-database/COVID-19_Radiography_Dataset/Normal"
TARGET_SIZE = (150, 150)
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.2
EPOCHS = 30

# Data Generators

In [4]:
aug_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT
)

In [5]:
base_gen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT
)

In [6]:
train_covid = aug_gen.flow_from_directory(
    directory=COVID_DIR,
    classes=["images"],
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    shuffle=True
)

Found 2893 images belonging to 1 classes.


In [7]:
train_normal = aug_gen.flow_from_directory(
    directory=NORMAL_DIR,
    classes=["images"],
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    shuffle=True
)

Found 8154 images belonging to 1 classes.


In [8]:
val_covid = base_gen.flow_from_directory(
    directory=COVID_DIR,
    classes=["images"],
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

Found 723 images belonging to 1 classes.


In [9]:
val_normal = base_gen.flow_from_directory(
    directory=NORMAL_DIR,
    classes=["images"],
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    shuffle=False
)

Found 2038 images belonging to 1 classes.


# Manual Labelling

In [10]:
def labeled_generator(generator, label):
    while True:
        x, _ = next(generator)
        y = np.full((x.shape[0],), label, dtype=np.float32)
        yield x, y

In [11]:
train_covid_labeled = labeled_generator(train_covid, 1.0)
train_normal_labeled = labeled_generator(train_normal, 0.0)
val_covid_labeled = labeled_generator(val_covid, 1.0)
val_normal_labeled = labeled_generator(val_normal, 0.0)

# Combined Generators

In [12]:
def combined_generator(gen1, gen2):
    while True:
        x1, y1 = next(gen1)
        x2, y2 = next(gen2)
        x = np.concatenate((x1, x2), axis=0)
        y = np.concatenate((y1, y2), axis=0)
        idx = np.random.permutation(len(x))
        yield x[idx], y[idx]

train_combined = combined_generator(train_covid_labeled, train_normal_labeled)
val_combined = combined_generator(val_covid_labeled, val_normal_labeled)

val_steps_balanced = min(len(val_covid) // BATCH_SIZE, len(val_normal) // BATCH_SIZE)

# Step Counts

In [13]:
train_steps = 8000 // (BATCH_SIZE * 2)
val_steps = 2000 // (BATCH_SIZE * 2)

# Model Building

In [14]:
model = Sequential()

model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=l2(1e-4), input_shape=(150, 150, 3)))
model.add(BatchNormalization())
model.add(ReLU())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(1e-4)))
model.add(BatchNormalization())
model.add(ReLU())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(1e-4)))
model.add(BatchNormalization())
model.add(ReLU())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(GlobalAveragePooling2D())

model.add(Dense(128, kernel_regularizer=l2(1e-4)))
model.add(BatchNormalization())
model.add(ReLU())
model.add(Dropout(0.5))

model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
model.compile(optimizer=Adam(learning_rate=1e-4),
loss='binary_crossentropy',
metrics=['accuracy'])

In [16]:
early_stopping = EarlyStopping(patience=5, restore_best_weights=True, monitor='val_loss', verbose=1)

lr_reduce = ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=2, verbose=1, min_lr=1e-6
)

In [17]:
callbacks = [early_stopping, lr_reduce]

In [None]:
history = model.fit(
    train_combined,
    steps_per_epoch=train_steps,
    epochs=EPOCHS,
    validation_data=val_combined,
    validation_steps=val_steps_balanced,
    callbacks=callbacks
)

Epoch 1/30
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 802ms/step - accuracy: 0.6427 - loss: 0.7114

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Val Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()


# Conclusion

***Achieved an accuracy of 87% on Train and Validation data, suggesting we are in a strong range for medical image classification, especially on a limited dataset.***