In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/test_0_5161.jpeg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/test_0_4769.jpeg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/test_0_1443.jpeg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/test_0_5261.jpeg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/Tuberculosis-392.jpg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/test_0_5266.jpeg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/test_0_2134.jpeg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/test_0_991.jpeg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/test_0_1548.jpeg
/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val/Tuberculosis/Tuberculosis-176.jpg
/ka

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB5, ResNet50, VGG16
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report
import numpy as np

In [3]:
train_dir = '/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/train'
val_dir = '/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val'
test_dir = '/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/test'

# Data augmentation and normalization for training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Only rescaling for validation and test
val_test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

val_generator = val_test_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

Found 6054 images belonging to 5 classes.
Found 2016 images belonging to 5 classes.
Found 2025 images belonging to 5 classes.


In [4]:
def build_efficientnetb5():
    base_model = EfficientNetB5(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(5, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

def build_resnet50():
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(5, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

def build_vgg16():
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(5, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

In [5]:
def train_model(model, train_generator, val_generator, model_name):
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    history = model.fit(
        train_generator,
        steps_per_epoch=train_generator.samples // train_generator.batch_size,
        validation_data=val_generator,
        validation_steps=val_generator.samples // val_generator.batch_size,
        epochs=50,
        callbacks=[early_stopping]
    )
    model.save(f'{model_name}_lung_disease.h5')
    return history

# Train EfficientNetB5
effnet_model = build_efficientnetb5()
effnet_history = train_model(effnet_model, train_generator, val_generator, 'efficientnetb5')

# Train ResNet50
resnet_model = build_resnet50()
resnet_history = train_model(resnet_model, train_generator, val_generator, 'resnet50')

# Train VGG16
vgg16_model = build_vgg16()
vgg16_history = train_model(vgg16_model, train_generator, val_generator, 'vgg16')

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb5_notop.h5
[1m115263384/115263384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/50


  self._warn_if_super_not_called()


[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 928ms/step - accuracy: 0.6856 - loss: 0.8272 - val_accuracy: 0.1984 - val_loss: 1.9264
Epoch 2/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 695us/step - accuracy: 0.6667 - loss: 0.6274  
Epoch 3/50


  self.gen.throw(typ, value, traceback)
  current = self.get_monitor_value(logs)


[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 686ms/step - accuracy: 0.8562 - loss: 0.3485 - val_accuracy: 0.2545 - val_loss: 2.4863
Epoch 4/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 230us/step - accuracy: 0.8750 - loss: 0.2244  
Epoch 5/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 679ms/step - accuracy: 0.8784 - loss: 0.2955 - val_accuracy: 0.4603 - val_loss: 1.7926
Epoch 6/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 224us/step - accuracy: 0.9062 - loss: 0.2783  
Epoch 7/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 683ms/step - accuracy: 0.9027 - loss: 0.2501 - val_accuracy: 0.8165 - val_loss: 0.4950
Epoch 8/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 232us/step - accuracy: 0.9062 - loss: 0.1736  
Epoch 9/50
[1m189/189[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 688ms/step - accuracy: 0.9096 - loss: 0.2158 - val_accuracy: 0

In [6]:
def evaluate_model(model, test_generator):
    test_generator.reset()
    preds = model.predict(test_generator, steps=test_generator.samples // test_generator.batch_size + 1)
    pred_classes = np.argmax(preds, axis=1)
    true_classes = test_generator.classes
    class_labels = list(test_generator.class_indices.keys())
    report = classification_report(true_classes, pred_classes, target_names=class_labels)
    return report

# Evaluate EfficientNetB5
effnet_report = evaluate_model(effnet_model, test_generator)
print("EfficientNetB5 Classification Report:\n", effnet_report)

# Evaluate ResNet50
resnet_report = evaluate_model(resnet_model, test_generator)
print("ResNet50 Classification Report:\n", resnet_report)

# Evaluate VGG16
vgg16_report = evaluate_model(vgg16_model, test_generator)
print("VGG16 Classification Report:\n", vgg16_report)

[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 673ms/step
EfficientNetB5 Classification Report:
                       precision    recall  f1-score   support

 Bacterial Pneumonia       0.87      0.64      0.74       403
Corona Virus Disease       0.98      0.98      0.98       407
              Normal       0.91      0.98      0.94       404
        Tuberculosis       1.00      0.99      0.99       408
     Viral Pneumonia       0.69      0.83      0.75       403

            accuracy                           0.88      2025
           macro avg       0.89      0.88      0.88      2025
        weighted avg       0.89      0.88      0.88      2025

[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 318ms/step
ResNet50 Classification Report:
                       precision    recall  f1-score   support

 Bacterial Pneumonia       0.87      0.61      0.72       403
Corona Virus Disease       0.99      0.99      0.99       407
              Normal       0