In [5]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, Dense, Flatten, BatchNormalization
from tensorflow.keras import models,layers
from tensorflow.keras.optimizers import RMSprop  # Import the legacy RMSprop optimizer
from tensorflow.keras.utils import to_categorical

# Set the path to your dataset
dataset_path = r'E:\Downloads\lung_cls3'


In [6]:
#cross validation code
# Data preprocessing
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation'
)


Found 5523 images belonging to 3 classes.
Found 1379 images belonging to 3 classes.


In [14]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential

# Set the path to your dataset
dataset_path = r'E:\Downloads\lung_cls3'

# Data preprocessing
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2
)

# Load data using the generator
train_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='training'
)

# Initialize KFold cross-validation
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True)

# Initialize lists to store results from each fold
accuracies = []

# Define the model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(3, activation='softmax'))  # Adjust the output size based on your classes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Loop through each fold
for fold, (train_index, val_index) in enumerate(kf.split(train_generator)):
    print(f"Fold {fold + 1}/{n_splits}")
    
    # Set up new data generators for this fold
    train_generator_fold = train_datagen.flow_from_directory(
        dataset_path,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        subset='training',
        shuffle=False
    )

    val_generator_fold = train_datagen.flow_from_directory(
        dataset_path,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        subset='validation',
        shuffle=False
    )

    # Train the model with this fold's data
    history = model.fit(
        train_generator_fold,
        epochs=1,
        validation_data=val_generator_fold,
        steps_per_epoch=len(train_generator_fold),
        validation_steps=len(val_generator_fold)
    )

    # Evaluate the model on the test set for this fold
    test_generator_fold = train_datagen.flow_from_directory(
        dataset_path,
        target_size=(224, 224),
        batch_size=32,
        class_mode='categorical',
        shuffle=False
    )
    test_accuracy = model.evaluate(test_generator_fold)[1]
    accuracies.append(test_accuracy)

# Calculate the mean and standard deviation of accuracies
mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)

# Print the mean and standard deviation of accuracies
print(f"Mean Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")




Found 5523 images belonging to 3 classes.
Fold 1/5
Found 5523 images belonging to 3 classes.
Found 1379 images belonging to 3 classes.
Found 6902 images belonging to 3 classes.
Fold 2/5
Found 5523 images belonging to 3 classes.
Found 1379 images belonging to 3 classes.
Found 6902 images belonging to 3 classes.
Fold 3/5
Found 5523 images belonging to 3 classes.
Found 1379 images belonging to 3 classes.
Found 6902 images belonging to 3 classes.
Fold 4/5
Found 5523 images belonging to 3 classes.
Found 1379 images belonging to 3 classes.
Found 6902 images belonging to 3 classes.
Fold 5/5
Found 5523 images belonging to 3 classes.
Found 1379 images belonging to 3 classes.
Found 6902 images belonging to 3 classes.
Mean Accuracy: 0.6429 ± 0.1034


In [1]:
# Build the CNN model
model = models.Sequential()

model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))

model.add(layers.Flatten())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(3, activation='softmax'))  # Adjust the output size based on your classes

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model with steps_per_epoch set to 100
epochs = 100
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=len(validation_generator)
)

# Evaluate the model
test_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Calculate accuracy
test_accuracy = model.evaluate(test_generator)[1]
print("Test Accuracy:", test_accuracy)

# Save the model
model.save('cnn_model.h5')



Found 5523 images belonging to 3 classes.
Found 1379 images belonging to 3 classes.



Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/

  saving_api.save_model(


In [3]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# ... (previous code)

# Evaluate the model
test_generator = train_datagen.flow_from_directory(
    dataset_path,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Get true labels
y_true = test_generator.classes

# Get predicted probabilities
y_pred_probabilities = model.predict(test_generator)

# Get predicted labels
y_pred = np.argmax(y_pred_probabilities, axis=1)

# Calculate accuracy
test_accuracy = model.evaluate(test_generator)[1]
print("Test Accuracy:", test_accuracy)

# Print Confusion Matrix
print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))

# Print Classification Report
print("\nClassification Report:")
print(classification_report(y_true, y_pred))

# Print AUC Score
print("\nAUC Score:")
print(roc_auc_score(to_categorical(y_true), y_pred_probabilities))


Found 1379 images belonging to 3 classes.
Test Accuracy: 0.8375634551048279
Confusion Matrix:
[[437  15   3]
 [ 20 442   0]
 [  9 177 276]]

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.96      0.95       455
           1       0.70      0.96      0.81       462
           2       0.99      0.60      0.74       462

    accuracy                           0.84      1379
   macro avg       0.87      0.84      0.83      1379
weighted avg       0.87      0.84      0.83      1379


AUC Score:
0.8966987816497086
