In [None]:
# -*- coding: utf-8 -*-
"""UNet_Classification.ipynb"""

import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import optimizers
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os

# Mount Google Drive (Colab only). The dataset paths used by this notebook
# all live under the /content/drive mount point.
from google.colab import drive
drive.mount('/content/drive')

# Paths
# A single dataset root, so relocating the data means editing one line.
# The three split paths evaluate to exactly the same strings as before.
DATA_DIR = "/content/drive/MyDrive/Data"
train_path = f"{DATA_DIR}/train"
valid_path = f"{DATA_DIR}/valid"
test_path = f"{DATA_DIR}/test"

# Constants
IMAGE_SIZE = 224  # images are resized to IMAGE_SIZE x IMAGE_SIZE when loaded
N_CLASSES = 4     # default width of the model's softmax output layer
BATCH_SIZE = 32   # number of images per batch produced by the dataset loaders

# Data generators
# NOTE: the ImageDataGenerator objects are kept so that the names stay defined,
# but nothing in the visible notebook feeds data through them. Their rescale
# and augmentation settings therefore have no effect on the tf.data datasets
# built below; the 1/255 rescale is applied explicitly in _load_split instead.
train_datagen = ImageDataGenerator(
    rescale=1./255.,
    rotation_range=10,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    dtype='float32'
)
valid_datagen = ImageDataGenerator(dtype='float32', rescale=1./255.)
test_datagen = ImageDataGenerator(dtype='float32', rescale=1.0/255.0)


def _load_split(directory, shuffle):
    """Load one dataset split as batches of grayscale images scaled to [0, 1].

    Args:
        directory: Folder containing one sub-folder per class.
        shuffle: Whether the dataset shuffles its samples.

    Returns:
        A tf.data dataset yielding (images, labels) batches. The
        `class_names` (and `file_paths`, when present) attributes of the
        underlying directory dataset are copied onto the returned dataset.
    """
    raw = tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        shuffle=shuffle,
        image_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=BATCH_SIZE,
        color_mode="grayscale"
    )
    # image_dataset_from_directory yields raw 0-255 pixel values; scale them
    # here so training and evaluation see the [0, 1] range that the
    # rescale=1/255 settings above were meant to provide.
    scaled = raw.map(lambda images, labels: (images / 255.0, labels))
    # Dataset.map returns a new object without the Keras-specific attributes,
    # so carry them over for code that reads e.g. `.class_names`.
    for attr in ("class_names", "file_paths"):
        if hasattr(raw, attr):
            setattr(scaled, attr, getattr(raw, attr))
    return scaled


train_generator = _load_split(train_path, shuffle=True)
valid_generator = _load_split(valid_path, shuffle=True)
# The test split is NOT shuffled: a shuffled dataset is reshuffled on every
# iteration, so labels gathered in one pass would not line up with
# predictions produced in another pass.
test_generator = _load_split(test_path, shuffle=False)

test_class_names = test_generator.class_names

# Grayscale -> RGB helper
def convert_to_rgb(images):
    """Return `images` with three channels.

    Inputs whose last dimension is 1 are expanded with
    tf.image.grayscale_to_rgb; anything else is returned unchanged.
    """
    if images.shape[-1] != 1:
        return images
    return tf.image.grayscale_to_rgb(images)

# U-Net style encoder/decoder followed by a dense classification head
def build_unet_model(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), n_classes=N_CLASSES):
    """Build (but do not compile) a U-Net shaped image classifier.

    The network has four encoder stages (64/128/256/512 filters, each followed
    by 2x2 max pooling), a 1024-filter bottleneck, and four decoder stages
    that upsample by 2 and concatenate the matching encoder output. The final
    feature map is globally average-pooled and passed through
    Dense(128) -> Dropout(0.5) -> Dense(n_classes, softmax).

    Args:
        input_shape: (height, width, channels) of the input images. Because
            of the four 2x2 poolings, height and width should be divisible
            by 16 so that upsampled maps line up with the skip connections.
        n_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled keras Model mapping images to class probabilities.
    """
    def double_conv(tensor, filters):
        # Two stacked 3x3 same-padded ReLU convolutions with `filters` channels.
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    inputs = Input(input_shape)

    # Encoder (downsampling): keep each stage's pre-pooling output as a skip.
    skips = []
    features = inputs
    for filters in (64, 128, 256, 512):
        features = double_conv(features, filters)
        skips.append(features)
        features = MaxPooling2D((2, 2))(features)

    # Bottleneck
    features = double_conv(features, 1024)

    # Decoder (upsampling): deepest skip connection is consumed first.
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        features = UpSampling2D((2, 2))(features)
        features = concatenate([features, skip])
        features = double_conv(features, filters)

    # Classification head
    pooled = GlobalAveragePooling2D()(features)
    hidden = Dense(128, activation='relu')(pooled)
    hidden = Dropout(0.5)(hidden)
    outputs = Dense(n_classes, activation='softmax')(hidden)

    return Model(inputs, outputs)

# Build and compile the model
model = build_unet_model(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',  # labels are integer class ids
    metrics=['accuracy'],
)

# Callbacks
# The checkpoint keeps only the best model seen so far in 'chestmodel_unet.keras'.
checkpointer = ModelCheckpoint('chestmodel_unet.keras', verbose=1, save_best_only=True)
# With patience=10 and only 5 epochs below, early stopping cannot trigger in this run.
early_stopping = EarlyStopping(monitor='val_loss', patience=10)
training_callbacks = [checkpointer, early_stopping]


def _to_rgb_batch(images, labels):
    """Expand a grayscale image batch to RGB; labels pass through untouched."""
    return convert_to_rgb(images), labels


# Train the model
history = model.fit(
    train_generator.map(_to_rgb_batch),
    epochs=5,
    validation_data=valid_generator.map(_to_rgb_batch),
    callbacks=training_callbacks,
)

# Evaluate the model
# Uses the in-memory model as it stands after the last epoch; evaluate()
# returns [loss, accuracy] for the metrics configured in compile().
train_result = model.evaluate(train_generator.map(_to_rgb_batch))
print(f'Training Loss: {train_result[0]}, Training Accuracy: {train_result[1]}')

test_result = model.evaluate(test_generator.map(_to_rgb_batch))
print(f'Test Loss: {test_result[0]}, Test Accuracy: {test_result[1]}')

# Predictions and Evaluation
# Labels and predictions are gathered in the SAME pass over the test set.
# If the dataset shuffles (it reshuffles on every iteration), reading the
# labels in one loop and predicting in a second loop would pair each
# prediction with some other sample's label and invalidate every metric below.
y_true_batches = []
y_pred_batches = []
for images, labels in test_generator:
    y_true_batches.append(labels.numpy())
    y_pred_batches.append(model.predict_on_batch(convert_to_rgb(images)))
y_true = np.concatenate(y_true_batches)          # integer class ids, one per sample
y_pred = np.concatenate(y_pred_batches)          # softmax scores, one row per sample
y_pred_classes = np.argmax(y_pred, axis=1)

# Use the class names discovered from the test directory so that the name at
# position i really is the class with label id i. A hand-written list can
# silently disagree with the directory order.
class_names = list(test_class_names)
label_ids = list(range(len(class_names)))

print("Classification Report:")
# Passing `labels` keeps the report/matrix aligned with class_names even if
# some class never occurs in y_true or y_pred_classes.
print(classification_report(y_true, y_pred_classes, labels=label_ids, target_names=class_names))

conf_matrix = confusion_matrix(y_true, y_pred_classes, labels=label_ids)
plt.figure(figsize=(10, 7))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# ROC Curves
# One-vs-rest: for each class id, samples of that class are the positives and
# the score is that class's column of the predicted probabilities.
fpr, tpr, roc_auc = {}, {}, {}
for class_id in range(N_CLASSES):
    is_positive = (y_true == class_id)
    fpr[class_id], tpr[class_id], _ = roc_curve(is_positive, y_pred[:, class_id])
    roc_auc[class_id] = auc(fpr[class_id], tpr[class_id])

plt.figure()
for class_id in range(N_CLASSES):
    curve_label = 'ROC curve (area = %0.2f) for class %s' % (roc_auc[class_id], class_names[class_id])
    plt.plot(fpr[class_id], tpr[class_id], label=curve_label)
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

# Classify a single image
def predict(model, img):
    """Run `model` on one image and report the most likely class.

    Args:
        model: Object exposing a Keras-style `predict(batch)` method.
        img: Image accepted by tf.keras.preprocessing.image.img_to_array.

    Returns:
        (predicted_class, confidence): the entry of `test_class_names` with
        the highest score and that score as a percentage rounded to two
        decimals.

    NOTE(review): pixel values are passed to the model exactly as
    img_to_array returns them (no rescaling here) -- confirm this matches
    the value range the model was trained on.
    """
    pixels = tf.keras.preprocessing.image.img_to_array(img)
    pixels = convert_to_rgb(pixels)
    pixels = tf.image.resize(pixels, (IMAGE_SIZE, IMAGE_SIZE))
    batch = tf.expand_dims(pixels, 0)  # add the leading batch dimension
    scores = model.predict(batch)[0]
    best_index = np.argmax(scores)
    return test_class_names[best_index], round(100 * (np.max(scores)), 2)

# CT Scan analysis with U-Net
# Load the model file that the ModelCheckpoint callback writes during training.
classifier = tf.keras.models.load_model('chestmodel_unet.keras')

# Name reported for each output index of the classifier.
# NOTE(review): assumes this order matches the label ids used in training -- confirm.
class_labels = [
    'adenocarcinoma',
    'large_cell_carcinoma',
    'normal',
    'squamous_cell_carcinoma',
]

# Percentage used below as the minimum confidence for a "reliable" prediction.
# 85.0 is a placeholder: replace it with the model's measured test accuracy.
MODEL_ACCURACY = 85.0

# Function to preprocess the image for prediction
def preprocess_for_prediction(img, target_size=None):
    """Turn a grayscale crop into a single-image batch for the classifier.

    Args:
        img: Single-channel image array as used with OpenCV (the caller in
            this notebook passes crops of an image read with
            cv2.IMREAD_GRAYSCALE).
        target_size: Optional (width, height) passed to cv2.resize. Defaults
            to (IMAGE_SIZE, IMAGE_SIZE) so the crop always matches the size
            the notebook is configured for, instead of a separately
            hard-coded 224.

    Returns:
        float32 array of shape (1, height, width, 3) with values divided by 255.

    NOTE(review): the classifier must have been trained on inputs in this
    same 0-1 range -- confirm against the training input pipeline.
    """
    if target_size is None:
        # Resolved at call time so a changed IMAGE_SIZE is picked up.
        target_size = (IMAGE_SIZE, IMAGE_SIZE)
    img = cv2.resize(img, target_size)
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)  # replicate gray into 3 channels
    img = img.astype('float32')
    img = img / 255.0  # Simple rescaling since U-Net doesn't use MobileNet preprocessing
    img = np.expand_dims(img, axis=0)  # add the batch dimension
    return img

# Load the CT scan image
image_path = '/content/drive/MyDrive/Data/test/adenocarcinoma/000109 (2).png'
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # returns None when the file cannot be read

if image is None:
    print("Error: Unable to load the image. Please check the file format and path.")
else:
    # Candidate regions: blur, binarise at intensity 50, then take the outer
    # contours of the bright areas.
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    _, binary = cv2.threshold(blurred, 50, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Colour copy of the scan used as the canvas for boxes and labels.
    # NOTE(review): this canvas is BGR but plt.imshow below reads it as RGB,
    # so the (255, 0, 0) label text is displayed red -- confirm that is intended.
    output_image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    predictions = []

    for contour in contours:
        area = cv2.contourArea(contour)
        if area <= 100:
            # Ignore small contours (area of at most 100).
            continue

        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(output_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Classify the grayscale crop covered by the bounding box.
        cropped_image = image[y:y + h, x:x + w]
        preprocessed_image = preprocess_for_prediction(cropped_image)
        confidence_scores = classifier.predict(preprocessed_image)[0]
        predicted_class_index = np.argmax(confidence_scores)
        predicted_class = class_labels[predicted_class_index]
        confidence = round(100 * confidence_scores[predicted_class_index], 2)
        predictions.append((predicted_class, confidence))

        # putText's origin is the bottom-left corner of the text. Placing it
        # at y - 10 pushes the label off the canvas for boxes that touch the
        # top of the image, so keep the baseline at least 15 px from the top.
        label_y = max(y - 10, 15)
        cv2.putText(output_image, f"{predicted_class} ({confidence}%)", (x, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

    # Side-by-side view: untouched scan vs. annotated canvas.
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.title('Original CT Scan')
    plt.imshow(image, cmap='gray')
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.title('Detected Areas with Predictions')
    plt.imshow(output_image)
    plt.axis('off')
    plt.show()

    # Print individual predictions
    for i, (pred_class, conf) in enumerate(predictions):
        print(f'Detected area {i + 1}: {pred_class} with confidence {conf}%')

    # Detection logic incorporating model accuracy
    if predictions:
        # Keep only regions whose confidence reaches the MODEL_ACCURACY threshold.
        reliable_predictions = [(p, c) for p, c in predictions if c >= MODEL_ACCURACY]

        if reliable_predictions:
            # The most confident reliable region decides the overall result.
            final_pred_class, final_conf = max(reliable_predictions, key=lambda x: x[1])
            print(f'Final prediction result (considering {MODEL_ACCURACY}% model accuracy): '
                  f'{final_pred_class} with confidence {final_conf}%')

            cancer_types = ['adenocarcinoma', 'large_cell_carcinoma', 'squamous_cell_carcinoma']
            if final_pred_class.lower() in cancer_types:
                print(f"Cancer detected: {final_pred_class} with confidence {final_conf}% "
                      f"(model accuracy: {MODEL_ACCURACY}%)")
            else:
                print(f"No cancer detected: {final_pred_class} with confidence {final_conf}% "
                      f"(model accuracy: {MODEL_ACCURACY}%)")

            print(f"The image is classified as: {final_pred_class} with confidence {final_conf}%")
        else:
            print(f"No reliable predictions above model accuracy threshold ({MODEL_ACCURACY}%)")
            print("The image type cannot be confidently determined due to low confidence scores.")
    else:
        print("No significant areas detected.")
        print("The image type cannot be determined due to no detectable areas.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Found 613 files belonging to 4 classes.
Found 72 files belonging to 4 classes.
Found 315 files belonging to 4 classes.
Epoch 1/5
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 208s/step - accuracy: 0.2517 - loss: 318.8781
Epoch 1: val_loss improved from inf to 1.37874, saving model to chestmodel_unet.keras
20/20 ━━━━━━━━━━━━━━━━━━━━ 4343s 216s/step - accuracy: 0.2524 - loss: 309.8278 - val_accuracy: 0.1806 - val_loss: 1.3787
Epoch 2/5
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 208s/step - accuracy: 0.2762 - loss: 1.3565
Epoch 2: val_loss improved from 1.37874 to 1.34420, saving model to chestmodel_unet.keras
20/20 ━━━━━━━━━━━━━━━━━━━━ 4315s 215s/step - accuracy: 0.2768 - loss: 1.3560 - val_accuracy: 0.5000 - val_loss: 1.3442
Epoch 3/5
[1m20/20[0m [

In [None]:
# Repeats the Drive mount already performed in the first cell. The recorded
# output above shows Colab answering "Drive already mounted" in that situation.
# NOTE(review): this cell looks redundant -- confirm whether it is still needed.
from google.colab import drive
drive.mount('/content/drive')