In [None]:
# Mount Google Drive at /content/drive; the dataset folders used later in this
# notebook are read from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read while streaming each download.
CHUNK_SIZE = 40960
# Comma-separated list of `directory:percent-encoded-download-url` entries; the
# download loop splits each entry and URL-decodes the second part.
# NOTE(review): the URL is a time-limited signed link (it carries X-Goog-Date and
# X-Goog-Expires parameters), so it presumably has to be regenerated once it
# expires -- confirm before relying on this cell.
DATA_SOURCE_MAPPING = 'chest-ctscan-images:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F839140%2F1432479%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240422%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240422T203048Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D63cc7109f58b33eba1e8c1f3773af4c1fae053d3e00a591da1e16d7f8c93f91c6298c27a7aca59b27ec60a44976adb26c98153a5ee65db7f60c7d2c23ae3afb6596e18769d6f791359e3a42e9f6bc9531c81dd2ec53e618b1a9b4cd7273d9f6cd9a0b2db7e42d5ea99df79f537e342dfc9e6cf100c1d09145436f339bb67b66daa1e09540b81558a8a656bada950b2eaea534d10b137348a3a47ba3d98419afd3defca6170c0b95312cd58061d923cb098bd404c8767c1875fef0a1890c6d79add700c319ac6475a816facf156df68bc4b9a4add2aad669aed7e4c00c983ddf373cad1732ddb6e13a7cf4ec94be87ff0b38f6018449eff14ecc7c716b4594153'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every `directory:encoded_url` entry of DATA_SOURCE_MAPPING into
# KAGGLE_INPUT_PATH/<directory> and unpack it (zip when the URL path ends in
# ".zip", otherwise handed to tarfile). HTTP and OS errors are reported and the
# loop moves on to the next entry.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a stray colon later in the entry cannot
    # break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            # The header may be absent (e.g. chunked responses); fall back to 0
            # so the progress bar stays empty instead of raising TypeError.
            total_bytes = int(total_length) if total_length else 0
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                # Clamp to the bar width in case the server sends more bytes
                # than it advertised.
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise miss the unflushed tail of the archive.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here would
                # replace the module and break every later tar source.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Failed to load (likely expired) https://storage.googleapis.com/kaggle-data-sets/839140/1432479/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240422%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240422T203048Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=63cc7109f58b33eba1e8c1f3773af4c1fae053d3e00a591da1e16d7f8c93f91c6298c27a7aca59b27ec60a44976adb26c98153a5ee65db7f60c7d2c23ae3afb6596e18769d6f791359e3a42e9f6bc9531c81dd2ec53e618b1a9b4cd7273d9f6cd9a0b2db7e42d5ea99df79f537e342dfc9e6cf100c1d09145436f339bb67b66daa1e09540b81558a8a656bada950b2eaea534d10b137348a3a47ba3d98419afd3defca6170c0b95312cd58061d923cb098bd404c8767c1875fef0a1890c6d79add700c319ac6475a816facf156df68bc4b9a4add2aad669aed7e4c00c983ddf373cad1732ddb6e13a7cf4ec94be87ff0b38f6018449eff14ecc7c716b4594153 to path /kaggle/input/chest-ctscan-images
Data source import complete.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import skimage.io
import keras.backend as K
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import ReLU, LeakyReLU
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Turn on memory growth for every GPU TensorFlow reports; when no GPU is
# listed the loop body simply never runs.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Root folder of the dataset under the /content/drive mount; change this one
# value to point the notebook at a different copy of the data.
DATA_DIR = "/content/drive/MyDrive/Data"

# One sub-folder per split.
train_path = f"{DATA_DIR}/train"
valid_path = f"{DATA_DIR}/valid"
test_path = f"{DATA_DIR}/test"

In [None]:
# Image generator created with all-default settings (no arguments are passed);
# the same instance feeds the train, validation and test iterators.
# NOTE(review): no `rescale` is given, so pixel values are presumably fed to the
# network unscaled -- consider rescale=1./255 and confirm the effect on training.
datagen = ImageDataGenerator()

In [None]:
# Training iterator: images under train_path are resized to 48x48 and served in
# batches of 64 with one-hot ("categorical") labels.
train_dataset = datagen.flow_from_directory(
    train_path,
    batch_size=64,
    class_mode="categorical",
    target_size=(48, 48),
)

Found 613 images belonging to 4 classes.


In [None]:
# Validation iterator: images under valid_path are resized to 48x48 and served
# in batches of 64 with one-hot ("categorical") labels.
valid_dataset = datagen.flow_from_directory(
    valid_path,
    batch_size=64,
    class_mode="categorical",
    target_size=(48, 48),
)

Found 72 images belonging to 4 classes.


In [None]:
# Test iterator: images under test_path are resized to 48x48 and served in
# batches of 64 with one-hot ("categorical") labels.
# shuffle=False keeps the batches in the same order as `test_dataset.classes`,
# so predictions from model.predict(test_dataset) line up with the true labels
# when the confusion matrix and classification report are computed.
test_dataset = datagen.flow_from_directory(
    directory=test_path,
    target_size=(48, 48),
    class_mode="categorical",
    batch_size=64,
    shuffle=False
)

Found 315 images belonging to 4 classes.


In [None]:
# Disabled earlier draft of the network, kept as a bare string so it never runs
# (the cell only echoes the text).
# NOTE(review): as written the draft places Conv2D/MaxPooling2D layers after
# Flatten/Dense, so it looks like it would not build -- confirm before reviving.
'''
model = Sequential([
    Conv2D(32, (3, 3), input_shape=(48, 48, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Dense(4, activation='softmax')  # Output for 4-class classification
])'''

"\nmodel = Sequential([\n    Conv2D(32, (3, 3), input_shape=(48, 48, 3), activation='relu'),\n    MaxPooling2D((2, 2)),\n    Conv2D(64, (3, 3), activation='relu'),\n    MaxPooling2D((2, 2)),\n    Conv2D(128, (3, 3), activation='relu'),\n    Flatten(),\n    Dense(128, activation='relu'),\n    Dropout(0.25),\n    Conv2D(64, (3, 3), activation='relu'),\n    MaxPooling2D((2, 2)),\n    Conv2D(64, (3, 3), activation='relu'),\n    Dense(4, activation='softmax')  # Output for 4-class classification\n])"

In [None]:
from tensorflow.keras import regularizers

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
# Learning-rate schedule: multiply the rate by 0.3 once val_loss has gone 3
# epochs without improving, never dropping below 1e-6.
# NOTE(review): the model.fit call in this notebook constructs its own
# ReduceLROnPlateau with the same arguments, so this instance appears unused.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=3,
    min_lr=1e-6,
)

In [None]:
# CNN classifier for 48x48 RGB inputs, assembled layer by layer. Every
# convolution and the hidden dense layer carry an L2(0.001) kernel penalty, and
# each convolution is followed by batch normalisation.
model = Sequential()

# Stage 1: one 32-filter convolution, then 2x2 pooling.
model.add(Conv2D(32, (3, 3), input_shape=(48, 48, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Stage 2: two stacked 64-filter convolutions, then 2x2 pooling.
model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(BatchNormalization())
model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Stage 3: one 128-filter convolution, no pooling.
model.add(Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(BatchNormalization())

# Classifier head: flatten, 128-unit hidden layer with dropout, 4-way softmax.
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model.add(Dropout(0.4))  # Adjusted dropout rate
model.add(Dense(4, activation='softmax'))

In [None]:
# Training configuration: Adam with a 1e-4 learning rate, categorical
# cross-entropy loss (matches the one-hot labels from the generators), and
# accuracy as the reported metric.
model.compile(
    optimizer=Adam(learning_rate=0.0001),  # Adjusted learning rate
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Report how many batches each iterator yields per pass.
for dataset_label, dataset in (("train_dataset", train_dataset), ("valid_dataset", valid_dataset)):
    print(f"Length of {dataset_label}:", len(dataset))

Length of train_dataset: 10
Length of valid_dataset: 2


In [None]:
# Disabled callbacks list kept as a bare string so it never runs (the cell only
# echoes the text). The model.fit call in this notebook builds its own
# EarlyStopping with the same arguments.
'''
callbacks = [
    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
]'''

'\ncallbacks = [\n    EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)\n]'

In [None]:
import numpy as np
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Integer class label of every training image, and the distinct labels present.
train_labels = train_dataset.classes
class_labels = np.unique(train_labels)

# Number of training images per label value (index == label).
class_counts = np.bincount(train_labels)

# Balanced weighting: n_samples / (n_classes * count). The dict is keyed by the
# actual label value rather than by position, so a label with no images (which
# would have count 0) is skipped instead of causing a division by zero and
# shifting the weights of the labels after it.
class_weights = {
    int(label): len(train_labels) / (len(class_labels) * class_counts[label])
    for label in class_labels
}

# Train for up to 30 epochs with the class weights applied to the loss.
# ReduceLROnPlateau cuts the learning rate to 0.3x after 3 epochs without a
# val_loss improvement (floor 1e-6); EarlyStopping ends training after 5 such
# epochs and restores the best weights seen.
history = model.fit(
    train_dataset,
    validation_data=valid_dataset,
    epochs=30,
    class_weight=class_weights,  # Pass the class weights here
    callbacks=[ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, min_lr=1e-6),
               EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)],
    verbose=1
)




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Score the trained model on the test iterator; the model was compiled with a
# single "accuracy" metric, so the result is [loss, accuracy].
model.evaluate(test_dataset)



[1.2955355644226074, 0.7015873193740845]

In [None]:
# Collect predictions and true labels from the SAME batches. Comparing
# model.predict(test_dataset) against test_dataset.classes is only valid when
# the iterator does not shuffle; pairing them batch by batch is correct either
# way, because each batch carries its own one-hot labels.
prob_batches = []
true_batches = []
for batch_index in range(len(test_dataset)):
    batch_images, batch_labels = test_dataset[batch_index]
    prob_batches.append(model.predict(batch_images, verbose=0))
    # Labels are one-hot (class_mode="categorical"), so argmax recovers the
    # integer class index.
    true_batches.append(np.argmax(batch_labels, axis=1))

y_pred_prob = np.concatenate(prob_batches)
y_pred = np.argmax(y_pred_prob, axis=1)

y_true = np.concatenate(true_batches)

conf_matrix = confusion_matrix(y_true, y_pred)
print('Confusion Matrix:')
print(conf_matrix)

Confusion Matrix:
[[47 13 24 36]
 [18  8 10 15]
 [26  7  7 14]
 [29 12 25 24]]


In [None]:
from sklearn.metrics import classification_report
# Row labels for the report.
# NOTE(review): assumed to follow the generator's class-index order -- confirm
# against test_dataset.class_indices.
class_names = [
    'adenocarcinoma',
    'large.cell.carcinoma',
    'normal',
    'squamous.cell.carcinoma',
]

# Per-class precision / recall / F1 for the test predictions.
report = classification_report(y_true, y_pred, target_names=class_names)
print(report)

                         precision    recall  f1-score   support

         adenocarcinoma       0.39      0.39      0.39       120
   large.cell.carcinoma       0.20      0.16      0.18        51
                 normal       0.11      0.13      0.12        54
squamous.cell.carcinoma       0.27      0.27      0.27        90

               accuracy                           0.27       315
              macro avg       0.24      0.24      0.24       315
           weighted avg       0.28      0.27      0.27       315

