In [1]:
!pip install tensorflow keras opencv-python scikit-learn pandas numpy matplotlib




In [2]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("adityamahimkar/iqothnccd-lung-cancer-dataset")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/iqothnccd-lung-cancer-dataset


In [3]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("justinkirby/the-cancer-imaging-archive-lidcidri")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/justinkirby/the-cancer-imaging-archive-lidcidri?dataset_version_number=1...


100%|██████████| 9.23M/9.23M [00:00<00:00, 120MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/justinkirby/the-cancer-imaging-archive-lidcidri/versions/1


In [4]:
data_dir = '/kaggle/input/iqothnccd-lung-cancer-dataset/The IQ-OTHNCCD lung cancer dataset'


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 32

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    horizontal_flip=True,
    validation_split=0.2  # 20% for validation
)

# Training data
train_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

# Validation data
val_generator = train_datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)


Found 878 images belonging to 1 classes.
Found 219 images belonging to 1 classes.


In [6]:
print(train_generator.class_indices)


{'The IQ-OTHNCCD lung cancer dataset': 0}


In [7]:
test_dir = '/kaggle/input/iqothnccd-lung-cancer-dataset/Test cases'

test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)


Found 0 images belonging to 0 classes.


In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 32

datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2  # 80% train, 20% validation
)

train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

val_generator = datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)


Found 878 images belonging to 1 classes.
Found 219 images belonging to 1 classes.


In [9]:
data_dir = '/kaggle/input/iqothnccd-lung-cancer-dataset/The IQ-OTHNCCD lung cancer dataset/The IQ-OTHNCCD lung cancer dataset'


In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

img_size = (224, 224)
batch_size = 32

datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2  # 20% for validation
)

train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
    shuffle=True
)

val_generator = datagen.flow_from_directory(
    data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)


Found 878 images belonging to 3 classes.
Found 219 images belonging to 3 classes.


In [11]:
print(train_generator.class_indices)


{'Bengin cases': 0, 'Malignant cases': 1, 'Normal cases': 2}


In [12]:
import tensorflow as tf
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import Conv2D, Multiply, GlobalAveragePooling2D, Dense, Input, Softmax, Add
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


In [13]:
def soft_attention_block(inputs):
    # 1x1 Conv to produce attention map
    attention = Conv2D(filters=inputs.shape[-1], kernel_size=(1,1), activation='softmax')(inputs)
    # Multiply input features by attention map
    attended = Multiply()([inputs, attention])
    # Optionally add original features (residual connection)
    output = Add()([inputs, attended])
    return output


In [14]:
input_shape = (224, 224, 3)
base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)

x = base_model.output
x = soft_attention_block(x)
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
output = Dense(train_generator.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [15]:
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)


In [16]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=100,
    callbacks=[early_stop]
)


  self._warn_if_super_not_called()


Epoch 1/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m275s[0m 4s/step - accuracy: 0.7560 - loss: 0.5377 - val_accuracy: 0.8539 - val_loss: 0.5490
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 311ms/step - accuracy: 0.9868 - loss: 0.0531 - val_accuracy: 0.8676 - val_loss: 0.4889
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 305ms/step - accuracy: 0.9951 - loss: 0.0199 - val_accuracy: 0.7626 - val_loss: 0.6806
Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 362ms/step - accuracy: 0.9993 - loss: 0.0066 - val_accuracy: 0.7534 - val_loss: 0.7161
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 305ms/step - accuracy: 0.9973 - loss: 0.0056 - val_accuracy: 0.7169 - val_loss: 0.8222
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 304ms/step - accuracy: 1.0000 - loss: 0.0020 - val_accuracy: 0.7123 - val_loss: 0.9068
Epoch 7/100
[1m28/28

In [17]:
val_loss, val_acc = model.evaluate(val_generator)
print(f'Validation accuracy: {val_acc*100:.2f}%')


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 131ms/step - accuracy: 0.7397 - loss: 0.8446
Validation accuracy: 86.76%


In [18]:
model.save('lung_cancer_attention_model.h5')




In [19]:
# Evaluate on validation or test set
loss, accuracy = model.evaluate(val_generator)
print(f"Validation accuracy: {accuracy*100:.2f}%")


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 128ms/step - accuracy: 0.7397 - loss: 0.8446
Validation accuracy: 86.76%


In [22]:
import numpy as np
from sklearn.metrics import classification_report

# Get true labels and predictions
y_true = val_generator.classes
y_pred_probs = model.predict(val_generator)
y_pred = np.argmax(y_pred_probs, axis=1)

# Print classification report
print(classification_report(y_true, y_pred, target_names=list(train_generator.class_indices.keys())))


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 162ms/step
                 precision    recall  f1-score   support

   Bengin cases       0.83      0.42      0.56        24
Malignant cases       1.00      1.00      1.00       112
   Normal cases       0.85      0.98      0.91        83

       accuracy                           0.93       219
      macro avg       0.90      0.80      0.82       219
   weighted avg       0.93      0.93      0.92       219



In [21]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_generator.classes),
    y=train_generator.classes
)
class_weights = dict(enumerate(class_weights))
model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=100,
    callbacks=[early_stop],
    class_weight=class_weights
)


Epoch 1/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 3s/step - accuracy: 0.9878 - loss: 0.0592 - val_accuracy: 0.8447 - val_loss: 0.8434
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 306ms/step - accuracy: 0.9686 - loss: 0.1067 - val_accuracy: 0.3836 - val_loss: 2.8632
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 309ms/step - accuracy: 0.9952 - loss: 0.0613 - val_accuracy: 0.8813 - val_loss: 0.8816
Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 315ms/step - accuracy: 0.9927 - loss: 0.0371 - val_accuracy: 0.8493 - val_loss: 0.8908
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 309ms/step - accuracy: 0.9991 - loss: 0.0134 - val_accuracy: 0.7032 - val_loss: 1.2388
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 339ms/step - accuracy: 0.9920 - loss: 0.0158 - val_accuracy: 0.9087 - val_loss: 0.7932
Epoch 7/100
[1m28/28

<keras.src.callbacks.history.History at 0x7ab2b3c98310>