In [35]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.applications import Xception, InceptionResNetV2, MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.regularizers import l2
from tensorflow.keras.constraints import max_norm
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt

In [36]:

# ---------------------- DATA PREPARATION ----------------------
# Load and prepare data
df = pd.read_csv('/kaggle/input/labels/dataset_with_image_links.csv')
available_subjects = [d for d in os.listdir('/kaggle/working/lidc_train_test') 
                    if os.path.isdir(os.path.join('/kaggle/working/lidc_train_test', d))]
df = df[df['Subject ID'].isin(available_subjects)]
df['Image_Link'] = df['Image_Link'].str.replace('/kaggle/input/lidcidri/LIDC-IDRI-slices/', '/kaggle/working/lidc_train_test/')

# Split data
train_df, temp_df = train_test_split(df, test_size=0.3, stratify=df['Cancer_Label'], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['Cancer_Label'], random_state=42)

def create_image_df(df):
    image_paths = []
    labels = []
    for _, row in df.iterrows():
        image_dir = row['Image_Link']
        if not os.path.exists(image_dir):
            continue
        label = row['Cancer_Label']
        for file in os.listdir(image_dir):
            if file.endswith(('.png', '.jpg')):
                image_paths.append(os.path.join(image_dir, file))
                labels.append(label)
    return pd.DataFrame({'image_path': image_paths, 'label': labels})

train_images = create_image_df(train_df)
val_images = create_image_df(val_df)
test_images = create_image_df(test_df)


In [37]:
# ---------------------- CLASS BALANCING ----------------------
class_weights = compute_class_weight('balanced', 
                                   classes=np.unique(train_images['label']), 
                                   y=train_images['label'])
class_weights = dict(enumerate(class_weights))


In [38]:
# ---------------------- ENHANCED DATA AUGMENTATION ----------------------
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest'
)

val_test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_images,
    x_col='image_path',
    y_col='label',
    target_size=(256, 256),
    batch_size=32,
    class_mode='raw',
    shuffle=True
)

val_generator = val_test_datagen.flow_from_dataframe(
    dataframe=val_images,
    x_col='image_path',
    y_col='label',
    target_size=(256, 256),
    batch_size=32,
    class_mode='raw',
    shuffle=False
)


Found 9837 validated image filenames.
Found 2067 validated image filenames.


In [39]:

# ---------------------- MODEL ARCHITECTURE ----------------------
def create_model(base_model, fine_tune=False):
    # Freeze base model initially
    base_model.trainable = False
    
    # Unfreeze top layers for fine-tuning
    if fine_tune:
        base_model.trainable = True
        for layer in base_model.layers[:-20]:
            layer.trainable = False
            
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.6)(x)
    x = Dense(128, activation='relu', 
             kernel_regularizer=l2(0.01),
             kernel_constraint=max_norm(3))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.6)(x)
    outputs = Dense(2, activation='softmax')(x)
    
    return Model(inputs=base_model.input, outputs=outputs)


In [40]:
def get_callbacks(model_name):
    return [
        EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True, mode='max'),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6),
        ModelCheckpoint(f'best_{model_name}.keras', save_best_only=True)  # Changed to .keras
    ]

In [41]:

# ---------------------- TRAINING PIPELINE ----------------------
def train_model(base_model, model_name):
    # Phase 1: Feature extraction
    model = create_model(base_model)
    model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    
    print(f"\nPhase 1: Feature Extraction ({model_name})")
    history = model.fit(
        train_generator,
        epochs=50,
        validation_data=val_generator,
        class_weight=class_weights,
        callbacks=get_callbacks(model_name)
    )
    
    # Phase 2: Fine-tuning
    print(f"\nPhase 2: Fine-Tuning ({model_name})")
    fine_tune_model = create_model(base_model, fine_tune=True)
    fine_tune_model.set_weights(model.get_weights())
    
    fine_tune_model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-5),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    history_fine = fine_tune_model.fit(
        train_generator,
        epochs=30,
        initial_epoch=history.epoch[-1],
        validation_data=val_generator,
        class_weight=class_weights,
        callbacks=get_callbacks(f'{model_name}_fine')
    )
    
    # Combine histories
    combined_history = {
        'accuracy': history.history['accuracy'] + history_fine.history['accuracy'],
        'val_accuracy': history.history['val_accuracy'] + history_fine.history['val_accuracy'],
        'loss': history.history['loss'] + history_fine.history['loss'],
        'val_loss': history.history['val_loss'] + history_fine.history['val_loss']
    }
    
    return fine_tune_model, combined_history


In [42]:

# ---------------------- MODEL TRAINING ----------------------
# Initialize base models with correct input shapes
xception_base = Xception(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
inception_base = InceptionResNetV2(weights='imagenet', include_top=False, input_shape=(256, 256, 3))
mobilenet_base = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))  # Updated shape

# MobileNet-specific generator (224x224)
mobilenet_train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_images,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),  # Adjusted size
    batch_size=32,
    class_mode='raw',
    shuffle=True
)

mobilenet_val_generator = val_test_datagen.flow_from_dataframe(
    dataframe=val_images,
    x_col='image_path',
    y_col='label',
    target_size=(224, 224),  # Adjusted size
    batch_size=32,
    class_mode='raw',
    shuffle=False
)
# Train models with appropriate generators
xception_model, xception_history = train_model(xception_base, 'Xception')
inception_model, inception_history = train_model(inception_base, 'InceptionResNetV2')
mobilenet_model, mobilenet_history = train_model(
    mobilenet_base, 
    'MobileNetV2',
    train_generator=mobilenet_train_generator,  # Pass custom generators
    val_generator=mobilenet_val_generator
)


Found 9837 validated image filenames.
Found 2067 validated image filenames.

Phase 1: Feature Extraction (Xception)
Epoch 1/50


  self._warn_if_super_not_called()


[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m165s[0m 493ms/step - accuracy: 0.5055 - loss: 3.1114 - val_accuracy: 0.2284 - val_loss: 2.0846 - learning_rate: 0.0010
Epoch 2/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 451ms/step - accuracy: 0.5613 - loss: 1.6022 - val_accuracy: 0.2912 - val_loss: 1.4590 - learning_rate: 0.0010
Epoch 3/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 451ms/step - accuracy: 0.5852 - loss: 1.1924 - val_accuracy: 0.4436 - val_loss: 1.1038 - learning_rate: 0.0010
Epoch 4/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 444ms/step - accuracy: 0.5922 - loss: 1.0059 - val_accuracy: 0.1708 - val_loss: 1.2330 - learning_rate: 0.0010
Epoch 5/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 449ms/step - accuracy: 0.5852 - loss: 0.9467 - val_accuracy: 0.7441 - val_loss: 0.8111 - learning_rate: 0.0010
Epoch 6/50
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

E0000 00:00:1745599511.786437      93 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1745599511.933810      93 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1745599512.722304      93 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1745599512.862132      93 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1745599513.216903      93 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:0

[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 481ms/step - accuracy: 0.5765 - loss: 0.9061 - val_accuracy: 0.4906 - val_loss: 0.9310 - learning_rate: 1.0000e-05
Epoch 11/30
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 457ms/step - accuracy: 0.5827 - loss: 0.8523 - val_accuracy: 0.5448 - val_loss: 0.8838 - learning_rate: 1.0000e-05
Epoch 12/30
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 457ms/step - accuracy: 0.5921 - loss: 0.8283 - val_accuracy: 0.5385 - val_loss: 0.8866 - learning_rate: 1.0000e-05
Epoch 13/30
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 458ms/step - accuracy: 0.5904 - loss: 0.8174 - val_accuracy: 0.4886 - val_loss: 0.9129 - learning_rate: 1.0000e-05
Epoch 14/30
[1m308/308[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 455ms/step - accuracy: 0.6073 - loss: 0.8127 - val_accuracy: 0.4640 - val_loss: 0.9281 - learning_rate: 2.0000e-06
Epoch 15/30
[1m308/308[0m [32m━━━━

TypeError: train_model() got an unexpected keyword argument 'train_generator'

In [None]:
# ---------------------- VISUALIZATION ----------------------
def plot_metrics(history, model_name):
    plt.figure(figsize=(12, 5))
    
    plt.subplot(1, 2, 1)
    plt.plot(history['accuracy'], label='Train')
    plt.plot(history['val_accuracy'], label='Validation')
    plt.title(f'{model_name} Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(history['loss'], label='Train')
    plt.plot(history['val_loss'], label='Validation')
    plt.title(f'{model_name} Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.tight_layout()
    plt.savefig(f'{model_name}_metrics.png')
    plt.show()

plot_metrics(xception_history, 'Xception')
plot_metrics(inception_history, 'InceptionResNetV2')
plot_metrics(mobilenet_history, 'MobileNetV2')

In [None]:
# ---------------------- MODEL EVALUATION ----------------------
def evaluate_model(model, generator):
    results = model.evaluate(generator)
    print(f"Loss: {results[0]:.4f} - Accuracy: {results[1]:.4f}")

print("\nXception Evaluation:")
evaluate_model(xception_model, val_generator)

print("\nInceptionResNetV2 Evaluation:")
evaluate_model(inception_model, val_generator)

print("\nMobileNetV2 Evaluation:")
evaluate_model(mobilenet_model, val_generator)

In [None]:
# ---------------------- MODEL SAVING ----------------------
xception_model.save('xception_finall.h5')
inception_model.save('inception_finall.h5')
mobilenet_model.save('mobilenet_finall.h5')