In [2]:
# Define paths to the training, validation, and test datasets
train_folder='./dataset/train'
test_folder='./dataset/test'
validate_folder = './dataset/valid'

In [3]:
# Define paths to the specific classes within the dataset
normal_folder = '/normal'
adenocarcinoma_folder = '/adenocarcinoma_left.lower.lobe_T2_N0_M0_Ib'
large_cell_carcinoma_folder = '/large.cell.carcinoma_left.hilum_T2_N2_M0_IIIa'
squamous_cell_carcinoma_folder = '/squamous.cell.carcinoma_left.hilum_T1_N2_M0_IIIa'

In [4]:
# Import necessary libraries
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, SpatialDropout2D, Activation, Lambda, Flatten, LSTM
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import utils


In [5]:
print("Libraries Imported")

Libraries Imported


In [6]:
# Set the image size for resizing
IMAGE_SIZE = (350, 350)

In [7]:
# Initialize the image data generators for training and testing
print("Reading training images from:", train_folder)
print("Reading validation images from:", validate_folder)

Reading training images from: ./dataset/train
Reading validation images from: ./dataset/valid


In [8]:
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1./255)

In [9]:
# Define the batch size for training
batch_size = 8

In [10]:
# Create the training data generator
train_generator = train_datagen.flow_from_directory(
    train_folder,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    color_mode="rgb",
    class_mode='categorical'
)

Found 613 images belonging to 4 classes.


In [11]:
# Create the validation data generator
validation_generator = test_datagen.flow_from_directory(
    test_folder,
    target_size=IMAGE_SIZE,
    batch_size=batch_size,
    color_mode="rgb",
    class_mode='categorical'
)

Found 315 images belonging to 4 classes.


In [12]:
# Set up callbacks for learning rate reduction, early stopping, and model checkpointing
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
learning_rate_reduction = ReduceLROnPlateau(monitor='loss', patience=5, verbose=2, factor=0.5, min_lr=0.000001)
early_stops = EarlyStopping(monitor='loss', min_delta=0, patience=6, verbose=2, mode='auto')
checkpointer = ModelCheckpoint(filepath='best_model.weights.h5', verbose=2, save_best_only=True, save_weights_only=True)

In [13]:
# Define the number of output classes
OUTPUT_SIZE = 4

# Load a pre-trained model (Xception) without the top layers and freeze its weights
pretrained_model = tf.keras.applications.Xception(weights='imagenet', include_top=False, input_shape=[*IMAGE_SIZE, 3])
pretrained_model.trainable = False

# Create a new model with the pre-trained base and additional layers for classification
model = Sequential()
model.add(pretrained_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(OUTPUT_SIZE, activation='softmax'))

print("Pretrained model used:")
pretrained_model.summary()

print("Final model created:")
model.summary()

# Compile the model with an optimizer, loss function, and evaluation metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Pretrained model used:


Final model created:


In [15]:
print("Validation data samples:", len(validation_generator))


Validation data samples: 40


In [16]:
validation_steps = max(1, len(validation_generator) // batch_size)


In [17]:
print("Train data samples:", len(train_generator))


Train data samples: 77


KeyboardInterrupt: 

In [18]:
# Train the model with the training and validation data generators
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,  # Ensures complete batch usage
    epochs=50,
    callbacks=[learning_rate_reduction, checkpointer],
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // batch_size  # Ensures complete batch usage
)

# Print the final training and validation accuracy safely
print("Final training accuracy =", history.history.get('accuracy', [0])[-1])
print("Final validation accuracy =", history.history.get('val_accuracy', [0])[-1])


Epoch 1/50
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5047 - loss: 1.1229
Epoch 1: val_loss improved from inf to 0.98084, saving model to best_model.weights.h5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 3s/step - accuracy: 0.5055 - loss: 1.1214 - val_accuracy: 0.5064 - val_loss: 0.9808 - learning_rate: 0.0010
Epoch 2/50
[1m 1/76[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:33[0m 2s/step - accuracy: 0.7500 - loss: 0.6963
Epoch 2: val_loss improved from 0.98084 to 0.54451, saving model to best_model.weights.h5
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step - accuracy: 0.7500 - loss: 0.6963 - val_accuracy: 0.6667 - val_loss: 0.5445 - learning_rate: 0.0010
Epoch 3/50
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6509 - loss: 0.8247
Epoch 3: val_loss did not improve from 0.54451
[1m76/76[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 3s/step - accuracy: