In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adamax
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style('darkgrid')
import matplotlib.pyplot as plt

print('modules loaded')


modules loaded


In [2]:
data_dir = r"D:\CAPSTONE 400A\cse438\lung_colon_image_set\lung_image_sets"
filepaths = []
labels = []

for root, dirs, files in os.walk(data_dir):
    for file in files:
        file_path = os.path.join(root, file)
        filepaths.append(file_path)
        label = os.path.basename(root)
        labels.append(label)

df = pd.DataFrame({'filepaths': filepaths, 'labels': labels})
labels = df['labels']
print(df.head())


                                           filepaths    labels
0  D:\CAPSTONE 400A\cse438\lung_colon_image_set\l...  lung_aca
1  D:\CAPSTONE 400A\cse438\lung_colon_image_set\l...  lung_aca
2  D:\CAPSTONE 400A\cse438\lung_colon_image_set\l...  lung_aca
3  D:\CAPSTONE 400A\cse438\lung_colon_image_set\l...  lung_aca
4  D:\CAPSTONE 400A\cse438\lung_colon_image_set\l...  lung_aca


In [3]:
batch_size = 32
img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)
epochs = 3
k = 2  

def get_callbacks(model_name):
    callbacks = []
    checkpoint = ModelCheckpoint(filepath=f'model.{model_name}.h5', verbose=1, monitor='val_accuracy', mode='max')
    callbacks.append(checkpoint)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-6, verbose=1)
    callbacks.append(reduce_lr)
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=1)
    callbacks.append(early_stopping)
    return callbacks

def build_model():
    base_model = tf.keras.applications.Xception(input_shape=img_shape, include_top=False, weights='imagenet')
    base_model.trainable = True
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.55)(x)
    y = Dense(512, activation='relu')(x)

    y = Dropout(0.55)(y)
    predictions = Dense(class_count, activation='softmax', name='final')(y)
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer=Adamax(0.001), loss='categorical_crossentropy', metrics=['accuracy'])
    return model
    #x = Dropout(0.5)(x)  
    #y = Dense(256, activation='relu')(x)
    #y = Dropout(0.5)(y)

In [4]:
skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=123)
fold_no = 1
accuracies = []


In [None]:
for train_index, test_index in skf.split(df['filepaths'], df['labels']):
    train_df = df.iloc[train_index]
    test_df = df.iloc[test_index]

    tr_gen = ImageDataGenerator()
    ts_gen = ImageDataGenerator()

    train_gen = tr_gen.flow_from_dataframe(train_df, x_col='filepaths', y_col='labels',
                                           target_size=img_size, class_mode='categorical',
                                           color_mode='rgb', shuffle=True, batch_size=batch_size)

    test_gen = ts_gen.flow_from_dataframe(test_df, x_col='filepaths', y_col='labels',
                                          target_size=img_size, class_mode='categorical',
                                          color_mode='rgb', shuffle=False, batch_size=batch_size)

    g_dict = train_gen.class_indices
    classes = list(g_dict.keys())
    class_count = len(classes)

    model = build_model()
    callbacks = get_callbacks(f'Xception_fold_{fold_no}')

    history = model.fit(train_gen,
                        epochs=epochs,
                        validation_data=test_gen,
                        callbacks=callbacks)

    ts_length = len(test_df)
    test_batch_size = min(32, ts_length)
    test_steps = ts_length // test_batch_size

    test_score = model.evaluate(test_gen, steps=test_steps, verbose=1)
    print(f"Fold {fold_no} - Test Loss: ", test_score[0])
    print(f"Fold {fold_no} - Test Accuracy: ", test_score[1])

    preds = model.predict(test_gen)
    y_pred = np.argmax(preds, axis=1)
    print(f"Classification Report for Fold {fold_no}")
    print(classification_report(test_gen.classes, y_pred, target_names=classes))

    accuracies.append(test_score[1])
    fold_no += 1


Found 7500 validated image filenames belonging to 3 classes.
Found 7500 validated image filenames belonging to 3 classes.
Epoch 1/3
Epoch 1: saving model to model.Xception_fold_1.h5
Epoch 2/3
Epoch 2: saving model to model.Xception_fold_1.h5
Epoch 3/3
Epoch 3: saving model to model.Xception_fold_1.h5
Fold 1 - Test Loss:  0.004586650989949703
Fold 1 - Test Accuracy:  0.9987980723381042
Classification Report for Fold 1
              precision    recall  f1-score   support

    lung_aca       1.00      1.00      1.00      2500
      lung_n       1.00      1.00      1.00      2500
    lung_scc       1.00      1.00      1.00      2500

    accuracy                           1.00      7500
   macro avg       1.00      1.00      1.00      7500
weighted avg       1.00      1.00      1.00      7500

Found 7500 validated image filenames belonging to 3 classes.
Found 7500 validated image filenames belonging to 3 classes.
Epoch 1/3
 22/235 [=>............................] - ETA: 1:05:36 - loss: 0.

In [None]:
print(f'Mean accuracy over {k} folds: {np.mean(accuracies)}')
print(f'Standard Deviation over {k} folds: {np.std(accuracies)}')
