In [2]:
import pandas as pd
import numpy as np
import time
import math
import gc
import os
import PIL
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import matplotlib.image as mpimg
import seaborn as sns
import skimage
from skimage.filters import sobel
from skimage import segmentation
from skimage.color import label2rgb
from skimage.color import rgb2hed , hed2rgb
from skimage.exposure import rescale_intensity
from skimage.measure import regionprops , regionprops_table
from scipy import ndimage as ndi
import tifffile as tifi
from sklearn.preprocessing import StandardScaler
from pathlib import Path
from datetime import datetime, timedelta
import openslide
from openslide import OpenSlide
import cv2 
import random
import copy
import pyarrow.parquet as pq
import pyarrow as pa
from dateutil.relativedelta import relativedelta
from tqdm import tqdm

from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, GlobalMaxPooling2D, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, Callback
from tensorflow.keras.applications import EfficientNetB0, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB5

import warnings
warnings.filterwarnings('ignore')

In [3]:
"""The purpose of setting the random seed is to ensure that the results of the program are 
reproducible. By setting the same seed every time, the random number generator will produce 
the same sequence of numbers, which is useful for debugging, testing, and comparing different 
models or algorithms.
"""
def seed_everything(seed):
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
seed_everything(31)

These lines of code read in two CSV files, train.csv and test.csv, using the read_csv() function from the Pandas library. The ../input/mayo-clinic-strip-ai/ prefix before the file names suggests that the files are located in a directory or path within the project.

After reading in the files, the train_df and test_df variables store the contents of the files as Pandas dataframes. The .head() function is then called on train_df to display the first few rows of the dataframe, giving a preview of the data.

In [4]:
train_df = pd.read_csv('../input/mayo-clinic-strip-ai/train.csv')
test_df  = pd.read_csv('../input/mayo-clinic-strip-ai/test.csv')
train_df.head()

Unnamed: 0,image_id,center_id,patient_id,image_num,label
0,006388_0,11,006388,0,CE
1,008e5c_0,11,008e5c,0,CE
2,00c058_0,11,00c058,0,LAA
3,01adc5_0,11,01adc5,0,LAA
4,026c97_0,4,026c97,0,CE


These lines of code add three new columns to the train_df dataframe and one new column to the test_df dataframe:

"file_path" column in train_df: This column is created by applying a lambda function to the "image_id" column. The lambda function concatenates the image ID with a file path string to create the full file path of the corresponding image in the training dataset.

"file_path" column in test_df: This column is created in a similar way as the "file_path" column in train_df, but uses a different file path to access the test images.

"target" column in train_df: This column is created by applying another lambda function to the "label" column. The lambda function checks whether the label is "CE" and assigns a value of '1' if it is, and '0' otherwise.

In [5]:
train_df["file_path"] = train_df["image_id"].apply(lambda x: "../input/mayo-clinic-strip-ai-png-train-files/train_images/" + x + ".png")
test_df["file_path"] = test_df["image_id"].apply(lambda x: "../input/mayo-clinic-strip-ai/test/" + x + ".tif")
train_df["target"] = train_df["label"].apply(lambda x : '1' if x=="CE" else '0')
train_df.head()

Unnamed: 0,image_id,center_id,patient_id,image_num,label,file_path,target
0,006388_0,11,006388,0,CE,../input/mayo-clinic-strip-ai-png-train-files/...,1
1,008e5c_0,11,008e5c,0,CE,../input/mayo-clinic-strip-ai-png-train-files/...,1
2,00c058_0,11,00c058,0,LAA,../input/mayo-clinic-strip-ai-png-train-files/...,0
3,01adc5_0,11,01adc5,0,LAA,../input/mayo-clinic-strip-ai-png-train-files/...,0
4,026c97_0,4,026c97,0,CE,../input/mayo-clinic-strip-ai-png-train-files/...,1


In [6]:
test_df.head()

Unnamed: 0,image_id,center_id,patient_id,image_num,file_path
0,006388_0,11,006388,0,../input/mayo-clinic-strip-ai/test/006388_0.tif
1,008e5c_0,11,008e5c,0,../input/mayo-clinic-strip-ai/test/008e5c_0.tif
2,00c058_0,11,00c058,0,../input/mayo-clinic-strip-ai/test/00c058_0.tif
3,01adc5_0,11,01adc5,0,../input/mayo-clinic-strip-ai/test/01adc5_0.tif


In [7]:
train_df.shape

(754, 7)

In [8]:
train,test = train_test_split(train_df,test_size=0.2)
train.head()

Unnamed: 0,image_id,center_id,patient_id,image_num,label,file_path,target
393,83198b_0,9,83198b,0,CE,../input/mayo-clinic-strip-ai-png-train-files/...,1
311,63f341_0,11,63f341,0,CE,../input/mayo-clinic-strip-ai-png-train-files/...,1
165,369366_0,3,369366,0,CE,../input/mayo-clinic-strip-ai-png-train-files/...,1
105,23d2c1_0,7,23d2c1,0,CE,../input/mayo-clinic-strip-ai-png-train-files/...,1
10,03d1ec_0,11,03d1ec,0,LAA,../input/mayo-clinic-strip-ai-png-train-files/...,0


In [9]:
train.shape, test.shape

((603, 7), (151, 7))

These lines of code define an image data generator using the ImageDataGenerator class from the Keras library. This generator will be used to augment and preprocess the training and testing images before they are fed into a model.

The ImageDataGenerator is initialized with several parameters that specify the types of image augmentations to be performed, including rotation, horizontal and vertical shifts, horizontal flip, and brightness adjustment. These augmentations help to increase the diversity of the training images, which can improve the performance of the model.

The train_gen and test_gen variables are then created by calling the flow_from_dataframe() method on the dataframes train and test, respectively. These methods generate batches of images and labels by reading from the dataframes and applying the specified data augmentations.

In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range=10,  # rotation
    width_shift_range=0.2,  # horizontal shift
    height_shift_range=0.2,  # vertical shift
    horizontal_flip=True,  # horizontal flip
    brightness_range=[0.2, 1.2],
)  # brightness)

train_gen = datagen.flow_from_dataframe(
    train,
    #directory = '../input/jpg-images-strip-ai/train',
    x_col='file_path', 
    y_col='target', 
    target_size=(512, 512), 
    color_mode='rgb',
    class_mode='binary', 
    batch_size=32,
    seed = 31,
)

test_gen = datagen.flow_from_dataframe(
    test,
    #directory = '../input/jpg-images-strip-ai/train',
    x_col='file_path', 
    y_col='target', 
    target_size=(512, 512), 
    color_mode='rgb',
    class_mode='binary', 
    batch_size=1,
    shuffle = False
)

Found 601 validated image filenames belonging to 2 classes.
Found 151 validated image filenames belonging to 2 classes.


In [11]:
def step_decay(epoch):
    initial_lrate = 0.001
    drop = 0.5
    epochs_drop = 10.0
    lrate = initial_lrate * math.pow(drop, math.floor((epoch)/epochs_drop))
    return lrate

lrate = LearningRateScheduler(step_decay)
earstop = EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 3)

These lines of code define a function called model_MobileNetV2() that creates a classification model using the MobileNetV2 architecture from Keras.

The function first loads the MobileNetV2 model pre-trained on the ImageNet dataset and freezes all its layers so they are not updated during training.

Next, the function rebuilds the top layers of the model by adding a global average pooling layer to reduce the spatial dimensions of the feature maps, followed by batch normalization and dropout layers to prevent overfitting. Then, two dense layers with ReLU activation functions are added, and a final dense layer with a sigmoid activation function is added to produce a binary classification output.

Finally, the function compiles the model using the Adam optimizer with a given learning rate and binary crossentropy loss function, and sets the binary accuracy metric to evaluate the model's performance during training.

By creating this function, the code provides a simple and modular way to create a classification model using the MobileNetV2 architecture with customized top layers. The function can be easily adjusted by changing the hyperparameters, such as the learning rate or dropout rate, to fine-tune the model's performance.

In [21]:
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


def model_MobileNetV2(lr=0.001, dr_rate=0.15):
    model = MobileNetV2(include_top=False, weights='imagenet')
    model.trainable = False

    # Rebuild top
    x = GlobalAveragePooling2D()(model.output)
    x = BatchNormalization()(x)
    x = Dropout(dr_rate)(x)
    dense_1 = Dense(64, activation="relu")(x)
    dense_2 = Dense(32, activation="relu")(dense_1)
    outputs = Dense(1, activation="sigmoid")(dense_2)

    # Compile
    model = Model(model.inputs, outputs, name="MobileNetV2")
    optimizer = Adam(learning_rate=lr)
    model.compile(
        optimizer=optimizer, loss="binary_crossentropy", metrics=["binary_accuracy"]
    )
    return model


In [26]:
model_MobileNetV2 = model_MobileNetV2()
model_MobileNetV2.summary()

Model: "MobileNetV2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, None, None, 3 864         input_4[0][0]                    
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, None, None, 3 128         Conv1[0][0]                      
__________________________________________________________________________________________________
Conv1_relu (ReLU)               (None, None, None, 3 0           bn_Conv1[0][0]                   
________________________________________________________________________________________

These lines of code train the MobileNetV2 model using the fit() method of the Keras Model class. The fit() method trains the model on a given dataset, which in this case is the train_gen generator created earlier.

The epochs parameter specifies the number of training epochs to run, and the batch_size parameter sets the number of samples to use in each training batch.

The validation_data parameter is set to test_gen to use the testing dataset as the validation set. The verbose parameter is set to 1 to display progress bars during training.

The callbacks parameter is used to pass two callback functions, lrate and earstop, which are used for learning rate scheduling and early stopping during training, respectively.

In [23]:
history_0 = model_MobileNetV2.fit(
    train_gen,
    epochs = 5,
    batch_size=32,
    validation_data = test_gen,
    verbose = 1,
    callbacks = [lrate, earstop]
)



These lines of code use the trained MobileNetV2 model to make predictions on the testing dataset, using the predict() method of the Keras Model class.


In [24]:
# Find prediction for test_gen
from sklearn.metrics import accuracy_score
preds = model_MobileNetV2.predict(test_gen)

These lines of code compute the accuracy score of the MobileNetV2 model's predictions on the testing dataset, using the accuracy_score() function from the Scikit-learn library.

In [25]:
accuracy_score(test['target'].astype(int), np.round(preds).astype(int))

0.6622516556291391