In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, ReLU, MaxPool2D, GlobalAvgPool2D
from tensorflow.keras.layers import Input, Add, ZeroPadding2D, Activation, BatchNormalization, Flatten, Conv2D, AveragePooling2D, GlobalMaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

# Data Preprocessing

In [None]:
# Kaggle input locations: the competition data set and the supplemental masks.
data_dir = '/kaggle/input/UBC-OCEAN'
supp_dir = '/kaggle/input/ubc-ovarian-cancer-competition-supplemental-masks'

# Metadata tables for the train and test images (one row per image_id).
train_csv = pd.read_csv(f'{data_dir}/train.csv')
test_csv = pd.read_csv(f'{data_dir}/test.csv')

In [None]:
# Keep only the rows whose `is_tma` flag is False (the WSI images).
train_csv = train_csv[train_csv['is_tma'] == False]

# Stratify on the label so every class appears in both splits with the same
# proportions. Without it a rare class can be absent from the training split,
# which makes OneHotEncoder.transform() fail on the validation labels.
train_data, val_data = train_test_split(
    train_csv,
    test_size=0.2,
    random_state=42,
    stratify=train_csv['label'],
)

# Thumbnail paths, in the same row order as the corresponding frame.
train_image_paths = [f'{data_dir}/train_thumbnails/{img_id}_thumbnail.png' for img_id in train_data['image_id']]
val_image_paths = [f'{data_dir}/train_thumbnails/{img_id}_thumbnail.png' for img_id in val_data['image_id']]
test_image_paths = [f'{data_dir}/test_thumbnails/{img_id}_thumbnail.png' for img_id in test_csv['image_id']]

# Multi-class classification: one-hot encode the string labels for the model.
one_hot_encoder = OneHotEncoder(sparse_output=False)

train_labels = np.array(train_data['label'])
val_labels = np.array(val_data['label'])

# OneHotEncoder expects a 2-D (n_samples, n_features) array.
train_labels_reshaped = train_labels.reshape(-1, 1)
val_labels_reshaped = val_labels.reshape(-1, 1)

# Fit on the training labels only, then reuse the same category order for
# the validation labels.
train_labels_one_hot = one_hot_encoder.fit_transform(train_labels_reshaped)
val_labels_one_hot = one_hot_encoder.transform(val_labels_reshaped)

In [None]:
# (1) Feature scaling: every pixel value is mapped to [0, 1].
# (2) Augmentation (random shear, horizontal flip) to reduce overfitting.
#     It is applied to TRAINING images only; validation and test images get
#     the deterministic rescale alone so evaluation and the submission are
#     reproducible.
datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   #zoom_range = 0.2,
                                   horizontal_flip = True
                            )

def load_and_augment_img(img_path, augment=True):
    """Load one image as a 224x224 RGB float32 array scaled to [0, 1].

    Args:
        img_path: Path of the image file to read.
        augment: When True (the default, matching the previous behaviour) the
            image is passed once through `datagen`, i.e. a random shear /
            horizontal flip followed by the 1/255 rescale. When False only
            the 1/255 rescale is applied.

    Returns:
        A float32 numpy array of shape (224, 224, 3).
    """
    # The context manager closes the file handle; convert()/resize() return
    # new in-memory images, so `resized` stays valid after the file is closed.
    with Image.open(img_path) as source:
        # Force three channels so RGBA / greyscale files still match the
        # (224, 224, 3) model input.
        resized = source.convert('RGB').resize((224, 224))

    # Add a leading batch axis: flow() expects (1, height, width, channels).
    batch = np.asarray(resized, dtype='float32')[np.newaxis, ...]

    if not augment:
        return batch[0] * (1. / 255)

    # Builtin next() works whether or not the iterator exposes a .next() method.
    return next(datagen.flow(batch, batch_size=1))[0]

# The variable names are kept for the cells below; only the training images
# are actually augmented.
train_images_augmented = [load_and_augment_img(path) for path in train_image_paths]
val_images_augmented = [load_and_augment_img(path, augment=False) for path in val_image_paths]
test_images_augmented = [load_and_augment_img(path, augment=False) for path in test_image_paths]

In [None]:
import matplotlib.pyplot as plt

def visualize(image):
    """Show `image` on a fresh 10x10-inch figure with the axes hidden."""
    canvas = plt.figure(figsize=(10, 10))
    # Hide ticks and frame before drawing; the image still fills the axes.
    canvas.gca().axis('off')
    plt.imshow(image)

In [None]:
# Imported under an alias on purpose: a bare `Image` would rebind the
# module-level name that load_and_augment_img() relies on (PIL's Image), so
# re-running the image-loading cell after this one would fail.
from IPython.display import Image as IPyImage

# Display the first (un-augmented) training thumbnail straight from disk.
IPyImage(filename=train_image_paths[0], width=768, height=768)

In [None]:
# Show the first entry of train_images_augmented (the array produced by
# load_and_augment_img) for comparison with the original thumbnail.
visualize(train_images_augmented[0])

# Define ResNet-50 Architecture

In [None]:
# Sorted distinct label values found in the training split.
unique_classes = np.unique(train_labels)
length = len(unique_classes)

# Plain-list copy; the submission cell indexes it with the predicted class id.
uni_classes = [*unique_classes]

print(uni_classes)

In [None]:
# Shape of one model input: 224x224 pixels, 3 channels (matches the resize
# done in load_and_augment_img).
input_tensor = (224, 224, 3)

# Taken from the width of the one-hot training labels rather than hard-coded,
# so the softmax layer always matches the targets passed to model.fit().
NUMBER_OF_CLASSES = train_labels_one_hot.shape[1]

In [None]:
# DEFINE THE RESNET-50 ARCHITECTURE *************************************************************

def conv_batchnorm_relu(x, filters, kernel_size, strides=1):
    """Apply Conv2D ('same' padding) -> BatchNormalization -> ReLU to `x`.

    Args:
        x: Input tensor.
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size.
        strides: Convolution stride (default 1).

    Returns:
        The output tensor of the final ReLU.
    """
    convolved = Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='same',
    )(x)
    normalised = BatchNormalization()(convolved)
    return ReLU()(normalised)

def identity_block(tensor, filters):
    """Bottleneck residual block whose shortcut is the unmodified input.

    The main branch is 1x1 conv -> 3x3 conv -> 1x1 conv (4*filters channels)
    with batch normalisation; its output is added to `tensor` and passed
    through a ReLU. `tensor` must therefore already have the same shape as
    the 4*filters-channel branch output.

    Args:
        tensor: Input tensor, also used as the shortcut.
        filters: Filter count of the first two convolutions.

    Returns:
        The output tensor of the final ReLU.
    """
    branch = conv_batchnorm_relu(tensor, filters=filters, kernel_size=1)
    branch = conv_batchnorm_relu(branch, filters=filters, kernel_size=3)
    # Expand back to 4*filters channels; no activation before the addition.
    branch = Conv2D(filters=4 * filters, kernel_size=1)(branch)
    branch = BatchNormalization()(branch)
    merged = Add()([tensor, branch])
    return ReLU()(merged)

def projection_block(tensor, filters, strides):
    """Bottleneck residual block with a projected (1x1 conv) shortcut.

    The main branch is 1x1 conv (with `strides`) -> 3x3 conv -> 1x1 conv
    (4*filters channels). The shortcut is a 1x1 conv with 4*filters channels
    and the same `strides`, followed by batch normalisation, so both branches
    can be added.

    Args:
        tensor: Input tensor.
        filters: Filter count of the first two convolutions.
        strides: Stride used by the first main-branch conv and the shortcut.

    Returns:
        The output tensor of the final ReLU.
    """
    # Main branch.
    branch = conv_batchnorm_relu(tensor, filters=filters, kernel_size=1, strides=strides)
    branch = conv_batchnorm_relu(branch, filters=filters, kernel_size=3)
    branch = Conv2D(filters=4 * filters, kernel_size=1)(branch)
    branch = BatchNormalization()(branch)

    # Shortcut branch, projected to the main branch's shape.
    projected = Conv2D(filters=4 * filters, kernel_size=1, strides=strides)(tensor)
    projected = BatchNormalization()(projected)

    merged = Add()([projected, branch])
    return ReLU()(merged)
    
def resnet_block(x, filters, reps, strides):
    """Build one stage: a projection block followed by identity blocks.

    Args:
        x: Input tensor.
        filters: Base filter count passed to every block in the stage.
        reps: Total number of blocks in the stage (1 projection block plus
            reps - 1 identity blocks).
        strides: Stride of the leading projection block.

    Returns:
        The output tensor of the last block.
    """
    stage_out = projection_block(x, filters, strides)
    remaining = reps - 1
    while remaining > 0:
        stage_out = identity_block(stage_out, filters)
        remaining -= 1
    return stage_out

In [None]:
# Named `inputs` rather than `input` so the Python builtin input() is not
# shadowed for the rest of the kernel session.
inputs = Input(shape=input_tensor)

# Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
# NOTE(review): the pooling layer uses its default padding; confirm this is
# intended if exact parity with a reference ResNet-50 is required.
x = conv_batchnorm_relu(inputs, filters=64, kernel_size=7, strides=2)
x = MaxPool2D(pool_size=3, strides=2)(x)

# Four residual stages with 3, 4, 6 and 3 bottleneck blocks respectively.
x = resnet_block(x, filters=64, reps=3, strides=1)
x = resnet_block(x, filters=128, reps=4, strides=2)
x = resnet_block(x, filters=256, reps=6, strides=2)
x = resnet_block(x, filters=512, reps=3, strides=2)

# Collapse the spatial dimensions, then classify with a softmax layer.
x = GlobalAvgPool2D()(x)
output = Dense(NUMBER_OF_CLASSES, activation ='softmax')(x)

model = Model(inputs=inputs, outputs=output)

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Optional: render a graph of the network.
# Left disabled on purpose; uncomment the two lines below only if you want to
# see the diagram. (Real comments are used instead of a triple-quoted string
# so the cell does not echo the string as its output.)
#
# from tensorflow.keras.utils import plot_model
# plot_model(model)

In [None]:
from tensorflow.keras.optimizers import Adam
#from tensorflow.keras.callbacks import EarlyStopping

# Adam optimiser with categorical cross-entropy, matching the one-hot labels.
optimizer = Adam(learning_rate=0.001)
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=['accuracy'],
)

# The image lists are stacked into single arrays for Keras; `results.history`
# holds the per-epoch metrics used by the plotting code further down.
results = model.fit(
    x=np.array(train_images_augmented),
    y=train_labels_one_hot,
    batch_size=32,
    epochs=15,
    validation_data=(np.array(val_images_augmented), val_labels_one_hot),
)



In [None]:
# SUBMISSION FILE

# One row of class scores per test image.
pred = model.predict(np.array(test_images_augmented))

# Index of the highest-scoring class, mapped back to its label string.
test = pred.argmax(axis=1)
predicted_labels = [uni_classes[class_idx] for class_idx in test]

# Pair every test image id with its predicted label, in test_csv row order.
submission = [
    [image_id, label]
    for image_id, label in zip(test_csv["image_id"], predicted_labels)
]
df = pd.DataFrame(submission, columns=["image_id", "label"])

df.to_csv("submission.csv", index=False)

# PLOTTING ACCURACY AND LOSS GRAPHS
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

# Left panel: accuracy per epoch.
axes[0].plot(results.history['accuracy'], label = 'Training')
axes[0].plot(results.history['val_accuracy'], label = 'Validation')
axes[0].set_title("Model accuracy")
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Accuracy rate')
axes[0].legend()

# Right panel: loss per epoch.
axes[1].plot(results.history['loss'], label = 'Training')
axes[1].plot(results.history['val_loss'], label = 'Validation')
axes[1].set_title("Model loss")
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Loss')
# Explicit axes call instead of plt.legend(), which silently targets whatever
# the "current" axes happens to be.
axes[1].legend()

training_accuracy = results.history['accuracy']
validation_accuracy = results.history['val_accuracy']

# Mean of the per-epoch accuracies over the whole run.
overall_training_accuracy = sum(training_accuracy) / len(training_accuracy)
overall_validation_accuracy = sum(validation_accuracy) / len(validation_accuracy)

# Keras reports accuracy as a fraction in [0, 1]; scale by 100 so the value
# agrees with the '%' suffix.
print(f"Overall Training Accuracy: {overall_training_accuracy * 100:.2f}%")
print(f"Overall Validation Accuracy: {overall_validation_accuracy * 100:.2f}%")

plt.show()