# Import all the libraries

In [37]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
import cv2
import gc
import os
import tensorflow as tf
from openslide import open_slide
from openslide.deepzoom import DeepZoomGenerator
from tensorflow.keras import layers
from tensorflow.keras import applications
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

In [38]:
inp_size=224
model_path = 'MODELS/new_final_effB0_res152_vgg19_stacked.h5'

# Hyper Parameters to be Tuned

In [39]:
epochs=40
batch_size=32
learning_rate = 1e-3
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)  #adam or sgd

# Fixed Parameters and paths

In [40]:
TRAIN_DATASET_DIR = "train_dataset"

#--------------------
classMode = 'categorical'
loss = 'categorical_crossentropy'
val_split = 0.2
Num_tiles_to_consider_for_prediction = 3
# validation_steps= 10
# steps_per_epoch = 10
# val_batch_size=16

# Load and compile the models

In [45]:
loaded_model = tf.keras.models.load_model(model_path)
loaded_model.summary()



In [47]:
loaded_model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=['accuracy'])

In [43]:
# Data Augmentation & Preprocessing
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=45, 
    validation_split=val_split,
    width_shift_range=0.3,
    height_shift_range=0.3,
    brightness_range=[0.5,2.0],
    horizontal_flip=True,
    vertical_flip=True
)

print(TRAIN_DATASET_DIR)
train_generator = datagen.flow_from_directory(
    TRAIN_DATASET_DIR,
    target_size=(inp_size, inp_size),
    batch_size=batch_size,
    shuffle=True,
    class_mode=classMode,
    subset='training',
)

validation_generator = datagen.flow_from_directory(
    TRAIN_DATASET_DIR,
    target_size=(inp_size, inp_size),
    batch_size=batch_size,
    shuffle=True,
    class_mode=classMode,
    subset='validation',
)

class_indices = train_generator.class_indices
print(class_indices)

train_dataset
Found 3587 images belonging to 2 classes.
Found 895 images belonging to 2 classes.
{'CE': 0, 'LAA': 1}


In [28]:
  # Train Model
history = loaded_model.fit(
    train_generator,
    validation_data=validation_generator,
    # steps_per_epoch=steps_per_epoch,
    # validation_steps=validation_steps,
    epochs=epochs,
    verbose=1
)

  self._warn_if_super_not_called()


Epoch 1/40
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1529s[0m 13s/step - accuracy: 0.5582 - loss: 0.8117 - val_accuracy: 0.5855 - val_loss: 0.6791
Epoch 2/40
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1530s[0m 14s/step - accuracy: 0.6309 - loss: 0.6479 - val_accuracy: 0.5855 - val_loss: 0.6647
Epoch 3/40
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1513s[0m 13s/step - accuracy: 0.6638 - loss: 0.6304 - val_accuracy: 0.5966 - val_loss: 0.6552
Epoch 4/40
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1513s[0m 13s/step - accuracy: 0.6554 - loss: 0.6155 - val_accuracy: 0.6592 - val_loss: 0.6007
Epoch 5/40
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1518s[0m 13s/step - accuracy: 0.6593 - loss: 0.6175 - val_accuracy: 0.6156 - val_loss: 0.6223
Epoch 6/40
[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1462s[0m 13s/step - accuracy: 0.6691 - loss: 0.6093 - val_accuracy: 0.5933 - val_loss: 0.6552
Epoch 7/40

KeyboardInterrupt: 

In [29]:

plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.legend()
plt.title("Model Accuracy Over Epochs")
plt.show()


NameError: name 'history' is not defined

In [51]:
loaded_model.save(f'{model_path}_trained.keras')

# model testing

In [10]:
def make_test_file(x):
    return os.path.join(test_imgs_dir,x+'.tif')
test = pd.read_csv(test_csv)
test_data = pd.DataFrame({'image_id': test.image_id.apply(make_test_file)})
test_data.head()

Unnamed: 0,image_id
0,/kaggle/input/mayo-clinic-strip-ai/test/006388...
1,/kaggle/input/mayo-clinic-strip-ai/test/008e5c...
2,/kaggle/input/mayo-clinic-strip-ai/test/00c058...
3,/kaggle/input/mayo-clinic-strip-ai/test/01adc5...


In [11]:
preds=[]
for x in range(int(test_data.size)):
    img_path = test_data.image_id[x]
    slide = open_slide(img_path)
    tiles=DeepZoomGenerator(slide,tile_size=inp_size,overlap=0,limit_bounds=False)
    cols,rows = tiles.level_tiles[tiles.level_count-1]
    print(x)
    temp_preds=[]
    count=0
    
    for row in range(0,rows,5):
        for col in range(0,cols,5):
            tile=tiles.get_tile(tiles.level_count-1,(col,row))
            tile=tile.convert("RGB")
            tile=np.array(tile)
            try:
                if tile.mean()<180 and tile.std()>50:                    
                    tile = np.reshape(tile, [1,inp_size, inp_size, 3])
                    p=[i.predict(tile/255) for i in models]
                    t_p = sum(p)/len(p)                    
                    temp_preds.append(t_p)
                    count+=1
                    if count>Num_tiles_to_consider_for_prediction:break
            except :
                pass        
            if count>Num_tiles_to_consider_for_prediction:break
    if len(temp_preds) > 0:
        preds.append(sum(temp_preds)/len(temp_preds))
    else:
        preds.append([[0.5,0.5]])
    del slide
    del tiles
    gc.collect()

0
1
2
3


In [12]:
preds

[array([[0.49939284, 0.50060713]], dtype=float32),
 array([[0.54945433, 0.45054564]], dtype=float32),
 array([[0.4477892, 0.5522108]], dtype=float32),
 array([[0.42307842, 0.57692164]], dtype=float32)]

In [13]:
preds = pd.DataFrame(np.concatenate(preds))
submission = pd.read_csv('/kaggle/input/mayo-clinic-strip-ai/sample_submission.csv')
submission.CE = preds.iloc[ : , : 1]
submission.LAA = preds.iloc[ : , 1: 2]
submission = submission.groupby("patient_id").mean()
submission = submission[["CE", "LAA"]].round(6).reset_index()
submission.fillna(0.5)
submission

Unnamed: 0,patient_id,CE,LAA
0,006388,0.499393,0.500607
1,008e5c,0.549454,0.450546
2,00c058,0.447789,0.552211
3,01adc5,0.423078,0.576922


In [14]:
submission[["patient_id", "CE", "LAA"]].to_csv("submission.csv", index=False)
!head submission.csv

patient_id,CE,LAA
006388,0.499393,0.500607
008e5c,0.549454,0.450546
00c058,0.447789,0.552211
01adc5,0.423078,0.576922


In [None]:
loss, accuracy = model.evaluate(val_generator)
print(f"Validation Accuracy: {accuracy:.2f}")
