In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import seaborn as sns
from tqdm import tqdm

import cv2
import os
import tensorflow as tf
from tensorflow import keras
import sklearn as sk

import warnings
warnings.filterwarnings('ignore')


In [7]:
# Import data_labels_mainData.csv into a DataFrame
main_data = pd.read_csv('data_labels_mainData.csv')

# Import data_labels_extraData.csv into a DataFrame
extra_data = pd.read_csv('data_labels_extraData.csv')

In [8]:
from sklearn.model_selection import train_test_split

#splitting data into train and validation 
train_task1, val_task1 = train_test_split(main_data[['ImageName', 'isCancerous']], 
                                              test_size=0.3, random_state=9)


train_task2, val_task2 = train_test_split(main_data[['ImageName','cellType']], 
                                              test_size=0.3, random_state=9)                                        

print('Training data shape task 1:', train_task1.shape)
print('Validation data shape task 1:', val_task1.shape)

print('Training data shape task 2:', train_task2.shape)
print('Validation data shape task 2:', val_task2.shape)

Training data shape task 1: (6927, 2)
Validation data shape task 1: (2969, 2)
Training data shape task 2: (6927, 2)
Validation data shape task 2: (2969, 2)


In [9]:
#Create a a function to add image according to the name given from the list
from PIL import Image
def GetImage(directory):
    images=[]
    for name in tqdm(directory, desc="Adding images"):
        image = cv2.imread("patch_images/"+name)
        image = Image.fromarray(image,'RGB')
        images.append(np.array(image))
    result = np.array(images)
    print("\ngetImage COMPLETED!")
    return result



In [10]:
#Create a function to generate sample to fix the Imblance of the dataset
from imblearn.over_sampling import RandomOverSampler
def GenerateSample(X,Y):
    ros = RandomOverSampler(random_state = 1)
    x, y = ros.fit_resample(X.values.reshape(-1,1), Y)
    x = x.flatten()
    return x,y

In [11]:
x1_train = train_task1['ImageName']
y1_train = train_task1['isCancerous']
print("Original Dataset:\n",y1_train.value_counts())

#Generate sample
x1_train, y1_train = GenerateSample(x1_train,y1_train)
print("Sampled Dataset:\n",y1_train.value_counts())
x1_train = GetImage(x1_train)

x1_test = val_task1['ImageName']
x1_test = GetImage(x1_test)

y1_test = val_task1['isCancerous']
train_ds = x1_train
val_ds = y1_train

Original Dataset:
 0    4030
1    2897
Name: isCancerous, dtype: int64
Sampled Dataset:
 0    4030
1    4030
Name: isCancerous, dtype: int64


Adding images: 100%|██████████| 8060/8060 [00:19<00:00, 414.03it/s] 



getImage COMPLETED!


Adding images: 100%|██████████| 2969/2969 [00:08<00:00, 369.70it/s]


getImage COMPLETED!





In [12]:
x2_train = train_task2['ImageName']
y2_train = train_task2['cellType']
print("Original Dataset:\n",y2_train.value_counts())

#Generate sample
x2_train, y2_train = GenerateSample(x2_train,y2_train)
print("Sampled Dataset:\n",y2_train.value_counts())
x2_train = GetImage(x2_train)

x2_test = val_task2['ImageName']
x2_test = GetImage(x2_test)

y2_test = val_task2['cellType']

x2_test,x2_val,y2_test,y2_val = train_test_split(x2_test, y2_test,
                                              test_size=0.5, random_state=9)


Original Dataset:
 2    2897
1    1778
0    1302
3     950
Name: cellType, dtype: int64
Sampled Dataset:
 1    2897
2    2897
3    2897
0    2897
Name: cellType, dtype: int64


Adding images: 100%|██████████| 11588/11588 [00:01<00:00, 6063.33it/s]



getImage COMPLETED!


Adding images: 100%|██████████| 2969/2969 [00:00<00:00, 6090.97it/s]


getImage COMPLETED!





In [13]:
print("TASK 1 SHAPE:")
print("TRAIN SHAPE:")
print("x1 shape:", x1_train.shape)
print("y1 shape:", y1_train.shape)
print("TEST SHAPE:")
print("x1 shape:", x1_test.shape)
print("y1 shape:", y1_test.shape)

print("TASK 2 SHAPE:")
print("x1 shape:", x2_train.shape)
print("y1 shape:", y2_train.shape)
print("VALIDATION SHAPE:")
print("x1 shape:", x2_val.shape)
print("y1 shape:", y2_val.shape)
print("TEST SHAPE:")
print("x1 shape:", x2_test.shape)
print("y1 shape:", y2_test.shape)

TASK 1 SHAPE:
TRAIN SHAPE:
x1 shape: (8060, 27, 27, 3)
y1 shape: (8060,)
TEST SHAPE:
x1 shape: (2969, 27, 27, 3)
y1 shape: (2969,)
TASK 2 SHAPE:
x1 shape: (11588, 27, 27, 3)
y1 shape: (11588,)
VALIDATION SHAPE:
x1 shape: (1485, 27, 27, 3)
y1 shape: (1485,)
TEST SHAPE:
x1 shape: (1484, 27, 27, 3)
y1 shape: (1484,)


In [14]:
import random
num_known_classes = 3
class_list = random.sample(population=range(3), k=num_known_classes)

classes_per_batch = 3
# Passing multiple examples per class per batch ensures that each example has
# multiple positive pairs. This can be useful when performing triplet mining or
# when using losses like `MultiSimilarityLoss` or `CircleLoss` as these can
# take a weighted mix of all the positive pairs. In general, more examples per
# class will lead to more information for the positive pairs, while more classes
# per batch will provide more varied information in the negative pairs. However,
# the losses compute the pairwise distance between the examples in a batch so
# the upper limit of the batch size is restricted by the memory.
examples_per_class_per_batch = 1

print(
    "Batch size is: "
    f"{min(classes_per_batch, num_known_classes) * examples_per_class_per_batch}"
)

print(" Create Training Data ".center(34, "#"))
train_ds = tfsim.samplers.SingleShotMemorySampler(
    x1_train,
    y1_train,
    examples_per_batch = 2

)

print("\n" + " Create Validation Data ".center(34, "#"))
val_ds = tfsim.samplers.TFDatasetMultiShotMemorySampler(
    x1_test,
    classes_per_batch=classes_per_batch,
    splits="test",
    total_examples_per_class=100,
)

Batch size is: 3
###### Create Training Data ######


NameError: name 'tfsim' is not defined

In [27]:
import tensorflow_similarity as tfsim
from tensorflow import keras
from tensorflow.keras import layers

tfsim.utils.tf_cap_memory()

In [28]:
embedding_size = 256

inputs = keras.layers.Input((32, 32, 3))
x = keras.layers.Rescaling(scale=1.0 / 255)(inputs)
x = keras.layers.Conv2D(64, 3, activation="relu")(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Conv2D(128, 3, activation="relu")(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.MaxPool2D((4, 4))(x)
x = keras.layers.Conv2D(256, 3, activation="relu")(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Conv2D(256, 3, activation="relu")(x)
x = keras.layers.GlobalMaxPool2D()(x)
outputs = tfsim.layers.MetricEmbedding(embedding_size)(x)

# building model
model = tfsim.models.SimilarityModel(inputs, outputs)
model.summary()

Model: "similarity_model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 rescaling_2 (Rescaling)     (None, 32, 32, 3)         0         
                                                                 
 conv2d_8 (Conv2D)           (None, 30, 30, 64)        1792      
                                                                 
 batch_normalization_6 (Batc  (None, 30, 30, 64)       256       
 hNormalization)                                                 
                                                                 
 conv2d_9 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 batch_normalization_7 (Batc  (None, 28, 28, 128)      512       
 hNormalization)                                

In [29]:
epochs = 3
learning_rate = 0.002
val_steps = 50

# init similarity loss
loss = tfsim.losses.MultiSimilarityLoss()

# compiling and training
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate), loss=loss, steps_per_execution=10,
)
history = model.fit(
    train_ds, epochs=epochs, validation_data=val_ds, validation_steps=val_steps
)

Distance metric automatically set to cosine use the distance arg to override.


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().