In [None]:
import os
import numpy as np

import skimage
import sklearn

from matplotlib import pyplot as plt

import metric_dissimilarity

In [None]:
# Read the images: every sub-directory of data/processed is treated as one
# class and every file inside it as one sample of that class.
data_root = "data/processed"

X = []
Y = []

# os.listdir returns entries in arbitrary order. Sorting both levels fixes the
# sample order, which the seeded train/test split below depends on.
# NOTE(review): later cells pass cache="cache/..." paths; if artifacts were
# cached with a different sample order they may no longer line up — confirm
# and clear the cache directory if needed.
for cls in sorted(os.listdir(data_root)):
  cls_dir = os.path.join(data_root, cls)
  if not os.path.isdir(cls_dir):
    # A stray file next to the class folders is not a class; skip it instead
    # of failing when trying to list it.
    continue
  for img_filename in sorted(os.listdir(cls_dir)):
    img = skimage.io.imread(os.path.join(cls_dir, img_filename))
    X.append(img)
    Y.append(cls)

# Convert to numpy
# (assumes every image has the same shape so they stack into one array — TODO confirm)
X = np.array(X, dtype = np.uint8)
# Class names become integer codes.
Y = sklearn.preprocessing.LabelEncoder().fit_transform(Y)

# Subset the problem to only 10 classes (those with encoded label 0..9).
# The mask is computed once, before Y itself is overwritten.
keep = Y < 10
X = X[keep]
Y = Y[keep]

# Split the data (seeded and stratified by label)
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(X, Y, random_state = 42, stratify = Y)

In [None]:
# Visualize some images and their corresponding labels.
# A 3 x 10 grid holds up to 30 training samples. Iterating with zip stops at
# the shorter of (grid cells, samples), so a train split with fewer than 30
# images no longer raises IndexError.
n_rows, n_cols = 3, 10
fig, axes = plt.subplots(n_rows, n_cols, figsize = (15, 8))

# Hide the frame and ticks on every cell, including any that stay empty.
for ax in axes.flat:
  ax.axis("off")

for ax, sample_img, sample_label in zip(axes.flat, X_train, Y_train):
  # Scale to [0, 1] floats for imshow.
  ax.imshow(sample_img / 255.)
  ax.set_title(sample_label)

In [None]:
# Train the model on the training split. Arguments are listed one per line, in
# the same order as before, so individual settings are easy to diff.
# NOTE(review): clf_warmup=False is passed together with clf_warmup_* values;
# presumably those are ignored when the flag is off — confirm in metric_dissimilarity.train.
model = metric_dissimilarity.train(
  X_train,
  Y_train,
  model_type="contrastive",
  model_file="cache/model.pth",
  backbone="efficientnet_v2_s",
  embeddingsize=128,
  patch_size=(200, 200),
  pretrained=True,
  batch=32,
  iterations=10000,
  lr=0.001,
  batch_warmup=64,
  clf_warmup=False,
  clf_warmup_epochs=10,
  clf_epochs=50,
  clf_warmup_lr=0.01,
  clf_lr=0.001,
  warmup_iterations=5000,
  lr_warmup=0.01,
  temperature_warmup=0.5,
  temperature=0.5,
)

In [None]:
# Embed both splits with the trained model. The two calls differ only in the
# images they receive and the cache file they are given.
train_embeddings = metric_dissimilarity.generate_embedding(
  model,
  X_train,
  patch_size=(200, 200),
  cache="cache/train_embedding.pkl",
)
test_embeddings = metric_dissimilarity.generate_embedding(
  model,
  X_test,
  patch_size=(200, 200),
  cache="cache/test_embedding.pkl",
)

In [None]:
# Call umap_projection once for the train embeddings and once for the test
# embeddings, each paired with its own labels.
# NOTE(review): presumably each call draws a 2-D UMAP plot coloured by label —
# confirm in metric_dissimilarity. The last call is the cell's final
# expression, so any value it returns is shown as the cell output.
metric_dissimilarity.umap_projection(train_embeddings, Y_train)
metric_dissimilarity.umap_projection(test_embeddings, Y_test)

In [None]:
# Compute prototypes from the train embeddings and their labels; the call
# returns a pair that is unpacked into X_prot and Y_prot.
# NOTE(review): presumably n_prototypes is a per-class count — confirm in
# metric_dissimilarity.compute_prototypes.
X_prot, Y_prot = metric_dissimilarity.compute_prototypes(
  train_embeddings,
  Y_train,
  n_prototypes=5,
  method="kmeans++",
  cache="cache/prototypes.pkl",
)

In [None]:
# Call umap_projection on the prototypes and their labels.
# NOTE(review): presumably this draws the same kind of plot as for the
# embeddings — confirm in metric_dissimilarity. As the cell's only expression,
# any value it returns is shown as the cell output.
metric_dissimilarity.umap_projection(X_prot, Y_prot)

In [None]:
# Build the dissimilarity-space representation of each split from its
# embeddings and the shared prototypes. Only the embeddings and the cache file
# differ between the two calls.
contr_space_train = metric_dissimilarity.space_representation(
  model,
  train_embeddings,
  X_prot,
  cache="cache/contr-space-train.pkl",
)
contr_space_test = metric_dissimilarity.space_representation(
  model,
  test_embeddings,
  X_prot,
  cache="cache/contr-space-test.pkl",
)

In [None]:
# Build the dissimilarity-vector representation of each split from the raw
# images, their labels and the prototypes. Each call returns a pair.

# Train split: both returned values are kept.
contr_vector_X_train, contr_vector_Y_train = metric_dissimilarity.vector_representation(
  model,
  X_train,
  Y_train,
  X_prot,
  Y_prot,
  patch_size=(200, 200),
  variations=20,
  cache="cache/contr-vector-train.pkl",
)

# Test split: only the first returned value is kept; the second is discarded.
contr_vector_X_test, _ = metric_dissimilarity.vector_representation(
  model,
  X_test,
  Y_test,
  X_prot,
  Y_prot,
  patch_size=(200, 200),
  variations=20,
  cache="cache/contr-vector-test.pkl",
)

In [None]:
# Embedding classification: fit a classifier directly on the embeddings.
np.random.seed(1234)

# Construct and fit in one step (fit returns the estimator itself).
clf = sklearn.linear_model.LogisticRegression().fit(train_embeddings, Y_train)

# Predict the test split and score the predictions against the true labels.
Y_pred = clf.predict(test_embeddings)
acc = sklearn.metrics.accuracy_score(y_true = Y_test, y_pred = Y_pred)

print(f"Accuracy: {acc * 100:.1f}%")

In [None]:
# Contrastive dissimilarity space classification: same classifier as for the
# raw embeddings, fitted on the dissimilarity-space features instead.
np.random.seed(1234)

# Construct and fit in one step (fit returns the estimator itself).
clf = sklearn.linear_model.LogisticRegression().fit(contr_space_train, Y_train)

# Predict the test split and score the predictions against the true labels.
Y_pred = clf.predict(contr_space_test)
acc = sklearn.metrics.accuracy_score(y_true = Y_test, y_pred = Y_pred)

print(f"Accuracy: {acc * 100:.1f}%")

In [None]:
# Contrastive dissimilarity vector classification
np.random.seed(1234)

# Train a regular classifier on the dissimilarity vectors and their labels
clf = sklearn.linear_model.LogisticRegression()
clf.fit(contr_vector_X_train, contr_vector_Y_train)

# Class probabilities for every test dissimilarity vector
X_pred_proba = clf.predict_proba(contr_vector_X_test)

# Transform the binary classification back into multiclass.
# The result must be stored in Y_pred: the accuracy below was previously
# computed from the Y_pred left over by an earlier cell, so it reported that
# cell's score instead of this classifier's.
# NOTE(review): Y_test is passed into vector_to_class; presumably it is only
# used for bookkeeping (e.g. the number of test samples) and not to pick the
# predictions — confirm in metric_dissimilarity.
Y_pred = metric_dissimilarity.vector_to_class(X_pred_proba, Y_test, Y_prot)
X_pred = Y_pred  # original variable name kept as an alias for any later use

# Evaluate the classifier
acc = sklearn.metrics.accuracy_score(Y_test, Y_pred)

print(f"Accuracy: {acc * 100:.1f}%")