In [None]:
import os
import numpy as np

import skimage
import skimage.color
import skimage.feature
import skimage.io
import sklearn
import sklearn.linear_model
import sklearn.metrics
import sklearn.model_selection
import sklearn.preprocessing

import metric_dissimilarity

In [None]:
# Read the images
# The folder layout is data/processed/<class name>/<image file>; the folder
# name is used as the sample's label.
# Entries are visited in sorted order because os.listdir() returns them in
# arbitrary order; a fixed sample order keeps the seeded train/test split
# reproducible from one machine to the next.
X = []
Y = []

data_dir = "data/processed"
for cls in sorted(os.listdir(data_dir)):
  cls_dir = os.path.join(data_dir, cls)
  if not os.path.isdir(cls_dir):
    # Skip stray files sitting next to the class folders
    continue
  for img_filename in sorted(os.listdir(cls_dir)):
    img = skimage.io.imread(os.path.join(cls_dir, img_filename))
    X.append(img)
    Y.append(cls)

# Convert to numpy
X = np.array(X, dtype = np.uint8)
Y = sklearn.preprocessing.LabelEncoder().fit_transform(Y)

# Subset the problem to only 10 classes (encoded labels 0..9)
# The mask is computed once so X and Y are filtered with the same selection.
keep = Y < 10
X = X[keep]
Y = Y[keep]

# Transform X into LBP features
# LBP neighbourhood: radius of 3 pixels with 8 sampling points per unit radius
radius = 3
n_points = 8 * radius

# For 'uniform', the number of patterns is n_points + 2. The bin count is fixed
# up front rather than derived from lbp.max(): an image that lacks the highest
# code would otherwise produce a shorter histogram, and the per-image feature
# vectors could no longer be stacked into one 2-D array.
n_bins = n_points + 2

X_lbp = []
for rgb_img in X:
  # Convert to grayscale, then to uint8
  img = skimage.img_as_ubyte(skimage.color.rgb2gray(rgb_img))

  # Compute the LBP representation of the image
  lbp = skimage.feature.local_binary_pattern(img, n_points, radius, "uniform")

  # Normalised histogram of the LBP codes, one unit-width bin per code
  hist, _ = np.histogram(lbp, bins=n_bins, range=(0, n_bins), density=True)

  X_lbp.append(hist)

X_lbp = np.array(X_lbp)

# Hold out a test set from the LBP features.
# The split is stratified on the labels and seeded so it can be repeated.
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
  X_lbp,
  Y,
  random_state = 42,
  stratify = Y,
)

In [None]:
# Display some summary statistics
print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")
# Axis 1 of the feature matrix holds the per-sample features; printing the
# whole shape tuple here would report (samples, features) under a label that
# promises only the feature count.
print(f"Number of features: {X_train.shape[1]}")

In [None]:
# Train the model on the training features.
# Every hyper-parameter is forwarded unchanged to metric_dissimilarity.train;
# refer to that function for the exact meaning of each option.
model = metric_dissimilarity.train(
  X_train,
  Y_train,
  model_type="contrastive",
  model_file="cache/model.pth",
  backbone=None,
  embeddingsize=None,
  batch=32,
  iterations=10000,
  lr=0.001,
  batch_warmup=64,
  clf_warmup=False,
  clf_warmup_epochs=10,
  clf_epochs=50,
  clf_warmup_lr=0.01,
  clf_lr=0.001,
  warmup_iterations=5000,
  lr_warmup=0.01,
  temperature_warmup=0.5,
  temperature=0.5,
)

In [None]:
# Pick prototypes from the training set (n_prototypes=5, "kmeans++" method).
# The call is given a cache file path under cache_features/.
X_prot, Y_prot = metric_dissimilarity.compute_prototypes(
  X_train,
  Y_train,
  n_prototypes=5,
  method="kmeans++",
  cache="cache_features/prototypes.pkl",
)

In [None]:
# Project the prototypes with the package's UMAP helper.
# The notebook only imports `metric_dissimilarity`; the name
# `contr_dissimilarity` is never defined here, so the previous call raised
# NameError on a fresh kernel.
# NOTE(review): assumes umap_projection is exposed by metric_dissimilarity
# (the module looks to have been renamed) - confirm against the package.
metric_dissimilarity.umap_projection(X_prot, Y_prot)

In [None]:
# Build the dissimilarity-space representation of both splits against the
# same prototypes; each split is given its own cache file.
contr_space_train = metric_dissimilarity.space_representation(
  model,
  X_train,
  X_prot,
  cache="cache_features/contr-space-train.pkl",
)
contr_space_test = metric_dissimilarity.space_representation(
  model,
  X_test,
  X_prot,
  cache="cache_features/contr-space-test.pkl",
)

In [None]:
# Build the dissimilarity-vector representation of the training split; both
# the transformed samples and their transformed targets are kept.
contr_vector_X_train, contr_vector_Y_train = metric_dissimilarity.vector_representation(
  model,
  X_train,
  Y_train,
  X_prot,
  Y_prot,
  variations=20,
  cache="cache/contr-vector-train.pkl",
)

# Same transformation for the test split; only the samples are kept, the
# second return value is discarded.
contr_vector_X_test, _ = metric_dissimilarity.vector_representation(
  model,
  X_test,
  Y_test,
  X_prot,
  Y_prot,
  variations=20,
  cache="cache/contr-vector-test.pkl",
)

In [None]:
# Baseline: classify directly on the original features
np.random.seed(1234)

# Fit a logistic regression with default settings on the training split
clf = sklearn.linear_model.LogisticRegression().fit(X_train, Y_train)

# Predict the held-out split and measure the share of correct labels
Y_pred = clf.predict(X_test)
acc = sklearn.metrics.accuracy_score(Y_test, Y_pred)

print(f"Accuracy: {acc * 100:.1f}%")

In [None]:
# Classify in the contrastive dissimilarity space
np.random.seed(1234)

# Fit a logistic regression with default settings on the space representation
clf = sklearn.linear_model.LogisticRegression().fit(contr_space_train, Y_train)

# Predict the held-out split and measure the share of correct labels
Y_pred = clf.predict(contr_space_test)
acc = sklearn.metrics.accuracy_score(Y_test, Y_pred)

print(f"Accuracy: {acc * 100:.1f}%")

In [None]:
# Contrastive dissimilarity vector classification
np.random.seed(1234)

# Train a regular classifier on the dissimilarity-vector problem
clf = sklearn.linear_model.LogisticRegression()
clf.fit(contr_vector_X_train, contr_vector_Y_train)

X_pred_proba = clf.predict_proba(contr_vector_X_test)

# Transform the binary classification back into multiclass.
# The result must be stored in Y_pred: the accuracy below reads Y_pred, and
# storing it under another name left Y_pred holding the previous cell's
# predictions, so the wrong model was being scored.
# The helper is called through `metric_dissimilarity`, the only module this
# notebook imports; `contr_dissimilarity` is undefined and raised NameError.
# NOTE(review): assumes vector_to_class is exposed by metric_dissimilarity -
# confirm. Y_test is passed as in the original call; verify the helper does
# not use the true labels to choose the predicted class.
Y_pred = metric_dissimilarity.vector_to_class(X_pred_proba, Y_test, Y_prot)

# Evaluate the classifier
acc = sklearn.metrics.accuracy_score(Y_test, Y_pred)

print(f"Accuracy: {acc * 100:.1f}%")