In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; the notebook reads its
# feature files from underneath this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os

# Directory on the mounted Drive that holds the saved k-NN evaluation tensors.
# The variable is deliberately still called `path`: later cells build file
# names from it. The former `from os import path` was removed because this
# assignment overwrote it immediately, so it was never usable; filesystem
# helpers are reached through `os.path` instead.
path = "/content/drive/MyDrive/Colab Notebooks/DISSERTATION/DINO/KNN_Eval/test_100%"

# Quick sanity check that the Drive mount and the folder are visible.
print(os.path.exists(path))

True


In [None]:
import torch

def _load_cpu(filename):
    """Load one saved tensor file from the `path` directory onto the CPU.

    map_location="cpu" makes the load independent of the device the tensor was
    saved from; the cells below call `.numpy()` directly, which only works on
    CPU tensors.

    NOTE(review): torch.load unpickles the file, so only use it on files you
    produced yourself or otherwise trust.
    """
    return torch.load(f"{path}/{filename}", map_location="cpu")


train_features = _load_cpu("trainfeat.pth")
test_features = _load_cpu("testfeat.pth")
train_labels = _load_cpu("trainlabels.pth")
test_labels = _load_cpu("testlabels.pth")

print(train_features.shape, test_features.shape)
print(train_labels.shape, test_labels.shape)

# Fail early on mismatched files instead of producing silently wrong metrics.
assert train_features.shape[0] == train_labels.shape[0], "train features/labels row count mismatch"
assert test_features.shape[0] == test_labels.shape[0], "test features/labels row count mismatch"
assert train_features.shape[1] == test_features.shape[1], "train/test feature dimension mismatch"

torch.Size([89996, 384]) torch.Size([7180, 384])
torch.Size([89996]) torch.Size([7180])


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# 1-NN predictions
# Every test sample receives the label of the training sample with the highest
# dot-product similarity.
# NOTE(review): the dot product is a cosine similarity only if the saved
# features are L2-normalised — confirm against the feature-extraction script.
#
# The test set is scored in row chunks: a single test x train similarity
# matrix for the shapes printed by the loading cell (7180 x 89996, float32) is
# about 2.6 GB and would stay alive in the kernel for the rest of the session.
# Only the index of the best match per row is kept.
KNN_CHUNK_ROWS = 512  # test rows scored per matrix multiplication
train_features_t = train_features.t()

nearest_per_chunk = []
for test_chunk in test_features.split(KNN_CHUNK_ROWS):
    chunk_similarity = torch.mm(test_chunk, train_features_t)
    nearest_per_chunk.append(chunk_similarity.topk(1, largest=True, sorted=True).indices)

# Same layout as before: one column holding the nearest training row per test row.
indices = torch.cat(nearest_per_chunk, dim=0)
preds = train_labels[indices.view(-1)]

# confusion matrix
# Rows follow the true labels, columns the 1-NN predicted labels.
y_true = test_labels.numpy()
y_pred = preds.numpy()
cm = confusion_matrix(y_true, y_pred)

# Draw the counts as an annotated heatmap on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Oranges", cbar=True, ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("DINO - Confusion Matrix (100% of Labels)")
plt.show()

# metrics
# Text report comparing the true test labels with the 1-NN predictions,
# printed with six decimal places.
print("DINO - Classification Report (100% of Labels):\n")
report_text = classification_report(test_labels.numpy(), preds.numpy(), digits=6)
print(report_text)

t-SNE Graph

In [None]:
from sklearn.manifold import TSNE

# PathMNIST has 9 tissue classes; the position in this tuple is the label id.
_TISSUE_TYPES = (
    "Adipose",
    "Background",
    "Debris",
    "Lymphocytes",
    "Mucus",
    "Smooth Muscle",
    "Normal Colon Mucosa",
    "Cancer-Associated Stroma",
    "Colorectal Adenocarcinoma Epithelium",
)

# Legend text per class, e.g. "Label 0: Adipose".
class_names = [
    f"Label {label_id}: {tissue}"
    for label_id, tissue in enumerate(_TISSUE_TYPES)
]

# use test features for visualization
X = test_features.numpy()
y = test_labels.numpy()

# dimensionality reduction
# Project the features to 2-D with cosine distance; random_state is fixed so
# the layout is reproducible between runs.
X_embedded = TSNE(n_components=2, perplexity=30, metric='cosine', random_state=42).fit_transform(X)

# plot
fig, ax = plt.subplots(figsize=(12, 8))
scatter = ax.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, cmap="tab10", s=5)

# legend with class names
# num=None asks matplotlib for exactly one handle per unique value in `c`, in
# ascending order. The class ids are taken from `y` itself rather than parsed
# back out of matplotlib's "$\mathdefault{0}$"-style label strings, so the
# legend no longer depends on that text format or on the number of classes.
handles, _ = scatter.legend_elements(num=None)
present_classes = sorted(set(y.tolist()))
labels = [class_names[int(class_id)] for class_id in present_classes]
assert len(handles) == len(labels), "legend handles and class labels are out of sync"

ax.legend(handles, labels, title="Classes", bbox_to_anchor=(1.01, 1), loc="upper left")

ax.set_title("t-SNE Visualization of DINO Feature Embeddings by Class")
fig.tight_layout()
plt.show()