In [1]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.metrics import classification_report

# CLS

In [2]:
def compute_distance_sums(selected_test_image_embedding, memory_embeddings, memory_labels, predicted_softmax_label= None, static_distance = 1, distance_metric='cosine', num_classes=10):
    """Accumulate per-class similarity scores between one query embedding and its memory.

    Every memory embedding contributes one similarity score, and that score is
    added to the entry of the class given by its label. The result can be read
    as a similarity-weighted vote over the classes.

    Args:
        selected_test_image_embedding: 1-D query embedding.
        memory_embeddings: Memory embeddings, one row per memory item, each with
            the same length as the query.
        memory_labels: One integer class label per memory embedding, each in
            ``[0, num_classes)``.
        predicted_softmax_label: Optional per-class vector (array or list of
            length ``num_classes``) that is scaled by ``static_distance`` and
            added to the accumulated scores.
        static_distance: Scalar weight applied to ``predicted_softmax_label``.
        distance_metric: ``'cosine'`` scores each neighbour with the cosine
            similarity; ``'euclidean'`` scores it with ``1 / (1 + d)`` where
            ``d`` is the Euclidean distance.
        num_classes: Length of the returned vector.

    Returns:
        numpy.ndarray of shape ``(num_classes,)`` holding the summed scores.

    Raises:
        ValueError: If ``distance_metric`` is unsupported, a label is not an
            integer or lies outside ``[0, num_classes)``, or the number of
            labels differs from the number of memory embeddings.
    """
    # Fail fast on a bad metric, even when the memory happens to be empty.
    if distance_metric not in ('cosine', 'euclidean'):
        raise ValueError("Unsupported distance metric. Choose 'cosine' or 'euclidean'.")

    labels = np.asarray(memory_labels)
    neighbors = np.asarray(memory_embeddings)

    if labels.size and not np.issubdtype(labels.dtype, np.integer):
        raise ValueError(f"Labels must be integers, got dtype {labels.dtype}.")

    # Negative labels must be rejected explicitly: as array indices they would
    # silently wrap around and credit the last classes.
    out_of_range = (labels < 0) | (labels >= num_classes)
    if out_of_range.any():
        label = labels[out_of_range][0]
        raise ValueError(f"Label {label} is out of range. Expected between 0 and {num_classes - 1}.")

    if len(neighbors) != len(labels):
        raise ValueError(
            f"Got {len(neighbors)} memory embeddings but {len(labels)} memory labels."
        )

    distance_sums = np.zeros(num_classes)

    if len(neighbors) > 0:
        query = np.asarray(selected_test_image_embedding).reshape(1, -1)

        # Both scores grow with similarity:
        #   cosine    -> 1 for identical directions, -1 for opposite ones
        #   euclidean -> 1 / (1 + d): 1 for identical embeddings, tends to 0 as d grows
        # One pairwise call scores the query against every neighbour at once.
        if distance_metric == 'cosine':
            scores = cosine_similarity(query, neighbors)[0]
        else:
            scores = 1 / (1 + euclidean_distances(query, neighbors)[0])

        # Scatter-add each score into the slot of its label (same effect as
        # summing one-hot label vectors weighted by the score).
        distance_sums += np.bincount(labels.astype(np.intp), weights=scores, minlength=num_classes)

    if predicted_softmax_label is not None:
        print('########## Final #########')

        # Convert first so that plain lists are scaled element-wise instead of
        # being repeated (list * int) or raising (list * float).
        weighted_prediction = np.asarray(predicted_softmax_label) * static_distance
        distance_sums += weighted_prediction

        print(weighted_prediction)
        print(f'Final: {distance_sums}')

    return distance_sums

In [59]:
def get_predicted_class(distance_sums):
    """Return the position of the largest entry of ``distance_sums``.

    When several entries share the maximum, the first such position is
    returned (``np.argmax`` behaviour).
    """
    return np.argmax(distance_sums)

In [8]:
def load_data_cls(file_path: str):
    """Load the CLS test embeddings and their neighbor memory from an ``.npz`` archive.

    Args:
        file_path: Path to an ``.npz`` file containing the arrays ``test_cls``,
            ``test_labels``, ``neighbor_cls`` and ``neighbor_labels``.

    Returns:
        Tuple ``(test_cls, test_labels, neighbor_cls, neighbor_labels)`` of
        numpy arrays, in that order.

    Raises:
        KeyError: If one of the expected arrays is missing from the archive.
    """
    # np.load returns a lazily-read NpzFile that keeps the archive open; the
    # context manager closes it once every array has been read into memory.
    with np.load(file_path) as data:
        test_cls = data['test_cls']
        test_labels = data['test_labels']
        neighbor_cls = data['neighbor_cls']
        neighbor_labels = data['neighbor_labels']
    print("Data loaded successfully.")

    return test_cls, test_labels, neighbor_cls, neighbor_labels

In [12]:
# Evaluate the CLS-based memory classifier once per neighbourhood size k.
k_list = [5, 7, 9, 11, 13, 15, 17, 19, 21]
for k in k_list:
    print(f'Processing k={k}')

    # One archive per k (presumably holding the k retrieved neighbours of every test sample).
    file_path = f"output_data/cls/memory_for_k_{k}.npz"
    test_embeddings, test_labels, all_memory_embeddings, all_memory_labels = load_data_cls(file_path)
    num_classes = np.unique(test_labels).size

    print(f"Test CLS: {test_embeddings.shape}, Type: {type(test_embeddings)}")
    print(f"Test Labels: {test_labels.shape}, Type: {type(test_labels)}")
    print(f"Neighbor CLS: {all_memory_embeddings.shape}, Type: {type(all_memory_embeddings)}")
    print(f"Neighbor Labels: {all_memory_labels.shape}, Type: {type(all_memory_labels)}")
    print(f"Distince Number of Classes: {num_classes}")

    # Predictions for the CLS embeddings, one per test sample
    y_pred = []

    distance_metric = 'cosine'  # alternative: 'euclidean'

    # Walk the test samples together with their own memory (embeddings + labels)
    for selected_test_image_embedding, memory_embeddings, memory_labels in zip(
        test_embeddings, all_memory_embeddings, all_memory_labels
    ):
        distance_sums = compute_distance_sums(
            selected_test_image_embedding,
            memory_embeddings,
            memory_labels,
            predicted_softmax_label=None,
            static_distance=1,
            distance_metric=distance_metric,
            num_classes=num_classes,
        )
        predicted_class = get_predicted_class(distance_sums)
        y_pred.append(predicted_class)

    # Per-class precision / recall / F1 plus overall accuracy for this k
    print(classification_report(test_labels, y_pred))

Processing k=5
Data loaded successfully.
Test CLS: (10000, 192), Type: <class 'numpy.ndarray'>
Test Labels: (10000,), Type: <class 'numpy.ndarray'>
Neighbor CLS: (10000, 5, 192), Type: <class 'numpy.ndarray'>
Neighbor Labels: (10000, 5), Type: <class 'numpy.ndarray'>
Distince Number of Classes: 10
              precision    recall  f1-score   support

           0       0.78      0.75      0.76      1000
           1       0.73      0.73      0.73      1000
           2       0.84      0.55      0.66      1000
           3       0.57      0.52      0.54      1000
           4       0.71      0.67      0.69      1000
           5       0.62      0.62      0.62      1000
           6       0.66      0.86      0.75      1000
           7       0.73      0.76      0.75      1000
           8       0.81      0.79      0.80      1000
           9       0.66      0.80      0.73      1000

    accuracy                           0.71     10000
   macro avg       0.71      0.71      0.70     100

# Patch

In [54]:
def process_lists(lists, mode="sum"):
    """Combine equally-shaped rows element-wise by summing or averaging them.

    Args:
        lists: A ``list`` whose items are lists or numpy arrays (arrays are
            converted with ``tolist()`` before being combined).
        mode: ``"sum"`` adds the rows position by position, ``"average"``
            takes their mean.

    Returns:
        The combined row, converted back to plain Python via ``tolist()``.

    Raises:
        ValueError: If ``lists`` is not a list, if any item is neither a list
            nor an array that converts to one, or if ``mode`` is unknown.
    """
    if not isinstance(lists, list):
        raise ValueError("Input must be a list of lists.")

    # Normalise every row to a plain list, rejecting anything that is not one.
    rows = []
    for item in lists:
        row = item.tolist() if isinstance(item, np.ndarray) else item
        if not isinstance(row, list):
            raise ValueError("Each element in the input must be a list.")
        rows.append(row)

    reducers = {"sum": np.sum, "average": np.mean}
    if mode not in reducers:
        raise ValueError("Mode must be 'sum' or 'average'.")

    # Stack the rows and reduce across them (axis 0) in one vectorized call.
    stacked = np.array(rows)
    return reducers[mode](stacked, axis=0).tolist()

In [12]:
def load_data_patch(file_path: str):
    """Load the patch test embeddings and their neighbor memory from an ``.npz`` archive.

    Args:
        file_path: Path to an ``.npz`` file containing the arrays ``test_patch``,
            ``test_labels``, ``neighbor_path`` and ``neighbor_labels``.

    Returns:
        Tuple ``(test_patch, test_labels, neighbor_patch, neighbor_labels)`` of
        numpy arrays, in that order.

    Raises:
        KeyError: If one of the expected arrays is missing from the archive.
    """
    # np.load returns a lazily-read NpzFile that keeps the archive open; the
    # context manager closes it once every array has been read into memory.
    with np.load(file_path) as data:
        test_patch = data['test_patch']
        test_labels = data['test_labels']
        # NOTE(review): the key is spelled 'neighbor_path' (not 'neighbor_patch').
        # Presumably the code that writes the archive uses this spelling - confirm
        # before renaming it on either side.
        neighbor_patch = data['neighbor_path']
        neighbor_labels = data['neighbor_labels']
    print("Data loaded successfully.")

    return test_patch, test_labels, neighbor_patch, neighbor_labels

In [55]:
# Load the patch-level test embeddings together with their per-patch memory.
patch_file_path = "output_data/patch/patch_memory.npz"
test_embeddings, test_labels, all_memory_embeddings, all_memory_labels = load_data_patch(patch_file_path)

# Number of distinct labels present in the test set
num_classes = np.unique(test_labels).size

# Report the shape and container type of every loaded array
print(f"Test Patch: {test_embeddings.shape}, Type: {type(test_embeddings)}")
print(f"Test Labels: {test_labels.shape}, Type: {type(test_labels)}")
print(f"Neighbor Patch: {all_memory_embeddings.shape}, Type: {type(all_memory_embeddings)}")
print(f"Neighbor Labels: {all_memory_labels.shape}, Type: {type(all_memory_labels)}")
print(f"Distince Number of Classes: {num_classes}")

Test Patch: (10000, 196, 192), Type: <class 'numpy.ndarray'>
Test Labels: (10000,), Type: <class 'numpy.ndarray'>
Neighbor Patch: (10000, 196, 5, 192), Type: <class 'numpy.ndarray'>
Neighbor Labels: (10000, 196, 5), Type: <class 'numpy.ndarray'>
Distince Number of Classes: 10


In [92]:
# For Patch
# Classify every test image by combining the per-class votes of all its patches.
y_pred = []

calculation_mode = 'sum' # 'average'
distance_metric = 'cosine' # 'euclidean'

# Outer loop: one test image with the memory (embeddings + labels) of each of its patches
for selected_test_image_embedding, specific_memory_embeddings, specific_memory_labels in zip(test_embeddings, all_memory_embeddings, all_memory_labels):
    distance_sums_list = []

    # Inner loop: one patch of the image together with that patch's own neighbours
    for patch_embedding, memory_embeddings, memory_labels in zip(selected_test_image_embedding, specific_memory_embeddings, specific_memory_labels):
        # Pass num_classes explicitly (computed when the patch data was loaded) so
        # the vote vector matches the dataset instead of the function's default of 10.
        distance_sums_list.append(
            compute_distance_sums(
                patch_embedding,
                memory_embeddings,
                memory_labels,
                predicted_softmax_label=None,
                static_distance=1,
                distance_metric=distance_metric,
                num_classes=num_classes,
            )
        )

    # Merge the per-patch vote vectors into one vector for the whole image
    distance_sums = process_lists(distance_sums_list, mode=calculation_mode)
    predicted_class = get_predicted_class(distance_sums)
    y_pred.append(predicted_class)


# Per-class precision / recall / F1 plus overall accuracy
print(classification_report(test_labels, y_pred))

              precision    recall  f1-score   support

           0       0.72      0.65      0.69      1000
           1       0.79      0.69      0.74      1000
           2       0.83      0.45      0.58      1000
           3       0.65      0.37      0.48      1000
           4       0.71      0.59      0.64      1000
           5       0.58      0.70      0.64      1000
           6       0.61      0.83      0.70      1000
           7       0.68      0.79      0.73      1000
           8       0.80      0.80      0.80      1000
           9       0.59      0.91      0.72      1000

    accuracy                           0.68     10000
   macro avg       0.70      0.68      0.67     10000
weighted avg       0.70      0.68      0.67     10000

