In [1]:
import numpy as np
import pandas as pd
from PIL import Image as Image2
import matplotlib.pyplot as plt
import time
import argparse
import yaml
import pickle
from datetime import datetime
import gc
import os
import wandb

import torch
from transformers import ViTFeatureExtractor
from SCL_reID.utils.pytorch_data import *
from SCL_reID.models.pytorch_models import *
from pytorch_metric_learning import losses, miners
from pytorch_metric_learning.distances import CosineSimilarity
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler


  from .autonotebook import tqdm as notebook_tqdm
2024-09-25 10:21:53.741197: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# FUNCTION TO PERFORM KNN EVALUATION
#
def knn_evaluation(train_images, train_labels, test_images, test_labels, n_neighbors, per_class=True, conf_matrix=True):
    # BUILD KNN MODEL AND PREDICT
    results = {}
    print(f'Training kNN classifier with k={n_neighbors}')
    my_knn = KNeighborsClassifier(n_neighbors=n_neighbors, metric='cosine')
    my_knn.fit(train_images, train_labels)
    knn_pred = my_knn.predict(test_images)
    knn_acc = np.round(np.sum([1 for pred, label in zip(knn_pred, test_labels) if pred == label])/test_labels.shape[0],4)
    print(f'{n_neighbors}NN test accuracy: {knn_acc}')
    # store results
    results['n_neighbors'] = n_neighbors
    results['knn'] = knn_acc
    label_list = np.unique(train_labels)
    results['label_list'] = label_list
    if per_class:
        knn_class = np.zeros(len(label_list))
        print(f'\nPer label {n_neighbors}NN test accuracy:')
        for k, label in enumerate(label_list):
            mask = test_labels == label
            knn_class[k] = np.round(np.sum(knn_pred[mask]==test_labels[mask])/np.sum(mask),4)
            print(f'{label}\t{knn_class[k]:.2f}')
        # store results
        results['knn_class'] = knn_class
    if conf_matrix:
        knn_conf = confusion_matrix(test_labels, knn_pred)
        results['knn_conf'] = knn_conf
        print('\nPrinting Confusion Matrix:')
        print(results['knn_conf'])
    return results
#########################################################################################

In [3]:
#load config file params:
config_file = "/home/lmeyers/contrastive_learning_new_training/64_ids_batch1_sample_num_64/64_ids_batch1_sample_num_64.yml"
verbose = True

try:
    with open(config_file) as f:
        config = yaml.safe_load(f)
    model_config = config['model_settings'] # settings for model building
    train_config = config['train_settings'] # settings for model training
    data_config = config['data_settings'] # settings for data loading
    eval_config = config['eval_settings'] # settings for evaluation
    torch_seed = config['torch_seed']
    verbose = config['verbose']
except Exception as e:
    print('ERROR - unable to open experiment config file. Terminating.')
    print('Exception msg:',e)
if verbose:
    # ADD PRINT OF DATE AND TIME
    now = datetime.now() # current date and time
    dt = now.strftime("%y-%m-%d %H:%M")
    print(f'Date and time when this experiment was started: {dt}')
    print(f'Date and time when this experiment was started: {dt}')
    print("Data Settings:")
    print(data_config)
    print("Train Settings:")
    print(train_config)
    print("Model Settings:")
    print(model_config)

Date and time when this experiment was started: 24-09-25 10:21
Date and time when this experiment was started: 24-09-25 10:21
Data Settings:
{'aug_p': 0.3, 'batch_size': 64, 'crop_height': None, 'crop_left': None, 'crop_top': None, 'crop_width': None, 'cropped': False, 'datafiles': {'gallery': '/home/lmeyers/ReID_complete/summer_2023_reid_galleries_closed.csv', 'query': '/home/lmeyers/summer_bee_data_reextract/new_open_max_ids_batch2/summer_bee_dataset_open_train_bee_64_ids_batch2_sample_num_max.csv', 'reference': '/home/lmeyers/summer_bee_data_reextract/new_open_max_ids_batch2/summer_bee_dataset_open_train_bee_64_ids_batch2_sample_num_02.csv', 'test': '/home/lmeyers/summer_bee_data_reextract/new_open_max_ids_batch2/summer_bee_dataset_open_train_bee_64_ids_batch2_sample_num_max.csv', 'train': '/home/lmeyers/summer_bee_data_reextract/new_open_max_ids_batch1/summer_bee_dataset_open_train_bee_64_ids_batch1_sample_num_64.csv', 'valid': ''}, 'dataset': 'summer_2023', 'fname_col': 'new_filep

In [4]:
import sys 
sys.path.insert(0,"/home/lmeyers/ReID_complete/SCL_reID")

In [5]:
# #tweak config file data to test different scenarios
# data_config['datafiles']['reference'] = '/home/gsantiago/summer_bee_data/closed_sets_max_ids_batch1/summer_bee_dataset_closed_train_bee_balanced_batch1_sample_num_2.csv'

In [10]:
from SCL_reID.models.pytorch_models import *
from SCL_reID.utils import *

model = ViTForReID()
model.load(model_path)#ViTForReID(load_saved=True,model_path=model_path)

#model = torch.load('/home/lmeyers/ReID_complete/checkpoints/300.pth')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
if verbose:
    print(f'Device: {device}')
model.to(device)

AttributeError: 'ViTForReID' object has no attribute 'load'

In [8]:
miner = miners.MultiSimilarityMiner()
loss_fn = losses.TripletMarginLoss(train_config['margin'], distance = CosineSimilarity())

In [9]:

##########################################################################################
# FUNCTION TO GET EMBEDDINGS AND LABELS FOR EVALUATING MODEL
def get_embeddings(model, dataloader, loss_fn, miner, device, feature_extractor=None):
    embeddings = []
    all_labels = []
    loss = 0.0
    with torch.no_grad():
        for k, batch in enumerate(dataloader):
            if feature_extractor is None:
                images = batch['image'].to(device)
            else:
                images = [transforms.functional.to_pil_image(x) for x in batch['image']]
                images = np.concatenate([feature_extractor(x)['pixel_values'] for x in images])
                images = torch.tensor(images, dtype=torch.float).to(device)
            labels = batch['label'].to(device)
            outputs = model(images)
            hard_pairs = miner(outputs, labels)
            loss += loss_fn(outputs, labels, hard_pairs).detach().cpu().numpy()
            embeddings.append(outputs.detach().cpu().numpy())
            all_labels += list(labels.detach().cpu().numpy())
    embeddings = np.vstack(embeddings)
    all_labels = np.array(all_labels)
    loss/=k
    return embeddings, all_labels, loss
##########################################################################################

In [10]:
# load VIT feature extractor if needed

# print('Getting ViT feature extractor...')
# model_name = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(model_name)


Getting ViT feature extractor...




In [24]:
# Set model to eval 
if verbose:
    print('Evaluating model...')
model.eval()

Evaluating model...


ViTForReID(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=768, out_feature

In [26]:
#get dataloaders and embeddings 
print("generating dataloaders from dataset")

test_dataloader, reference_dataloader = get_dataset(data_config, 'test',generate_valid=True) #generate valid automatically

reference_embeddings, reference_labels, reference_loss = get_embeddings(model, reference_dataloader, loss_fn, miner, device, feature_extractor)
print('Got_reference_embeddings')
print(reference_embeddings.shape)
test_embeddings, test_labels, test_loss = get_embeddings(model, test_dataloader, loss_fn, miner, device, feature_extractor)
print('Got test embeddings')
print(test_embeddings.shape)


print(f'Reference (or Train) Loss: {reference_loss:.4f}')
print(f'Test (or Query) Loss: {test_loss:.4f}')


generating dataloaders from dataset
Using 2173 samples for reference set
8693 total test samples
Got_reference_embeddings
(2173, 128)
Got test embeddings
(8693, 128)
Reference (or Train) Loss: 0.2061
Test (or Query) Loss: 0.2015


In [14]:
reference_data_batch = os.path.dirname(data_config['datafiles']['reference'])#[-1:]
print(reference_data_batch)
query_data_batch = os.path.dirname(data_config['datafiles']['query'])#[-1:]
print(query_data_batch)


/home/lmeyers/summer_bee_data_reextract/new_open_max_ids_batch2
/home/lmeyers/summer_bee_data_reextract/new_open_max_ids_batch2


In [34]:
b1_to_b2 = {10: 74, 11: 75, 15: 79, 14: 78, 12: 76, 16: 80, 13: 77, 9: 73, 18: 82, 19: 83, 23: 87, 22: 86, 20: 84, 24: 88, 21: 85, 17: 81, 50: 114, 51: 115, 55: 119, 54: 118, 52: 116, 56: 120, 53: 117, 49: 113, 42: 106, 43: 107, 47: 111, 46: 110, 44: 108, 48: 112, 45: 109, 41: 105, 26: 90, 27: 91, 31: 95, 30: 94, 28: 92, 32: 96, 29: 93, 25: 89, 58: 122, 59: 123, 63: 127, 62: 126, 60: 124, 64: 128, 61: 125, 57: 121, 34: 98, 35: 99, 39: 103, 38: 102, 36: 100, 40: 104, 37: 101, 33: 97, 2: 66, 3: 67, 7: 71, 6: 70, 68: 68, 8: 72, 5: 69, 1: 65}
b2_to_b1 = {74: 10, 75: 11, 79: 15, 78: 14, 76: 12, 80: 16, 77: 13, 73: 9, 82: 18, 83: 19, 87: 23, 86: 22, 84: 20, 88: 24, 85: 21, 81: 17, 114: 50, 115: 51, 119: 55, 118: 54, 116: 52, 120: 56, 117: 53, 113: 49, 106: 42, 107: 43, 111: 47, 110: 46, 108: 44, 112: 48, 109: 45, 105: 41, 90: 26, 91: 27, 95: 31, 94: 30, 92: 28, 96: 32, 93: 29, 89: 25, 122: 58, 123: 59, 127: 63, 126: 62, 124: 60, 128: 64, 125: 61, 121: 57, 98: 34, 99: 35, 103: 39, 102: 38, 100: 36, 104: 40, 101: 37, 97: 33, 66: 2, 67: 3, 71: 7, 70: 6, 68: 68, 72: 8, 69: 5, 65: 1}

reference_data_batch = os.path.dirname(data_config['datafiles']['reference'])[-1:]
query_data_batch = os.path.dirname(data_config['datafiles']['query'])[-1:]


if reference_data_batch != query_data_batch and data_config['label_col'] != 'color_num':
    if reference_data_batch > query_data_batch:
        for i in range(len(test_labels)):
            test_labels[i] = b1_to_b2[test_labels[i]]
    else: 
        for i in range(len(test_labels)):
            test_labels[i] = b2_to_b1[test_labels[i]]


In [27]:

results = knn_evaluation(reference_embeddings, reference_labels, test_embeddings, test_labels, 
                        eval_config['n_neighbors'], eval_config['per_class'], eval_config['conf_matrix'])


Training kNN classifier with k=3
3NN test accuracy: 0.033

Per label 3NN test accuracy:
65	0.11
66	0.00
67	0.01
68	0.00
69	0.01
70	0.00
71	0.00
72	0.12
73	0.16
74	0.00
75	0.05
76	0.11
77	0.01
78	0.24
79	0.00
80	0.03
81	0.17
82	0.08
83	0.03
84	0.00
85	0.00
86	0.00
87	0.03
88	0.00
89	0.00
90	0.07
91	0.00
92	0.03
93	0.08
94	0.00
95	0.00
96	0.00
97	0.00
98	0.02
99	0.00
100	0.00
101	0.00
102	0.00
103	0.00
104	0.00
105	0.00
106	0.00
107	0.00
108	0.00
109	0.00
110	0.00
111	0.00
112	0.00
113	0.00
114	0.00
115	0.00
116	0.00
117	0.00
118	0.00
119	0.00
120	0.00
121	0.07
122	0.00
123	0.00
124	0.00
125	0.00
126	0.00
127	0.00
128	0.00

Printing Confusion Matrix:
[[19  0  0 ...  0  0  0]
 [14  0  0 ...  0  0  0]
 [34  0  1 ...  0  0  0]
 ...
 [ 7  0  0 ...  0  0  0]
 [ 5  0  0 ...  0  0  0]
 [ 8  0  0 ...  0  0  0]]
