In [1]:
from datetime import datetime
import numpy as np
import pandas as pd
import torch
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

from pytorch_models import build_model
from pytorch_data import *
import yaml

  from .autonotebook import tqdm as notebook_tqdm
2023-10-13 03:31:26.910323: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
#load config file params:
config_file = '/home/lmeyers/ReID_complete/summer_data_closed_reid_color_code.yml'
verbose = True

try:
    with open(config_file) as f:
        config = yaml.safe_load(f)
    model_config = config['model_settings'] # settings for model building
    train_config = config['train_settings'] # settings for model training
    data_config = config['data_settings'] # settings for data loading
    eval_config = config['eval_settings'] # settings for evaluation
    torch_seed = config['torch_seed']
    verbose = config['verbose']
except Exception as e:
    print('ERROR - unable to open experiment config file. Terminating.')
    print('Exception msg:',e)
if verbose:
    # ADD PRINT OF DATE AND TIME
    now = datetime.now() # current date and time
    dt = now.strftime("%y-%m-%d %H:%M")
    print(f'Date and time when this experiment was started: {dt}')
    print(f'Date and time when this experiment was started: {dt}')
    print("Data Settings:")
    print(data_config)
    print("Train Settings:")
    print(train_config)
    print("Model Settings:")
    print(model_config)

Date and time when this experiment was started: 23-10-13 03:31
Date and time when this experiment was started: 23-10-13 03:31
Data Settings:
{'dataset': 'summer_2023_color_detect', 'split_type': 'closed', 'fname_col': 'filename', 'label_col': 'color_num', 'cropped': False, 'crop_top': None, 'crop_left': None, 'crop_height': None, 'crop_width': None, 'input_size': [250, 250], 'batch_size': 64, 'aug_p': 0.3, 'datafiles': {'train': '/home/lmeyers/ReID_complete/summer_2023_reid_train_closed.csv', 'valid': '/home/lmeyers/ReID_complete/summer_2023_reid_valid_closed.csv', 'test': '/home/lmeyers/ReID_complete/summer_2023_reid_test_closed.csv', 'reference': '/home/lmeyers/ReID_complete/summer_2023_reid_train_closed.csv', 'query': '/home/lmeyers/ReID_complete/summer_2023_reid_test_closed.csv', 'gallery': '/home/lmeyers/ReID_complete/summer_2023_reid_galleries_closed.csv'}, 'n_distractors': 9, 'image_id_col': 'image_id', 'gallery_id': 'gallery_id', 'iteration_id': 'iteration_id'}
Train Settings:


In [3]:
#data_config['datafiles']['gallery'] = '/home/lmeyers/ReID_complete/galleries_test.csv'

In [4]:
###################################################################################################
# FUNCTION TO PERFORM CMC GALLERY EVALUATION
#
# INPUTS
# 1) model: a Pytorch model
# 2) model_type: string, specifies model type ('SCL', 'UCL', 'MTL')
# 3) data_config: dictionary, contains necessary parameters to load images, including 'gallery_fname', 'fname_col', 'gallery_id',
#                             'iteration_id', 'image_id_col' and 'n_distractors'
# 4) verbose: bool, whether print out comments
#
# OUTPUTS
# 1) ranks: float list, cmc scores from top-1 to top-k
#
def evaluate_cmc(model_config,eval_config, data_config, verbose=False):

    # load model
    model = build_model(model_config)
    model = torch.load(model_config['model_path'])
    model.eval()

    if verbose:
        print('Getting gallery images')
    df = pd.read_csv(data_config['datafiles']['gallery'])
    # get the first n_distractors plus anchor and positive (data frame can have more distractors)
    df = df[df[data_config['image_id_col']] < data_config['n_distractors'] + 2]
    # get images
    dataloader = get_galleries(data_config)
    # get embeddings for images
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
    model.to(device)
    predictions = []
    with torch.no_grad():
        for data in dataloader:
                images = data['image']
                outputs = model(images.to(device))
                predictions.append(outputs.detach().cpu().numpy())
    predictions = np.concatenate(predictions, axis=0)
    if verbose:
        print('Finished embedding images')

    query_gallery_size = df[data_config['image_id_col']].max() + 1
    n_distractors = query_gallery_size - 2
    query_gallery_size, n_distractors

    # calculate total num of galleries across all iterations
    galleries_per_iteraration = len(df[data_config['gallery_id']].unique())
    iterations = df[data_config['iteration_id']].max() + 1
    total_galleries =  galleries_per_iteraration * iterations
    galleries_per_iteraration, iterations, total_galleries

    # get queries embedding (i % query_gallery_size == 0)
    queries_emb = predictions[::query_gallery_size]
    print(queries_emb.size)
    # get the gallery embeddings, i.e. everything other than the query embeddings
    pred_idx = np.arange(0, len(predictions))
    print(pred_idx.shape)
    galleries_emb = predictions[np.mod(pred_idx, query_gallery_size) != 0]
    print(galleries_emb.shape)


    # for each gallery, one query of shape 128
    queries_emb = queries_emb.reshape(total_galleries, 1, -1)
    # for each gallery, n_distractors + P images of shape n_embedding
    galleries_emb = galleries_emb.reshape(total_galleries, n_distractors + 1, -1 )
    galleries_emb = galleries_emb.transpose((0, 2, 1))


    # Calculate distance
    cos_dist = np.dot(queries_emb, galleries_emb.T)
    euclid_dist = -(cos_dist - 1)

    # Calculate Rank
    r = np.argmin(np.argsort(euclid_dist), axis=2)
    r = np.squeeze(r)
    ranks = np.zeros(n_distractors)
    for i in range(n_distractors):
        ranks[i] = np.mean(r < (i + 1))

    return ranks
###################################################################################################



In [5]:
print(evaluate_cmc(model_config,eval_config,data_config,verbose=True))

Getting gallery images
Finished embedding images
1280000
(110000,)
(100000, 128)
[0.10004968 0.20016592 0.30025013 0.400309   0.50033644 0.60027885
 0.70019559 0.80011335 0.90010365]
