In [2]:
import os
import re
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, f1_score, precision_score
import tabulate
import warnings
import torch
import multiprocessing as mp
warnings.filterwarnings('ignore')

import data_preprocessing
import EDCR_pipeline
import vit_pipeline
import utils

# EDCR Results

In [32]:
def get_metrics(test_true: np.array,
                prior_predictions: np.array,
                post_predictions: np.array) -> dict:
    """Score the predictions made before ('prior') and after ('post') correction.

    :param test_true: Ground-truth labels.
    :param prior_predictions: Predictions compared against `test_true` under the 'prior' key.
    :param post_predictions: Predictions compared against `test_true` under the 'post' key.
    :return: ``{'prior': {...}, 'post': {...}}`` where each inner dict holds
             'acc' (accuracy) and 'pre' / 'rec' / 'f1' (weighted-average
             precision, recall and F1), in that order.
    """
    weighted_scorers = {'pre': precision_score, 'rec': recall_score, 'f1': f1_score}
    predictions_by_stage = {'prior': prior_predictions, 'post': post_predictions}

    metrics = {}
    for stage, stage_predictions in predictions_by_stage.items():
        stage_metrics = {'acc': accuracy_score(y_true=test_true, y_pred=stage_predictions)}
        for metric_name, scorer in weighted_scorers.items():
            stage_metrics[metric_name] = scorer(y_true=test_true,
                                                y_pred=stage_predictions,
                                                average='weighted')
        metrics[stage] = stage_metrics

    return metrics


def _load_main_model_arrays(main_granularity: str,
                            main_model_name: str,
                            main_lr: str) -> tuple:
    """Load the ground truth and the main model's (prior) test predictions from the data folder."""
    main_suffix = '_coarse' if main_granularity == 'coarse' else ''
    test_true = np.load(os.path.join(EDCR_pipeline.data_folder, f'test_true{main_suffix}.npy'))
    prior_predictions = np.load(os.path.join(EDCR_pipeline.data_folder,
                                             f'{main_model_name}_test_pred_lr{main_lr}_e3{main_suffix}.npy'))
    return test_true, prior_predictions


def _nested_bucket(root: dict, keys: tuple) -> dict:
    """Walk `root` along `keys`, creating empty dicts for missing levels, and return the innermost dict."""
    node = root
    for key in keys:
        node = node.setdefault(key, {})
    return node


def gather_EDCR_data() -> dict:
    """Collect prior/post metrics for every EDCR run folder found in `EDCR_pipeline.figs_folder`.

    Two folder-name layouts are recognised:

    * ``main_<fine|coarse>_<model>_lr<lr>_secondary_<fine|coarse>_<model>_lr<lr>``
      Results are read from ``results.npy`` (fine secondary) or
      ``results_coarse.npy`` (coarse secondary) and stored under
      ``data[main_granularity][main_model][secondary_granularity][secondary_model][main_lr][secondary_lr]``.
    * ``main_<fine|coarse>_<model>_lr<lr>_secondary_<model>_lr<lr>``
      Results are read from ``results.npy``, falling back to
      ``results_coarse.npy``, and stored under
      ``data[main_granularity][main_model][secondary_model][main_lr][secondary_lr]``.

    Entries in the folder that match neither layout are ignored.

    :return: The nested dictionary described above; leaves are `get_metrics` results.
    :raises FileNotFoundError: Propagated from `np.load` when an expected array is missing.
    """
    data = {}

    with_secondary_granularity = re.compile(
        'main_(fine|coarse)_(.+?)_lr(.+?)_secondary_(fine|coarse)_(.+?)_lr(.+)')
    without_secondary_granularity = re.compile(
        'main_(fine|coarse)_(.+)_lr(.+)_secondary_(.+)_lr(.+)')

    for filename in os.listdir(EDCR_pipeline.figs_folder):
        name_match = with_secondary_granularity.match(filename)

        if name_match:
            (main_granularity,
             main_model_name,
             main_lr,
             secondary_granularity,
             secondary_model_name,
             secondary_lr) = name_match.groups()
            bucket_keys = (main_granularity, main_model_name,
                           secondary_granularity, secondary_model_name, main_lr)
            secondary_suffix = '_coarse' if secondary_granularity == 'coarse' else ''
            candidate_result_files = [f'results{secondary_suffix}.npy']
        else:
            name_match = without_secondary_granularity.match(filename)
            if not name_match:
                continue

            (main_granularity,
             main_model_name,
             main_lr,
             secondary_model_name,
             secondary_lr) = name_match.groups()
            bucket_keys = (main_granularity, main_model_name, secondary_model_name, main_lr)
            # Without an explicit secondary granularity either file may be present.
            candidate_result_files = ['results.npy', 'results_coarse.npy']

        test_true, prior_predictions = _load_main_model_arrays(main_granularity=main_granularity,
                                                               main_model_name=main_model_name,
                                                               main_lr=main_lr)

        # Read the results from the same folder that was listed above instead of a
        # hard-coded relative 'figs/' path, so the function does not depend on the
        # current working directory.
        run_folder = os.path.join(EDCR_pipeline.figs_folder, name_match.group(0))
        post_predictions = None
        for position, result_file in enumerate(candidate_result_files):
            try:
                post_predictions = np.load(os.path.join(run_folder, result_file))
                break
            except FileNotFoundError:
                # Only fall through while another candidate is left to try.
                if position == len(candidate_result_files) - 1:
                    raise

        _nested_bucket(data, bucket_keys)[secondary_lr] = get_metrics(test_true=test_true,
                                                                      prior_predictions=prior_predictions,
                                                                      post_predictions=post_predictions)
    return data


def get_row_addition(secondary_lr: str,
                     curr_data: dict,
                     max_accuracy: float = None) -> tuple[str, float, float]:
    """Format one table-cell line describing a secondary learning rate's effect.

    :param secondary_lr: Secondary-model learning rate label; only interpolated into the text.
    :param curr_data: Dict with 'prior' and 'post' entries, each providing
                      'acc' and 'f1' as fractions (they are multiplied by 100 here).
    :param max_accuracy: Optional percentage; when the post accuracy equals it
                         (within 1e-5) the accuracy is rendered with `utils.blue_text`.
    :return: ``(row_text, prior_accuracy_percent, prior_f1_percent)``; `row_text`
             ends with a newline.
    """
    roundoff = 2

    def as_percent(fraction: float) -> float:
        return round(fraction * 100, roundoff)

    prior_metrics = curr_data['prior']
    post_metrics = curr_data['post']

    # Accuracy: post value, coloured delta against the prior value.
    prior_accuracy = as_percent(prior_metrics['acc'])
    post_accuracy = as_percent(post_metrics['acc'])
    accuracy_diff = round(post_accuracy - prior_accuracy, roundoff)

    is_best_accuracy = max_accuracy is not None and abs(post_accuracy - max_accuracy) < 1e-5
    post_accuracy_text = utils.blue_text(post_accuracy) if is_best_accuracy else str(post_accuracy)

    if accuracy_diff > 0:
        accuracy_sign_text = utils.green_text('+')
        accuracy_diff_text = utils.green_text(f'{accuracy_diff}%')
    else:
        # Zero and negative deltas are both rendered in red, without a sign prefix.
        accuracy_sign_text = ''
        accuracy_diff_text = utils.red_text(f'{accuracy_diff}%')

    # F1: post value and signed delta (only the '+' sign is coloured).
    prior_f1 = as_percent(prior_metrics['f1'])
    post_f1 = as_percent(post_metrics['f1'])
    f1_diff = round(post_f1 - prior_f1, roundoff)
    f1_sign_text = utils.green_text('+') if f1_diff > 0 else ''

    row_addition = (f"{secondary_lr}: acc: {post_accuracy_text}%, ({accuracy_sign_text}"
                    + accuracy_diff_text
                    + f'), f1: {post_f1}% ({f1_sign_text}{f1_diff})'
                    + '\n')

    return row_addition, prior_accuracy, prior_f1


def get_row_data(main_lr_data: dict,
                 secondary_lr: str) -> tuple[str, float, float]:
    """Format the table line for one secondary learning rate of a main-model run.

    :param main_lr_data: Mapping from secondary learning rate to its prior/post metrics.
    :param secondary_lr: Key into `main_lr_data` selecting the entry to format.
    :return: ``(row_text, prior_accuracy_percent, prior_f1_percent)`` exactly as
             produced by `get_row_addition`.
    :raises KeyError: If `secondary_lr` is not present in `main_lr_data`.
    """
    return get_row_addition(secondary_lr=secondary_lr,
                            curr_data=main_lr_data[secondary_lr])


def print_one_secondary_granularity(main_model_data: dict,
                                    k: str,
                                    main_granularity: str,
                                    main_model_name: str):
    """Print a grid table for one secondary granularity of a main model.

    Rows are secondary models (sorted by name), columns are main learning
    rates, and every cell concatenates one `get_row_data` line per secondary
    learning rate. Column headers carry the prior accuracy / F1 recorded for
    that main learning rate.

    :param main_model_data: Data of the main model; `main_model_data[k]` maps
                            secondary model -> main lr -> secondary lr -> metrics.
    :param k: The secondary granularity to print.
    :param main_granularity: Granularity of the main model (used in the title only).
    :param main_model_name: Name of the main model (used in the title only).
    """
    per_secondary_model = main_model_data[k]

    # Column order is taken from whichever secondary model is listed first.
    first_secondary_model = list(per_secondary_model.keys())[0]
    main_learning_rates = sorted(per_secondary_model[first_secondary_model].keys())

    priors = {}
    body_rows = []

    for secondary_model_name in sorted(per_secondary_model.keys()):
        per_main_lr = per_secondary_model[secondary_model_name]
        cells = []

        for main_lr in sorted(per_main_lr.keys()):
            per_secondary_lr = per_main_lr[main_lr]
            cell_lines = []

            for secondary_lr in sorted(per_secondary_lr.keys()):
                line, prior_acc, prior_f1 = get_row_data(main_lr_data=per_secondary_lr,
                                                         secondary_lr=secondary_lr)
                cell_lines.append(line)
                priors[main_lr] = {'acc': prior_acc, 'f1': prior_f1}

            cells.append(''.join(cell_lines))

        body_rows.append([secondary_model_name] + cells)

    header_row = [''] + [f"{main_lr} (acc: {priors[main_lr]['acc']}%, f1: {priors[main_lr]['f1']}%)"
                         for main_lr in main_learning_rates]

    table = tabulate.tabulate(tabular_data=[header_row] + body_rows,
                              headers='firstrow',
                              tablefmt='grid')

    print(f"Main model: {main_granularity.capitalize()}-grain {main_model_name}, "
          f"secondary granularity: {k}")
    print(table)
    print("\n")


def print_two_secondary_granularities(main_model_data: dict,
                                      two_secondary_table_data: list,
                                      k: str,
                                      main_granularity: str,
                                      main_model_name: str):
    """Append one secondary model's row to a shared table and print the table once it is full.

    The table has one column per entry of `vit_pipeline.lrs` (sorted) and is
    considered full when it holds ``len(vit_pipeline.lrs)`` model rows plus the
    header.
    NOTE(review): that completion rule ties the number of secondary models to
    the number of learning rates - confirm this is intended.

    :param main_model_data: Data of the main model; `main_model_data[k]` maps
                            main lr -> secondary lr -> metrics.
    :param two_secondary_table_data: Accumulated table rows; extended in place.
                                     An empty list starts a new table.
    :param k: Name of the secondary model whose row is added.
    :param main_granularity: Granularity of the main model (used in the title only).
    :param main_model_name: Name of the main model (used in the title only).
    :return: The accumulated table. It is returned after printing as well, so a
             caller that keeps feeding the result back never receives `None`.
    """
    main_learning_rates = sorted(vit_pipeline.lrs)

    # Start the table with a provisional header on first use.
    if len(two_secondary_table_data) == 0:
        two_secondary_table_data += [[''] + main_learning_rates]

    secondary_model_data = main_model_data[k]
    priors = {}
    row = [k]

    # Fill the cells in header order. The data is keyed by the learning rate's
    # string form (the header code below already relies on str(main_lr)), so
    # iterating the header's own sequence keeps every cell under its column even
    # when numeric and lexicographic orderings differ.
    for main_lr in main_learning_rates:
        main_lr_data = secondary_model_data.get(str(main_lr))
        if main_lr_data is None:
            # No run for this learning rate: leave the cell empty rather than
            # shifting the remaining cells to the left.
            row.append('')
            continue

        cell_lines = []
        for secondary_lr in sorted(main_lr_data.keys()):
            row_addition, curr_prior_acc, curr_prior_average_f1 = get_row_data(main_lr_data=main_lr_data,
                                                                               secondary_lr=secondary_lr)
            cell_lines.append(row_addition)
            priors[str(main_lr)] = {'acc': curr_prior_acc, 'f1': curr_prior_average_f1}

        row.append(''.join(cell_lines))

    two_secondary_table_data += [row]

    if len(two_secondary_table_data) == len(main_learning_rates) + 1:
        header = ['']
        for main_lr in main_learning_rates:
            prior = priors.get(str(main_lr))
            # Fall back to the bare learning rate when no prior was collected for it.
            header.append(f"{main_lr} acc: ({prior['acc']}%)" if prior is not None else f"{main_lr}")
        two_secondary_table_data[0] = header

        table = tabulate.tabulate(
            tabular_data=two_secondary_table_data,
            headers='firstrow',
            tablefmt='grid'
        )

        print(f"Main model: {main_granularity.capitalize()}-grain {main_model_name} "
              f"with both fine and coarse grain secondary models")
        print(table)
        print("\n")

    return two_secondary_table_data


def print_EDCR_tables():
    """Gather all EDCR results and print their tables, grouped by main granularity and main model.

    For each main model, keys that are granularities are printed first (one
    table each), followed by keys that are secondary model names, whose rows
    are accumulated into a single combined table.
    """
    data = gather_EDCR_data()

    for main_granularity in sorted(data.keys()):
        banner = '#' * 40 + f' Main granularity: {main_granularity} ' + '#' * 40 + '\n' + '#' * 104 + '\n'
        print(banner)
        main_granularity_data = data[main_granularity]

        for main_model_name in sorted(main_granularity_data.keys()):
            main_model_data = main_granularity_data[main_model_name]
            available_keys = set(main_model_data.keys())

            granularity_keys = sorted(available_keys.intersection(data_preprocessing.granularities.values()))
            secondary_model_keys = sorted(available_keys.intersection(vit_pipeline.vit_model_names))

            two_secondary_table_data = []

            for k in granularity_keys + secondary_model_keys:
                if k not in data_preprocessing.granularities.values():
                    # Secondary-model key: add its row to the combined table.
                    two_secondary_table_data = print_two_secondary_granularities(
                        main_model_data=main_model_data,
                        two_secondary_table_data=two_secondary_table_data,
                        k=k,
                        main_granularity=main_granularity,
                        main_model_name=main_model_name)
                    continue

                print_one_secondary_granularity(main_model_data=main_model_data,
                                                k=k,
                                                main_granularity=main_granularity,
                                                main_model_name=main_model_name)

            print('#' * 100)

# Print the EDCR comparison tables for every result folder currently on disk.
print_EDCR_tables()

######################################## Main granularity: coarse ########################################
########################################################################################################

Main model: Coarse-grain vit_b_16, secondary granularity: coarse
+----------+--------------------------------------------------+----------------------------------------------------+--------------------------------------------------+
|          | 1e-05 (acc: 80.94%, f1: 80.89%)                  | 1e-06 (acc: 65.64%, f1: 61.96%)                    | 5e-05 (acc: 83.71%, f1: 83.48%)                  |
| vit_b_32 | 1e-05: acc: 81.0%, ([92m+[0m[92m0.06%[0m), f1: 80.94% ([92m+[0m0.05)  | 1e-05: acc: 73.04%, ([92m+[0m[92m7.4%[0m), f1: 68.94% ([92m+[0m6.98)    | 1e-05: acc: 82.36%, ([91m-1.35%[0m), f1: 82.0% (-1.48)  |
|          | 1e-06: acc: 80.94%, ([91m0.0%[0m), f1: 80.89% (0.0)     | 1e-06: acc: 65.33%, ([91m-0.31%[0m), f1: 61.84% (-0.12)   | 1e-06: acc: 83.71%, 

In [6]:
# Configuration named "best" for a coarse-grain main model, and its stored arrays.
best_coarse_main_model = 'vit_b_16'
best_coarse_main_lr = '5e-05'
best_coarse_secondary_model = 'vit_l_16'
best_coarse_secondary_lr = '1e-05'
best_coarse_folder = f'main_coarse_{best_coarse_main_model}_lr{best_coarse_main_lr}_secondary_{best_coarse_secondary_model}_lr{best_coarse_secondary_lr}'
# os.path.join works whether or not figs_folder ends with a path separator.
best_coarse_results = np.load(os.path.join(EDCR_pipeline.figs_folder, best_coarse_folder, 'results.npy'))
coarse_test_true = np.load(os.path.join(EDCR_pipeline.data_folder, 'test_true_coarse.npy'))

# Configuration named "best" for a fine-grain main model, and its stored arrays.
best_fine_main_model = 'vit_l_16'
best_fine_main_lr = '1e-06'
best_fine_secondary_model = 'vit_b_16'
best_fine_secondary_lr = '1e-06'
best_fine_folder = f'main_fine_{best_fine_main_model}_lr{best_fine_main_lr}_secondary_{best_fine_secondary_model}_lr{best_fine_secondary_lr}'
best_fine_results = np.load(os.path.join(EDCR_pipeline.figs_folder, best_fine_folder, 'results.npy'))
fine_test_true = np.load(os.path.join(EDCR_pipeline.data_folder, 'test_true.npy'))

from models import  VITFineTuner
from data_preprocessing import get_datasets, get_loaders, granularities
from vit_pipeline import cwd, batch_size

def m(fine_data, coarse_test_loader):
    """Print every fine image and a 'hi' marker for each identical coarse image.

    :param fine_data: One batch indexed as (images, labels, names); only the
                      images are used, but all three positions are read.
    :param coarse_test_loader: Iterable of batches with the same three-position layout.
    """
    fine_images, _, _ = (fine_data[position] for position in range(3))

    for fine_image in fine_images:
        print(fine_image)
        for coarse_batch in coarse_test_loader:
            coarse_images, _, _ = (coarse_batch[position] for position in range(3))
            for coarse_image in coarse_images:
                # Only element-wise identical images count as a match.
                if not torch.all(coarse_image == fine_image):
                    continue
                print('hi')
                        
def get_datasets_correspondence(coarse_results: np.array, fine_results: np.array):
    """Compare every fine-grain test batch against the coarse-grain test loader in a worker pool.

    Datasets, fine-tuners and loaders are built per granularity for
    `best_coarse_main_model`; each fine test batch is then handed to `m`
    together with the coarse test loader.

    NOTE(review): `coarse_results` and `fine_results` are accepted but not used
    by the body. The recorded run of this cell failed in the spawned workers
    with "Can't get attribute 'm'"; `m` presumably has to live in an importable
    module for spawn-based pools - confirm.

    :param coarse_results: Currently unused.
    :param fine_results: Currently unused.
    """
    fine_tuners = {}
    loaders = {}

    for granularity in granularities.values():
        train_folder_name = f'train_{granularity}'
        test_folder_name = f'test_{granularity}'
        vit_model_names = [best_coarse_main_model]
        datasets, n = get_datasets(model_names=vit_model_names,
                                   cwd=cwd,
                                   train_folder_name=train_folder_name,
                                   test_folder_name=test_folder_name)
        granularity_fine_tuners = [VITFineTuner(model_name, vit_model_names, n) for model_name in vit_model_names]
        granularity_loaders = get_loaders(datasets=datasets,
                                          batch_size=batch_size,
                                          model_names=vit_model_names,
                                          train_folder_name=train_folder_name,
                                          test_folder_name=test_folder_name)

        fine_tuners[granularity] = granularity_fine_tuners
        loaders[granularity] = granularity_loaders

    fine_fine_tuner = fine_tuners['fine'][0]
    coarse_fine_tuner = fine_tuners['coarse'][0]

    # Loader keys are built from the fine-tuner's string form.
    fine_test_loader = loaders['fine'][f'{fine_fine_tuner}_test_fine']
    coarse_test_loader = loaders['coarse'][f'{coarse_fine_tuner}_test_coarse']

    args_list = [(fine_data, coarse_test_loader)
                 for fine_data in fine_test_loader]

    with mp.Pool(processes=10) as pool:
        # `m` takes two positional parameters, so each argument tuple must be
        # unpacked; Pool.map would pass the whole tuple as a single argument.
        pool.starmap(m, args_list)
            
          
# Run the fine/coarse test-image matching with the result arrays loaded earlier in this cell.
# The recorded output below shows this run failing in the pool workers and being interrupted.
get_datasets_correspondence(coarse_results=best_coarse_results, fine_results=best_fine_results)

Process SpawnPoolWorker-11:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'm' on <module '__main__' (built-in)>
Process SpawnPoolWorker-12:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/pyth

KeyboardInterrupt: 

In [None]:
# Build the fine-grain -> coarse-grain class mapping from the 'Training' sheet.
fine_to_coarse = {}
training_df = EDCR_pipeline.dataframes_by_sheet['Training']

# Every known fine-grain class has to appear in the training sheet.
assert set(EDCR_pipeline.fine_grain_classes).issubset(training_df['Fine-Grain Ground Truth'].unique().tolist())

for fine_grain_class in EDCR_pipeline.fine_grain_classes:
    is_current_class = training_df['Fine-Grain Ground Truth'] == fine_grain_class
    curr_fine_grain_training_data = training_df[is_current_class]
    coarse_labels_of_class = curr_fine_grain_training_data['Course-Grain Ground Truth']
    # A fine-grain class must map to exactly one coarse-grain class.
    assert coarse_labels_of_class.nunique() == 1
    fine_to_coarse[fine_grain_class] = coarse_labels_of_class.iloc[0]


def get_num_of_inconsistencies(coarse_results,
                               fine_results):
    """Count positions where the fine prediction's parent class differs from the coarse prediction.

    :param coarse_results: Iterable of indices into `EDCR_pipeline.coarse_grain_classes`.
    :param fine_results: Iterable of indices into `EDCR_pipeline.fine_grain_classes`;
                         paired with `coarse_results` via `zip`.
    :return: Number of pairs whose `fine_to_coarse` parent does not equal the coarse class.
    """
    def is_inconsistent(coarse_index, fine_index) -> bool:
        fine_name = EDCR_pipeline.fine_grain_classes[fine_index]
        coarse_name = EDCR_pipeline.coarse_grain_classes[coarse_index]
        return fine_to_coarse[fine_name] != coarse_name

    return sum(1 for coarse_index, fine_index in zip(coarse_results, fine_results)
               if is_inconsistent(coarse_index, fine_index))
    
# Count fine/coarse mismatches between the two ground-truth label arrays themselves.
get_num_of_inconsistencies(coarse_results=coarse_test_true,
                           fine_results=fine_test_true)

In [7]:
import json
file_path = 'res.json'
# Read with an explicit encoding so the result does not depend on the platform default.
with open(file_path, 'r', encoding='utf-8') as json_file:
    loaded_dict = json.load(json_file)

# Print the loaded value (the recorded run of this cell shows a list of None entries)
print('Loaded Dictionary:', loaded_dict)

Loaded Dictionary: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]


In [1]:
from EDCR_pipeline import fine_to_coarse
# Display the fine-grain -> coarse-grain class mapping imported from EDCR_pipeline.
fine_to_coarse

{'2S19_MSTA': 'Self Propelled Artillery',
 '30N6E': 'Air Defense',
 'BM-30': 'Self Propelled Artillery',
 'BMD': 'BMD',
 'BMP-1': 'BMP',
 'BMP-2': 'BMP',
 'BMP-T15': 'BMP',
 'BRDM': 'BTR',
 'BTR-60': 'BTR',
 'BTR-70': 'BTR',
 'BTR-80': 'BTR',
 'D-30': 'Self Propelled Artillery',
 'Iskander': 'Air Defense',
 'MT_LB': 'MT_LB',
 'Pantsir-S1': 'Air Defense',
 'Rs-24': 'Air Defense',
 'T-14': 'Tank',
 'T-62': 'Tank',
 'T-64': 'Tank',
 'T-72': 'Tank',
 'T-80': 'Tank',
 'T-90': 'Tank',
 'Tornado': 'Self Propelled Artillery',
 'TOS-1': 'Self Propelled Artillery'}