In [1]:
import pandas as pd
import numpy as np
from typing import Sequence, Callable
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score

# Data loading

In [3]:
data_file_path = rf'data/WEO_Data_Sheet.xlsx'
dataframes_by_sheet = pd.read_excel(data_file_path, sheet_name=None)
dataframes_by_sheet.keys()

dict_keys(['Training', 'Coarse-Grain Results', 'Fine-Grain Results', 'Vehicle Info', 'Inconsistent Samples', 'Consistent Samples', 'Metadata', 'Metadata 616 distribution', 'Course-Grain Only Metrics', 'Course-Grain Only Test', '1s_0s_Sheet', 'Testing', 'Consistent_and_Inconsistent_Met', 'Metadata 616'])

In [4]:
training_df = dataframes_by_sheet['Training']
training_df.shape

(814, 11)

In [5]:
training_df.head()

Unnamed: 0,Image Name,Course-Grain Ground Truth,Fine-Grain Ground Truth,Image Size (KB),Pixels,Color (RGB),Height (Pixels),Width (Pixels),Source,Unnamed: 9,Unnamed: 10
0,2S19_MSTA_Self-Propelled_Artillery_(SPA)_Vehic...,Self Propelled Artillery,2S19_MSTA,8.791992,43000,1,172.0,250.0,2.0,,2 indicates the source belongs to the website:...
1,2S19_Msta-S_152mm_tracked_self-propelled_howit...,Self Propelled Artillery,2S19_MSTA,63.426758,267600,1,446.0,600.0,2.0,,1 indicates the source belongs to Roboflow web...
2,2S19_Msta-S_152mm_tracked_self-propelled_howit...,Self Propelled Artillery,2S19_MSTA,68.384766,270000,1,450.0,600.0,2.0,,0 indicates that the source belongs to another...
3,2S19_Msta-S_152mm_tracked_self-propelled_howit...,Self Propelled Artillery,2S19_MSTA,68.874023,270000,1,450.0,600.0,2.0,,
4,2S19_Msta-S_152mm_tracked_self-propelled_howit...,Self Propelled Artillery,2S19_MSTA,51.21582,270000,1,450.0,600.0,2.0,,


In [6]:
fine_grain_results_df = dataframes_by_sheet['Fine-Grain Results']
fine_grain_results_df.columns

Index(['Class Name', 'Precision', 'Recall', 'F1', 'Train Count', 'Test Count',
       'Total Count', 'True Positives', 'False Positives', 'True Negatives',
       'False Negatives', 'Number of Predictions'],
      dtype='object')

In [7]:
fine_grain_classes = fine_grain_results_df['Class Name'].values
fine_grain_classes

array(['2S19_MSTA', '30N6E', 'BM-30', 'BMD', 'BMP-1', 'BMP-2', 'BMP-T15',
       'BRDM', 'BTR-60', 'BTR-70', 'BTR-80', 'D-30', 'Iskander', 'MT_LB',
       'Pantsir-S1', 'Rs-24', 'T-14', 'T-62', 'T-64', 'T-72', 'T-80',
       'T-90', 'Tornado', 'TOS-1'], dtype=object)

In [8]:
coarse_grain_results_df = dataframes_by_sheet['Coarse-Grain Results']
coarse_grain_results_df.columns

Index(['Class Name', 'Precision', 'Recall', 'F1', 'Train Count', 'Test Count',
       'Total Count', 'True Positives', 'False Positives', 'True Negatives',
       'False Negatives'],
      dtype='object')

In [9]:
coarse_grain_results_df.head()

Unnamed: 0,Class Name,Precision,Recall,F1,Train Count,Test Count,Total Count,True Positives,False Positives,True Negatives,False Negatives
0,Tank,0.835821,0.918033,0.875,232,61,293,56,11,164,5
1,BMP,0.927273,0.87931,0.902655,160,58,218,51,4,174,7
2,BTR,0.8,0.823529,0.811594,121,34,155,28,7,195,6
3,Self Propelled Artillery,1.0,0.692308,0.818182,77,26,103,18,0,210,8
4,Air Defense,0.823529,1.0,0.903226,54,14,68,14,3,219,0


In [10]:
coarse_grain_classes = coarse_grain_results_df['Class Name'].values
coarse_grain_classes

array(['Tank', 'BMP', 'BTR', 'Self Propelled Artillery', 'Air Defense',
       'MT_LB', 'BMD'], dtype=object)

In [11]:
coarse_grain_results_df = coarse_grain_results_df.T
coarse_grain_results_df.columns = coarse_grain_classes
coarse_grain_results_df.head()

Unnamed: 0,Tank,BMP,BTR,Self Propelled Artillery,Air Defense,MT_LB,BMD
Class Name,Tank,BMP,BTR,Self Propelled Artillery,Air Defense,MT_LB,BMD
Precision,0.835821,0.927273,0.8,1.0,0.823529,0.833333,1.0
Recall,0.918033,0.87931,0.823529,0.692308,1.0,0.952381,0.909091
F1,0.875,0.902655,0.811594,0.818182,0.903226,0.888889,0.952381
Train Count,232,160,121,77,54,78,88


In [15]:
zeros_and_ones_df = dataframes_by_sheet['1s_0s_Sheet']
zeros_and_ones_df.shape

(236, 87)

In [16]:
zeros_and_ones_df.head()

Unnamed: 0,Image Name,source(armyrecognition.com),source(roboflow),source(other),Air Defence,BMD,BMP,BTR,MT_LB,SPA,...,pred_Pantsir-S1,pred_Rs-24,pred_T-14,pred_T-62,pred_T-64,pred_T-72,pred_T-80,pred_T-90,pred_Tornado,pred_TOS-1
0,1_jpg.rf.d710cedbb6b7f1d25a76c767ab8a1fb7.jpg,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,100_jpg.rf.9ee6019a5bd2cab9a780a55d6fc9ea40.jpg,0,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,100_jpg.rf.cb3c97fd3e76109ff3b1619cb8e54174.jpg,0,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,100_jpg.rf.de78d02fa5c4a80a2441027ed3e93710.jpg,0,1,0,0,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
4,11_png.rf.453d730f6167d639e5ec2bb9f5b2c0e9.jpg,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
zeros_and_ones_df.columns

Index(['Image Name', 'source(armyrecognition.com)', 'source(roboflow)',
       'source(other)', 'Air Defence', 'BMD', 'BMP', 'BTR', 'MT_LB', 'SPA',
       'Tank', '30N6E', 'Iskander', 'Pantsir-S1', 'Rs-24', 'BMP-1', 'BMP-2',
       'BMP-T15', 'BRDM', 'BTR-60', 'BTR-70', 'BTR-80', '2S19_MSTA', 'BM-30',
       'D-30', 'Tornado', 'TOS-1', 'T-14', 'T-62', 'T-64', 'T-72', 'T-80',
       'T-90', 'Exp 1 Prediction (Tank)', 'Exp 1 Prediction (BMP)',
       'Exp 1 Prediction (BTR)', 'Exp 1 Prediction (SPA)',
       'Exp 1 Prediction (Air Defense)', 'Exp 1 Prediction (MT_LB)',
       'Exp 1 Prediction (BMD)', 'Exp 2 Prediction (Tank)',
       'Exp 2 Prediction (BMP)', 'Exp 2 Prediction (BTR)',
       'Exp 2 Prediction (SPA)', 'Exp 2 Prediction (Air Defense)',
       'Exp 2 Prediction (MT_LB)', 'Exp 2 Prediction (BMD)',
       'Course-Grain Correctness (Exp 1)', 'Fine-Grain Correctness (Exp 1)',
       'Course-grain prediction correctness (Exp 2)',
       'Experiment 1 and 2 consistant', 'sharpne

In [14]:
set(zeros_and_ones_df['Image Name']).intersection(set(training_df['Image Name']))

set()

In [37]:
def get_TP(cls: str) -> int:
    """
    :param cls: The input class to consider
    :return: The True Positive of the class
    """
    
    return ((zeros_and_ones_df[f'pred_{cls}'] == 1) & (zeros_and_ones_df[f'{cls}'] == 1)).sum()

{f'{cls} TP': get_TP(cls=cls) for cls in fine_grain_classes}

{'2S19_MSTA TP': 0,
 '30N6E TP': 2,
 'BM-30 TP': 2,
 'BMD TP': 20,
 'BMP-1 TP': 12,
 'BMP-2 TP': 17,
 'BMP-T15 TP': 6,
 'BRDM TP': 1,
 'BTR-60 TP': 3,
 'BTR-70 TP': 20,
 'BTR-80 TP': 0,
 'D-30 TP': 0,
 'Iskander TP': 0,
 'MT_LB TP': 20,
 'Pantsir-S1 TP': 3,
 'Rs-24 TP': 4,
 'T-14 TP': 3,
 'T-62 TP': 3,
 'T-64 TP': 18,
 'T-72 TP': 4,
 'T-80 TP': 5,
 'T-90 TP': 9,
 'Tornado TP': 5,
 'TOS-1 TP': 6}

In [34]:
def get_FP(cls: str) -> int:
    """
    :param cls: The input class to consider
    :return: The False Positive of the class
    """
    
    return ((zeros_and_ones_df[f'pred_{cls}'] == 1) & (zeros_and_ones_df[f'{cls}'] == 0)).sum()
    
{f'{cls} FP': get_FP(cls=cls) for cls in fine_grain_classes}

{'2S19_MSTA FP': 2,
 '30N6E FP': 2,
 'BM-30 FP': 2,
 'BMD FP': 0,
 'BMP-1 FP': 8,
 'BMP-2 FP': 11,
 'BMP-T15 FP': 1,
 'BRDM FP': 1,
 'BTR-60 FP': 2,
 'BTR-70 FP': 7,
 'BTR-80 FP': 1,
 'D-30 FP': 0,
 'Iskander FP': 2,
 'MT_LB FP': 4,
 'Pantsir-S1 FP': 1,
 'Rs-24 FP': 3,
 'T-14 FP': 2,
 'T-62 FP': 2,
 'T-64 FP': 7,
 'T-72 FP': 5,
 'T-80 FP': 2,
 'T-90 FP': 7,
 'Tornado FP': 0,
 'TOS-1 FP': 1}

In [36]:
def get_TN(cls: str) -> int:
    """
    :param cls: The input class to consider
    :return: The True Negative of the class
    """
    
    return ((zeros_and_ones_df[f'pred_{cls}'] == 0) & (zeros_and_ones_df[f'{cls}'] == 0)).sum()

{f'{cls} TN': get_TN(cls=cls) for cls in fine_grain_classes}

{'2S19_MSTA TN': 229,
 '30N6E TN': 231,
 'BM-30 TN': 230,
 'BMD TN': 214,
 'BMP-1 TN': 203,
 'BMP-2 TN': 198,
 'BMP-T15 TN': 229,
 'BRDM TN': 231,
 'BTR-60 TN': 231,
 'BTR-70 TN': 205,
 'BTR-80 TN': 232,
 'D-30 TN': 232,
 'Iskander TN': 230,
 'MT_LB TN': 211,
 'Pantsir-S1 TN': 232,
 'Rs-24 TN': 229,
 'T-14 TN': 228,
 'T-62 TN': 229,
 'T-64 TN': 205,
 'T-72 TN': 222,
 'T-80 TN': 228,
 'T-90 TN': 218,
 'Tornado TN': 231,
 'TOS-1 TN': 227}

In [38]:
def get_FN(cls: str) -> int:
    """
    :param cls: The input class to consider
    :return: The True Negative of the class
    """
    
    return ((zeros_and_ones_df[f'pred_{cls}'] == 0) & (zeros_and_ones_df[f'{cls}'] == 1)).sum()

{f'{cls} FN': get_FN(cls=cls) for cls in fine_grain_classes}

{'2S19_MSTA FN': 5,
 '30N6E FN': 1,
 'BM-30 FN': 2,
 'BMD FN': 2,
 'BMP-1 FN': 13,
 'BMP-2 FN': 10,
 'BMP-T15 FN': 0,
 'BRDM FN': 3,
 'BTR-60 FN': 0,
 'BTR-70 FN': 4,
 'BTR-80 FN': 3,
 'D-30 FN': 4,
 'Iskander FN': 4,
 'MT_LB FN': 1,
 'Pantsir-S1 FN': 0,
 'Rs-24 FN': 0,
 'T-14 FN': 3,
 'T-62 FN': 2,
 'T-64 FN': 6,
 'T-72 FN': 5,
 'T-80 FN': 1,
 'T-90 FN': 2,
 'Tornado FN': 0,
 'TOS-1 FN': 2}

In [42]:
def get_precision(cls: str) -> float:
    """
    :param cls: The input class to consider
    :return: The precision of the class - defined as TP / (TP + FP)
    """
    TP = get_TP(cls)
    FP = get_FP(cls)
    
    return TP / (TP + FP)

{f'{cls} precision': get_precision(cls=cls) for cls in fine_grain_classes}

  return TP / (TP + FP)


{'2S19_MSTA precision': 0.0,
 '30N6E precision': 0.5,
 'BM-30 precision': 0.5,
 'BMD precision': 1.0,
 'BMP-1 precision': 0.6,
 'BMP-2 precision': 0.6071428571428571,
 'BMP-T15 precision': 0.8571428571428571,
 'BRDM precision': 0.5,
 'BTR-60 precision': 0.6,
 'BTR-70 precision': 0.7407407407407407,
 'BTR-80 precision': 0.0,
 'D-30 precision': nan,
 'Iskander precision': 0.0,
 'MT_LB precision': 0.8333333333333334,
 'Pantsir-S1 precision': 0.75,
 'Rs-24 precision': 0.5714285714285714,
 'T-14 precision': 0.6,
 'T-62 precision': 0.6,
 'T-64 precision': 0.72,
 'T-72 precision': 0.4444444444444444,
 'T-80 precision': 0.7142857142857143,
 'T-90 precision': 0.5625,
 'Tornado precision': 1.0,
 'TOS-1 precision': 0.8571428571428571}

In [43]:
{f'{cls} precision': precision_score(y_true=zeros_and_ones_df[f'{cls}'], y_pred=zeros_and_ones_df[f'pred_{cls}']) for cls in fine_grain_classes}

  _warn_prf(average, modifier, msg_start, len(result))


{'2S19_MSTA precision': 0.0,
 '30N6E precision': 0.5,
 'BM-30 precision': 0.5,
 'BMD precision': 1.0,
 'BMP-1 precision': 0.6,
 'BMP-2 precision': 0.6071428571428571,
 'BMP-T15 precision': 0.8571428571428571,
 'BRDM precision': 0.5,
 'BTR-60 precision': 0.6,
 'BTR-70 precision': 0.7407407407407407,
 'BTR-80 precision': 0.0,
 'D-30 precision': 0.0,
 'Iskander precision': 0.0,
 'MT_LB precision': 0.8333333333333334,
 'Pantsir-S1 precision': 0.75,
 'Rs-24 precision': 0.5714285714285714,
 'T-14 precision': 0.6,
 'T-62 precision': 0.6,
 'T-64 precision': 0.72,
 'T-72 precision': 0.4444444444444444,
 'T-80 precision': 0.7142857142857143,
 'T-90 precision': 0.5625,
 'Tornado precision': 1.0,
 'TOS-1 precision': 0.8571428571428571}

In [44]:
def get_recall(cls: str) -> float:
    """
    :param cls: The input class to consider
    :return: The recall of the class - defined as TP / (TP + FN)
    """
    TP = get_TP(cls)
    FN = get_FN(cls)
    
    return TP / (TP + FN)

{f'{cls} recall': get_recall(cls=cls) for cls in fine_grain_classes}

{'2S19_MSTA recall': 0.0,
 '30N6E recall': 0.6666666666666666,
 'BM-30 recall': 0.5,
 'BMD recall': 0.9090909090909091,
 'BMP-1 recall': 0.48,
 'BMP-2 recall': 0.6296296296296297,
 'BMP-T15 recall': 1.0,
 'BRDM recall': 0.25,
 'BTR-60 recall': 1.0,
 'BTR-70 recall': 0.8333333333333334,
 'BTR-80 recall': 0.0,
 'D-30 recall': 0.0,
 'Iskander recall': 0.0,
 'MT_LB recall': 0.9523809523809523,
 'Pantsir-S1 recall': 1.0,
 'Rs-24 recall': 1.0,
 'T-14 recall': 0.5,
 'T-62 recall': 0.6,
 'T-64 recall': 0.75,
 'T-72 recall': 0.4444444444444444,
 'T-80 recall': 0.8333333333333334,
 'T-90 recall': 0.8181818181818182,
 'Tornado recall': 1.0,
 'TOS-1 recall': 0.75}

In [45]:
{f'{cls} recall': recall_score(y_true=zeros_and_ones_df[f'{cls}'], y_pred=zeros_and_ones_df[f'pred_{cls}']) for cls in fine_grain_classes}

{'2S19_MSTA recall': 0.0,
 '30N6E recall': 0.6666666666666666,
 'BM-30 recall': 0.5,
 'BMD recall': 0.9090909090909091,
 'BMP-1 recall': 0.48,
 'BMP-2 recall': 0.6296296296296297,
 'BMP-T15 recall': 1.0,
 'BRDM recall': 0.25,
 'BTR-60 recall': 1.0,
 'BTR-70 recall': 0.8333333333333334,
 'BTR-80 recall': 0.0,
 'D-30 recall': 0.0,
 'Iskander recall': 0.0,
 'MT_LB recall': 0.9523809523809523,
 'Pantsir-S1 recall': 1.0,
 'Rs-24 recall': 1.0,
 'T-14 recall': 0.5,
 'T-62 recall': 0.6,
 'T-64 recall': 0.75,
 'T-72 recall': 0.4444444444444444,
 'T-80 recall': 0.8333333333333334,
 'T-90 recall': 0.8181818181818182,
 'Tornado recall': 1.0,
 'TOS-1 recall': 0.75}

In [48]:
def get_F1(cls: str) -> float:
    """
    :param cls: The input class to consider
    :return: The F1-score of the class - defined as 2/(1/precision + 1/recall)
    """
    precision = get_precision(cls)
    recall = get_recall(cls)
    
    return 2 / (1 / precision + 1/recall)

{f'{cls} F1': get_F1(cls=cls) for cls in fine_grain_classes}

  return 2 / (1 / precision + 1/recall)
  return TP / (TP + FP)


{'2S19_MSTA F1': 0.0,
 '30N6E F1': 0.5714285714285714,
 'BM-30 F1': 0.5,
 'BMD F1': 0.9523809523809523,
 'BMP-1 F1': 0.5333333333333333,
 'BMP-2 F1': 0.6181818181818182,
 'BMP-T15 F1': 0.9230769230769229,
 'BRDM F1': 0.3333333333333333,
 'BTR-60 F1': 0.7499999999999999,
 'BTR-70 F1': 0.7843137254901962,
 'BTR-80 F1': 0.0,
 'D-30 F1': nan,
 'Iskander F1': 0.0,
 'MT_LB F1': 0.8888888888888888,
 'Pantsir-S1 F1': 0.8571428571428572,
 'Rs-24 F1': 0.7272727272727273,
 'T-14 F1': 0.5454545454545454,
 'T-62 F1': 0.6,
 'T-64 F1': 0.7346938775510203,
 'T-72 F1': 0.4444444444444444,
 'T-80 F1': 0.7692307692307694,
 'T-90 F1': 0.6666666666666666,
 'Tornado F1': 1.0,
 'TOS-1 F1': 0.8}

In [47]:
{f'{cls} F1': f1_score(y_true=zeros_and_ones_df[f'{cls}'], y_pred=zeros_and_ones_df[f'pred_{cls}']) for cls in fine_grain_classes}

{'2S19_MSTA F1': 0.0,
 '30N6E F1': 0.5714285714285715,
 'BM-30 F1': 0.5,
 'BMD F1': 0.9523809523809523,
 'BMP-1 F1': 0.5333333333333332,
 'BMP-2 F1': 0.6181818181818182,
 'BMP-T15 F1': 0.923076923076923,
 'BRDM F1': 0.3333333333333333,
 'BTR-60 F1': 0.7499999999999999,
 'BTR-70 F1': 0.7843137254901961,
 'BTR-80 F1': 0.0,
 'D-30 F1': 0.0,
 'Iskander F1': 0.0,
 'MT_LB F1': 0.888888888888889,
 'Pantsir-S1 F1': 0.8571428571428571,
 'Rs-24 F1': 0.7272727272727273,
 'T-14 F1': 0.5454545454545454,
 'T-62 F1': 0.6,
 'T-64 F1': 0.7346938775510204,
 'T-72 F1': 0.4444444444444444,
 'T-80 F1': 0.7692307692307692,
 'T-90 F1': 0.6666666666666666,
 'Tornado F1': 1.0,
 'TOS-1 F1': 0.7999999999999999}

# Problem statement

Our main goal is to improve the precision of a pretrained model, denoted $f_{\theta_f}$, which predicts fine-grain labels of images. The model was pretrained on a train dataset denoted $\mathcal{T}$. We would like to improve its precision by applying rules using a different model, denoted $f_{\theta_c}$ and also trained on $\mathcal{T}$, that predicts coarse-grain labels. The method used is as follows: the coarse-grain labels predicted by $f_{\theta_c}$ can be compared to the coarse-grain labels derived from the predicted fine-grain labels of $f_{\theta_f}$. 

 Let $\mathcal{O} \neq \mathcal{T}$ be a set of images, each has both a coarse and fine grain label. Consider some sample $\omega \in \mathcal{O}$. Denote $f_{\theta_c}(\omega)=\alpha_{\omega}$ and $f_{\theta_f}(\omega) = \beta_{\omega}$. Let $\psi$ and $\rho$ be sets of coarse and fine-grain classes respectively. So in this case we have $\mathbf{C} = \psi \cup \rho$. Let $\Phi \colon \psi \to \rho$ be a function that maps a fine grain label to it's corresponding coarse grain label. With that, we define the predicate:

\begin{equation}
\Phi(\beta_{\omega}) = \alpha_{\omega} \iff \omega \  \textrm{is consistent}$
\end{equation}

When an inconsistency is found, we assume precedence of the predictions of the coarse-grain model, i.e. we conclude that the coarse-grain label derived from the fine-grain label prediction is incorrect, and suggest the prediction of the coarse-grain model as the label for the sample.

In [14]:
fine_to_coarse = {
    "T-14": "Tank",
    "T-62": "Tank",
    "T-64": "Tank",
    "T-72": "Tank",
    "T-80": "Tank",
    "T-90": "Tank",
    "BMP-1": "BMP",
    "BMP-2": "BMP",
    "T-15 BMP": "BMP",
    "BTR-60": "BTR",
    "BTR-70": "BTR",
    "BTR-80": "BTR",
    "BRDM": "BTR",
    "2S19_MSTA": "Self Propelled Artillery",
    "BM-30": "Self Propelled Artillery",
    "D-30": "Self Propelled Artillery",
    "Tornado": "Self Propelled Artillery",
    "TOS-1": "Self Propelled Artillery",
    "30N6E": "Air Defense",
    "Iskander": "Air Defense",
    "Pantsir-S1": "Air Defense",
    "Rs-24": "Air Defense",
    "MT-LB": "MT-LB",
    "BMD": "BMD"
}

In [15]:
images_col_name = 'Image Name'

def get_example_info(image_name: str) -> pd.Series:
    return zeros_and_ones_df[zeros_and_ones_df[images_col_name] == image_name]

def get_class_name(cls: str, ground_truth: bool):
    if ground_truth and cls == 'Air Defense':
        return 'Air Defence'
    
    return cls if cls != 'Self Propelled Artillery' else 'SPA'

def get_class(image_name: str, ground_truth: bool, granularity: str) -> str:
    w_info = get_example_info(image_name)
    column_name_generator = lambda cls: get_class_name(cls, ground_truth) if ground_truth else (f"pred_{get_class_name(cls, ground_truth)}" if granularity == 'fine' else f"Exp 2 Prediction ({get_class_name(cls, ground_truth)})")
    classes = fine_grain_classes if granularity == 'fine' else coarse_grain_classes
    class_index = np.array([w_info[column_name_generator(cls)] for cls in classes]).argmax()
    predicted_class = classes[class_index]
    
    return predicted_class

def predicted_fine_grain_class(image_name: str) -> str:
    return get_class(image_name=image_name, ground_truth=False, granularity='fine')

consistent_samples = dataframes_by_sheet['Consistent Samples']
consistent_sample = consistent_samples.iloc[0][images_col_name]
predicted_fine_grain_class(consistent_sample)

'30N6E'

In [16]:
def predicted_coarse_grain_class(w: str) -> str:
    return get_class(image_name=w, ground_truth=False, granularity='coarse')

predicted_coarse_grain_class(consistent_sample)

'Air Defense'

In [17]:
def derived_coarse_grain_class(w: str) -> str:
    fine_grain_class = predicted_fine_grain_class(w)
    return fine_to_coarse[fine_grain_class]

derived_coarse_grain_class(consistent_sample)

'Air Defense'

# Predicates

In [18]:
def is_coarse_grain_prediction_i(w: str, i: str) -> bool:
    return predicted_coarse_grain_class(w) == i

def print_predicates(w:str, predicate: Callable, color: str):
    print(f'{predicate.__name__} on example {w}\n' + '#' * 100 + '\n')
    for cls in coarse_grain_classes:
        predicate_value = predicate(w=w, i=cls)
        color_prefix = f"\033[3{1 if color == 'red' else 2}m" if predicate_value else ""
        color_suffix = "\033[m" if predicate_value else ""
        print_string = color_prefix + f'{cls}: {predicate_value}' + color_suffix
        print(print_string)

print_predicates(w=consistent_sample, predicate=is_coarse_grain_prediction_i, color='green')

is_coarse_grain_prediction_i on example 30N6E_Flap_Lid_B _Tracking_and_missile_guidance_radar_for_SA-10_Grumble_-_Russia_12.jpg
####################################################################################################

Tank: False
BMP: False
BTR: False
Self Propelled Artillery: False
[32mAir Defense: True[m
MT_LB: False
BMD: False


In [19]:
def is_coarse_grain_ground_truth_i(w: str, i: str):
    return get_class(image_name=w, ground_truth=True, granularity='coarse') == i

print_predicates(w=consistent_sample, predicate=is_coarse_grain_ground_truth_i, color='green')

is_coarse_grain_ground_truth_i on example 30N6E_Flap_Lid_B _Tracking_and_missile_guidance_radar_for_SA-10_Grumble_-_Russia_12.jpg
####################################################################################################

Tank: False
BMP: False
BTR: False
Self Propelled Artillery: False
[32mAir Defense: True[m
MT_LB: False
BMD: False


In [20]:
def is_coarse_grain_prediction_incorrect_i(w: str, i: str):
    return is_coarse_grain_prediction_i(w, i) and not is_coarse_grain_ground_truth_i(w, i)

print_predicates(w=consistent_sample, predicate=is_coarse_grain_prediction_incorrect_i, color='red')

is_coarse_grain_prediction_incorrect_i on example 30N6E_Flap_Lid_B _Tracking_and_missile_guidance_radar_for_SA-10_Grumble_-_Russia_12.jpg
####################################################################################################

Tank: False
BMP: False
BTR: False
Self Propelled Artillery: False
Air Defense: False
MT_LB: False
BMD: False


In [21]:
inconsistent_samples = dataframes_by_sheet['Inconsistent Samples']
inconsistent_sample = inconsistent_samples.iloc[0][images_col_name]
    
predicted_fine_grain_class(inconsistent_sample)

'BMD'

In [22]:
predicted_coarse_grain_class(inconsistent_sample)

'BMP'

In [23]:
derived_coarse_grain_class(inconsistent_sample)

'BMD'

In [24]:
print_predicates(w=inconsistent_sample, predicate=is_coarse_grain_prediction_incorrect_i, color='red')

is_coarse_grain_prediction_incorrect_i on example 14_png.rf.6900265511b02e07d9957402df99d731.jpg
####################################################################################################

Tank: False
[31mBMP: True[m
BTR: False
Self Propelled Artillery: False
Air Defense: False
MT_LB: False
BMD: False


# Conditions

In [43]:
conditions = [lambda z, cls=cls: predicted_coarse_grain_class(z) == cls for cls in coarse_grain_classes]

{cls: condition(inconsistent_sample) for cls, condition in zip(coarse_grain_classes, conditions)}

{'Tank': False,
 'BMP': True,
 'BTR': False,
 'Self Propelled Artillery': False,
 'Air Defense': False,
 'MT_LB': False,
 'BMD': False}

In [62]:
def get_scores(y_true, y_pred):
    try:
        y_actual = y_true
        y_hat = y_pred
        TP = 0
        FP = 0
        TN = 0
        FN = 0

        for i in range(len(y_hat)):
            if y_actual[i] == y_hat[i] == 1:
                TP += 1
            if y_hat[i] == 1 and y_actual[i] != y_hat[i]:
                FP += 1
            if y_actual[i] == y_hat[i] == 0:
                TN += 1
            if y_hat[i] == 0 and y_actual[i] != y_hat[i]:
                FN += 1
        print(f"TP:{TP}, FP:{FP}, TN:{TN}, FN:{FN}")

        pre = precision_score(y_true, y_pred)
        rec = recall_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred)
        return pre, rec, f1
    except:
        pre = accuracy_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred, average='macro')
        f1micro = f1_score(y_true, y_pred, average='micro')
        
        return pre, f1, f1micro

def NEG(i: str, conditions_subset: Sequence[Callable[[str], bool]]) -> int:
    result = 0
    
    for w in zeros_and_ones_df[images_col_name]:
        if (all(condition(w) for condition in conditions_subset) 
                and is_coarse_grain_ground_truth_i(w=w, i=i) and is_coarse_grain_prediction_i(w=w, i=i)):
            result += 1
            
    return result

def POS(i: str, conditions_subset: Sequence[Callable[[str], bool]]) -> int:
    result = 0
    
    for w in zeros_and_ones_df[images_col_name]:
        if (all(condition(w) for condition in conditions_subset) 
                and is_coarse_grain_prediction_incorrect_i(i=i, w=w)):
            result += 1
    
    return result
            
def calculate_q(i: str, epsilon: float) -> float:
    n_i = zeros_and_ones_df[i if i != 'Air Defense' else 'Air Defence'].sum()
    r_i = coarse_grain_results_df.loc['Recall', i]
    p_i = coarse_grain_results_df.loc['Precision', i]
    q_i = epsilon * n_i * p_i / r_i
    
    return q_i

def GreedyNegativeConditionsSelect(i: str, epsilon: float, conditions: Sequence[Callable[[str], bool]]):
    dc_i = []
    
    q_i = calculate_q()
    
    dc_i_star = [condition for condition in conditions if NEG(i=i, conditions_subset=[condition]) <= q_i]
    
    while dc_i_star:
        c_best = dc_i_star[np.array([POS(i=i, conditions_subset=dc_i + [condition]) 
                           for condition in dc_i_star]).argmax()]
        dc_i += [c_best]
        dc_i_star = [condition for condition in conditions 
                     if condition not in dc_i and NEG(i=i, conditions_subset=dc_i + [condition]) <= q_i]
    
    return dc_i

In [67]:
coarse_grain_classes

['Tank',
 'BMP',
 'BTR',
 'Self Propelled Artillery',
 'Air Defense',
 'MT_LB',
 'BMD']

In [71]:
all(len(GreedyNegativeConditionsSelect(i=cls, epsilon=100, conditions=conditions)) == len(conditions) for cls in coarse_grain_classes[0:1])

True