# Import

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support


# Utils

In [2]:
def calculate_metrics(TN, FP, FN, TP, f_beta=1):
    Acc = (TP+TN)/(TN+FP+FN+TP)
    P = TP/(TP+FP)
    R = TP/(TP+FN)
    F = (1+pow(f_beta,2))*(P*R)/((pow(f_beta,2))*P+R)
    TNR = TN/(TN+FN)
    return Acc, P, R, F, TNR

def train_test_split_idxs(correct_col, test_ratio=0.25, balanced=True):
    label_0 = list(np.argwhere(correct_col==0)[:,0])
    label_1 = list(np.argwhere(correct_col==1)[:,0])

    if balanced:
        sample_size = min(len(label_0), len(label_1))
        label_0 = random.sample(label_0, sample_size)
        label_1 = random.sample(label_1, sample_size)
    
    test_idxs_0 = random.sample(label_0, int(test_ratio*len(label_0)))
    test_idxs_1 = random.sample(label_1, int(test_ratio*len(label_1)))
    train_idxs_0 = list(set(label_0)-set(test_idxs_0))
    train_idxs_1 = list(set(label_1)-set(test_idxs_1))

    test_idxs = test_idxs_0 + test_idxs_1
    train_idxs = train_idxs_0 + train_idxs_1

    random.shuffle(test_idxs)
    random.shuffle(train_idxs)

    # print(f"tr_0: {len(train_idxs_0)}", end=" ")
    # print(f"tr_1: {len(train_idxs_1)}", end=" ")
    # print(f"ts_0: {len(test_idxs_0)}", end=" ")
    # print(f"ts_1: {len(test_idxs_1)}", end=" ")
    # print()

    # train_idxs, test_idxs = train_test_split(labels_idxs, test_size=int(test_ratio*len(labels_idxs)))
    # test_idxs = random.sample(labels_idxs, int(test_ratio*len(labels_idxs)))
    # train_idxs = list(set(labels_idxs)-set(test_idxs))

    return train_idxs, test_idxs

# Define variables

In [3]:
model_names = ["Vilt", "Blip_large", "GiT_base", "GiT_large"] # skip , "Blip_base"
model_type = "VQA"
D_type = "1"

full_df_columns = ["feature", "model_name", "classifier", "TP", "TN", "FP", "FN", "Acc", "P", "R", "F", "TNR"]

numeric_cols = ["used_language", "specificity", "question_length", "complexity", "image_relatedness", "image_difficulty", "difficulty", \
                "no_tokens", "correct", "N", "Prob", "P_T_1", 'P_T_2_N']
numeric_cols_dtype = dict()
for c in numeric_cols: numeric_cols_dtype[c]=float
numeric_cols_dtype

{'used_language': float,
 'specificity': float,
 'question_length': float,
 'complexity': float,
 'image_relatedness': float,
 'image_difficulty': float,
 'difficulty': float,
 'no_tokens': float,
 'correct': float,
 'N': float,
 'Prob': float,
 'P_T_1': float,
 'P_T_2_N': float}

# Read full results df

In [4]:
full_results_df = pd.read_excel(f"./{model_type}_full_results_D_type_{D_type}_manual.xlsx", sheet_name=f"{model_type}_D_type_{D_type}_results",
                                dtype=numeric_cols_dtype)

full_results_df.head()

Unnamed: 0,ID,image_name,example_question,question,answer_gt,used_language,specificity,question_length,complexity,image_relatedness,...,P_T_2_N,x_max_str,x_min_str,Prob_str,T_1_max_str,T_1_str,P_T_1_str,T_2_max_N_str,T_2_N_str,P_T_2_N_str
0,0,Gaussian_5_2,what is in the image?,what is in the image?,random+noise+nothing+t know+not sure+unknown+c...,0.0,1.0,0.0,0.0,0.0,...,0.035396,-2.3610375,-16.713715,0.07764137,1.7832804974941396,1.3263668739558916,0.033901606,1.738079,1.307533,0.035396315
1,1,Gaussian_5_2,what is the dominant color of the image?,what is the dominant color of the image?,color+gray+grey+nothing+t know+not sure+unknow...,0.0,1.0,0.0,0.0,0.0,...,0.16671,-0.49326575,-27.210875,0.20900321,3.319589136322892,1.966495473467872,0.08871711,1.738079,1.307533,0.16670989
2,2,Gaussian_5_2,what does the image represent?,what does the image represent?,random+noise+nothing+t know+not sure+unknown+c...,0.0,1.0,0.0,0.0,0.0,...,0.039332,-2.180196,-17.993324,0.08220834,1.964737514053651,1.4019739641890212,0.031976696,1.738079,1.307533,0.03933237
3,3,Gaussian_5_2,why is the image random?,why is the image random?,random+t know+not sure+unknown+can't tell+none...,0.0,1.0,0.0,0.0,0.0,...,0.051415,-2.5192337,-19.845095,0.12248334,2.152690347564782,1.4802876448186593,0.034029774,1.738079,1.307533,0.05141516
4,4,Gaussian_5_2,why aren't there any objects in the image?,why aren't there any objects in the image?,random+noise+t know+not sure+unknown+can't tel...,0.0,1.0,0.0,0.0,0.0,...,0.029139,-3.1362438,-18.810205,0.06400901,1.9474464197595112,1.394769341566463,0.024084808,1.738079,1.307533,0.02913933


# Add valid column

In [5]:
not_nan = np.array(~(full_results_df['clean_answer']).isna())
not_qst_mark = np.array(~(full_results_df['clean_answer'].str.contains('?', na=True, regex=False)))

valid = np.where(not_nan & not_qst_mark, 1, 0)

data = full_results_df.copy()
data["valid"] = valid

len(data)

14276

In [6]:
data["no_tokens"] = data["no_tokens"]/np.max(list(data["no_tokens"]))

# Train Test indices

In [7]:
models_idxs = dict()
balanced = True

## Full set

In [8]:
correct_col = np.array(data.loc[(data["image_difficulty"]!=1)]["correct"]).astype(np.int32) # no random image
train_idxs, test_idxs = train_test_split_idxs(correct_col, test_ratio=0.25, balanced=balanced)

models_idxs["all"] = [train_idxs, test_idxs]

## Models

In [9]:
for model_name in model_names:
    print(model_name)
    correct_col = np.array(data.loc[(data["image_difficulty"]!=1) & (data["model_name"]==model_name)]["correct"]).astype(np.int32) # no random image
    train_idxs, test_idxs = train_test_split_idxs(correct_col, test_ratio=0.25, balanced=balanced)
    models_idxs[model_name] = [train_idxs, test_idxs]

Vilt
Blip_large
GiT_base
GiT_large


# Coefs

In [10]:
features = ["specificity", "question_length", "complexity", "image_relatedness", "image_difficulty"] # no used_language

models_paramaters_dict = dict()

for model_name in model_names+["all"]:

    temp = data.copy()
    temp = data.loc[(data["image_difficulty"]!=1)]


    if model_name!="all": temp = temp.loc[(temp["model_name"]==model_name)]

    train_idxs, test_idxs = models_idxs[model_name]
    X_train, X_test = np.array(temp[features])[train_idxs], np.array(temp[features])[test_idxs]
    y_train, y_test = np.array(temp["correct"])[train_idxs], np.array(temp["correct"])[test_idxs]

    # print(model_name)
    # print(np.bincount(np.array(y_train, dtype="int")))

    model = LogisticRegression()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    models_paramaters_dict[model_name] = {
        "results": {
            "gt":           y_test,
            "preds":        predictions
        },
        "parameters": {
            "coef":         model.coef_[0],
            "intercept":    model.intercept_
        }

    }

In [11]:
all_rows = list()
for model_name in model_names+["all"]:
    one_row = list()
    results = models_paramaters_dict[model_name]["results"]
    parameters = models_paramaters_dict[model_name]["parameters"]
    TN, FP, FN, TP = confusion_matrix(results["gt"], results["preds"]).ravel()
    Acc, P, R, F, TNR = calculate_metrics(TN, FP, FN, TP, f_beta=0.5)
    coef = list(parameters["coef"])
    intercept = list(parameters["intercept"])
    one_row = [model_name] + coef + intercept
    one_row =  one_row + [TP, TN, FP, FN, Acc, P, R, F, TNR]
    all_rows.append(one_row)

In [12]:
coef_num_cols = features + ["intercept"] + ["TP", "TN", "FP", "FN", "Acc", "P", "R", "F", "TNR"]
coef_df_columns = ["model_name"] + coef_num_cols
coef_df = pd.DataFrame(np.array(all_rows), columns=coef_df_columns)
coef_df.head()

Unnamed: 0,model_name,specificity,question_length,complexity,image_relatedness,image_difficulty,intercept,TP,TN,FP,FN,Acc,P,R,F,TNR
0,Vilt,0.2591480391345407,-2.473888216533878,-0.8553908833251268,-1.834578774741996,-0.601714256606848,2.229501351551755,141,130,60,49,0.7131578947368421,0.7014925373134329,0.7421052631578947,0.7092555331991951,0.7262569832402235
1,Blip_large,-0.0288646829120862,-1.8951673063287335,-1.033298904219652,-2.70609305056019,-0.0545728508001766,2.412095070864561,172,161,67,56,0.7302631578947368,0.7196652719665272,0.7543859649122807,0.7263513513513513,0.7419354838709677
2,GiT_base,0.3355182910640228,-0.9038537473529332,-0.8603145160052362,-1.6816142022160034,-0.8135396210531313,1.6135126690069268,155,163,70,78,0.6824034334763949,0.6888888888888889,0.6652360515021459,0.6840247131509268,0.6763485477178424
3,GiT_large,0.7183601626374071,-2.386739541120375,-0.8238831514859687,-1.9739582219891365,-0.664248712153082,1.9720263759233008,126,139,39,52,0.7443820224719101,0.7636363636363637,0.7078651685393258,0.7517899761336514,0.7277486910994765
4,all,0.3244119578672224,-1.7950401673900909,-0.7756130920431122,-2.0792093355376253,-0.2901772888386124,1.8974973402742037,561,616,215,270,0.7081829121540313,0.7229381443298969,0.6750902527075813,0.7128335451080051,0.6952595936794582


In [13]:
coef_df[coef_num_cols] = coef_df[coef_num_cols].apply(pd.to_numeric)

In [14]:
# with pd.ExcelWriter(f"./{model_type}_classifier_coefs_manual.xlsx", engine="openpyxl", mode="w") as writer:
#     coef_df.to_excel(writer, sheet_name=f"{model_type}_classifier_coefs", index_label='ID')