# Import

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import random
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support


# Utils

In [2]:
def calculate_metrics(TN, FP, FN, TP, f_beta=1):
    Acc = (TP+TN)/(TN+FP+FN+TP)
    P = TP/(TP+FP)
    R = TP/(TP+FN)
    F = (1+pow(f_beta,2))*(P*R)/((pow(f_beta,2))*P+R)
    TNR = TN/(TN+FN)
    return Acc, P, R, F, TNR

def train_test_split_idxs(correct_col, test_ratio=0.25, balanced=True):
    label_0 = list(np.argwhere(correct_col==0)[:,0])
    label_1 = list(np.argwhere(correct_col==1)[:,0])

    if balanced:
        sample_size = min(len(label_0), len(label_1))
        label_0 = random.sample(label_0, sample_size)
        label_1 = random.sample(label_1, sample_size)
    
    test_idxs_0 = random.sample(label_0, int(test_ratio*len(label_0)))
    test_idxs_1 = random.sample(label_1, int(test_ratio*len(label_1)))
    train_idxs_0 = list(set(label_0)-set(test_idxs_0))
    train_idxs_1 = list(set(label_1)-set(test_idxs_1))

    test_idxs = test_idxs_0 + test_idxs_1
    train_idxs = train_idxs_0 + train_idxs_1

    random.shuffle(test_idxs)
    random.shuffle(train_idxs)

    # print(f"tr_0: {len(train_idxs_0)}", end=" ")
    # print(f"tr_1: {len(train_idxs_1)}", end=" ")
    # print(f"ts_0: {len(test_idxs_0)}", end=" ")
    # print(f"ts_1: {len(test_idxs_1)}", end=" ")
    # print()

    # train_idxs, test_idxs = train_test_split(labels_idxs, test_size=int(test_ratio*len(labels_idxs)))
    # test_idxs = random.sample(labels_idxs, int(test_ratio*len(labels_idxs)))
    # train_idxs = list(set(labels_idxs)-set(test_idxs))

    return train_idxs, test_idxs

# Define variables

In [3]:
model_names = ["Vilt", "Blip_large", "GiT_base", "GiT_large"] # skip , "Blip_base"
model_type = "VQA"
D_type = "1"

full_df_columns = ["feature", "model_name", "classifier", "TP", "TN", "FP", "FN", "Acc", "P", "R", "F", "TNR"]

numeric_cols = ["used_language", "specificity", "question_length", "complexity", "image_relatedness", "image_difficulty", "difficulty", \
                "no_tokens", "correct", "N", "Prob", "P_T_1", 'P_T_2_N']
numeric_cols_dtype = dict()
for c in numeric_cols: numeric_cols_dtype[c]=float
numeric_cols_dtype

{'used_language': float,
 'specificity': float,
 'question_length': float,
 'complexity': float,
 'image_relatedness': float,
 'image_difficulty': float,
 'difficulty': float,
 'no_tokens': float,
 'correct': float,
 'N': float,
 'Prob': float,
 'P_T_1': float,
 'P_T_2_N': float}

# Read full results df

In [4]:
full_results_df = pd.read_excel(f"./{model_type}_full_results_D_type_{D_type}_automatic.xlsx", sheet_name=f"{model_type}_D_type_{D_type}_results",
                                dtype=numeric_cols_dtype)

full_results_df.head()

Unnamed: 0,ID,image_name,example_question,question,answer_gt,used_language,specificity,question_length,complexity,image_relatedness,...,P_T_2_N,x_max_str,x_min_str,Prob_str,T_1_max_str,T_1_str,P_T_1_str,T_2_max_N_str,T_2_N_str,P_T_2_N_str
0,0,Gaussian_5_2,what is in the image?,what is in the image?,random+noise+nothing+t know+not sure+unknown+c...,0.0,1.0,0.035714,0.222222,0.716642,...,0.023459,-2.3610375,-16.713715,0.07764137,1.7832804974941396,1.5291325316875397,0.022081973,1.738079,1.498597,0.023458984
1,1,Gaussian_5_2,what is the dominant color of the image?,what is the dominant color of the image?,color+gray+grey+nothing+t know+not sure+unknow...,0.0,1.0,0.142857,0.333333,0.722859,...,0.14145,-0.49326575,-27.210875,0.20900321,3.319589136322892,2.6139557853339146,0.039211985,1.738079,1.513551,0.14144981
2,2,Gaussian_5_2,what does the image represent?,what does the image represent?,random+noise+nothing+t know+not sure+unknown+c...,0.0,1.0,0.035714,0.111111,0.72258,...,0.026472,-2.180196,-17.993324,0.08220834,1.964737514053651,1.64468944859392,0.019827817,1.738079,1.493224,0.02647247
3,3,Gaussian_5_2,why is the image random?,why is the image random?,random+t know+not sure+unknown+can't tell+none...,0.0,1.0,0.035714,0.111111,0.72277,...,0.033062,-2.5192337,-19.845095,0.12248334,2.152690347564782,1.7703628149102884,0.019071277,1.738079,1.493271,0.03306224
4,4,Gaussian_5_2,why aren't there any objects in the image?,why aren't there any objects in the image?,random+noise+t know+not sure+unknown+can't tel...,0.0,1.0,0.142857,0.333333,0.720126,...,0.018999,-3.1362438,-18.810205,0.06400901,1.9474464197595112,1.658364383344993,0.014612811,1.738079,1.512878,0.018999284


# Add valid column

In [5]:
not_nan = np.array(~(full_results_df['clean_answer']).isna())
not_qst_mark = np.array(~(full_results_df['clean_answer'].str.contains('?', na=True, regex=False)))

valid = np.where(not_nan & not_qst_mark, 1, 0)

data = full_results_df.copy()
data["valid"] = valid

len(data)

14276

In [6]:
data["no_tokens"] = data["no_tokens"]/np.max(list(data["no_tokens"]))

# Train Test indices

In [7]:
models_idxs = dict()
balanced = True

## Full set

In [8]:
correct_col = np.array(data.loc[(data["image_difficulty"]!=1)]["correct"]).astype(np.int32) # no random image
train_idxs, test_idxs = train_test_split_idxs(correct_col, test_ratio=0.25, balanced=balanced)

models_idxs["all"] = [train_idxs, test_idxs]

## Models

In [9]:
for model_name in model_names:
    print(model_name)
    correct_col = np.array(data.loc[(data["image_difficulty"]!=1) & (data["model_name"]==model_name)]["correct"]).astype(np.int32) # no random image
    train_idxs, test_idxs = train_test_split_idxs(correct_col, test_ratio=0.25, balanced=balanced)
    models_idxs[model_name] = [train_idxs, test_idxs]

Vilt
Blip_large
GiT_base
GiT_large


# Coefs

In [10]:
features = ["specificity", "question_length", "complexity", "image_relatedness", "image_difficulty"] # no used_language

models_paramaters_dict = dict()

for model_name in model_names+["all"]:

    temp = data.copy()
    temp = data.loc[(data["image_difficulty"]!=1)]


    if model_name!="all": temp = temp.loc[(temp["model_name"]==model_name)]

    train_idxs, test_idxs = models_idxs[model_name]
    X_train, X_test = np.array(temp[features])[train_idxs], np.array(temp[features])[test_idxs]
    y_train, y_test = np.array(temp["correct"])[train_idxs], np.array(temp["correct"])[test_idxs]

    # print(model_name)
    # print(np.bincount(np.array(y_train, dtype="int")))

    model = LogisticRegression()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    models_paramaters_dict[model_name] = {
        "results": {
            "gt":           y_test,
            "preds":        predictions
        },
        "parameters": {
            "coef":         model.coef_[0],
            "intercept":    model.intercept_
        }

    }

In [11]:
all_rows = list()
for model_name in model_names+["all"]:
    one_row = list()
    results = models_paramaters_dict[model_name]["results"]
    parameters = models_paramaters_dict[model_name]["parameters"]
    TN, FP, FN, TP = confusion_matrix(results["gt"], results["preds"]).ravel()
    Acc, P, R, F, TNR = calculate_metrics(TN, FP, FN, TP, f_beta=0.5)
    coef = list(parameters["coef"])
    intercept = list(parameters["intercept"])
    one_row = [model_name] + coef + intercept
    one_row =  one_row + [TP, TN, FP, FN, Acc, P, R, F, TNR]
    all_rows.append(one_row)

In [12]:
coef_num_cols = features + ["intercept"] + ["TP", "TN", "FP", "FN", "Acc", "P", "R", "F", "TNR"]
coef_df_columns = ["model_name"] + coef_num_cols
coef_df = pd.DataFrame(np.array(all_rows), columns=coef_df_columns)
coef_df.head()

Unnamed: 0,model_name,specificity,question_length,complexity,image_relatedness,image_difficulty,intercept,TP,TN,FP,FN,Acc,P,R,F,TNR
0,Vilt,0.6989118591845445,-2.889514285018086,-0.0625143900993664,-0.750546471732854,-0.9221139992569178,1.9121532194764632,125,133,57,65,0.6789473684210526,0.6868131868131868,0.6578947368421053,0.6808278867102396,0.6717171717171717
1,Blip_large,0.5264485547390545,-2.6507542924319645,1.1859989583913169,-0.8236331922564556,-0.8583383651438472,1.4849900627208283,132,145,84,97,0.6048034934497817,0.6111111111111112,0.5764192139737991,0.6038426349496798,0.5991735537190083
2,GiT_base,0.7539406686071279,-2.2524242841377937,1.5742566601818928,0.0860757120992249,-0.3852898862941087,0.0260714382275042,146,148,85,87,0.630901287553648,0.6320346320346321,0.6266094420600858,0.6309420916162489,0.6297872340425532
3,GiT_large,1.710202128404946,-3.443050025157401,0.1003222080271101,-0.7041235444920559,0.1685245863185202,0.5994809246329742,131,116,63,48,0.6899441340782123,0.6752577319587629,0.7318435754189944,0.6858638743455499,0.7073170731707317
4,all,0.6394173938932252,-3.1616470209721723,1.578757333108254,-1.6681588600075352,-1.1128423675936097,2.2457155976891885,515,561,272,318,0.6458583433373349,0.6543837357052097,0.6182472989195679,0.6468224064305451,0.6382252559726962


In [13]:
coef_df[coef_num_cols] = coef_df[coef_num_cols].apply(pd.to_numeric)

In [14]:
# with pd.ExcelWriter(f"./{model_type}_classifier_coefs_automatic.xlsx", engine="openpyxl", mode="w") as writer:
#     coef_df.to_excel(writer, sheet_name=f"{model_type}_classifier_coefs", index_label='ID')