# Import

In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

# Define variables

In [2]:
# Models included in the analysis ("Blip_base" is skipped).
model_names = ["Vilt", "Blip_large", "GiT_base", "GiT_large"]

# Difficulty score per image, written tier by tier.
# Key order matters: `image_names` below is taken from it.
image_difficulty = {
    # Gaussian_* images are scored 1
    **dict.fromkeys(
        ["Gaussian_5_2", "Gaussian_5_3", "Gaussian_5_4", "Gaussian_5_5"],
        1,
    ),
    # white_house_brown_* images are scored 0
    **dict.fromkeys(
        ["white_house_brown_cat", "white_house_brown_cow",
         "white_house_brown_dog", "white_house_brown_horse"],
        0,
    ),
    # all remaining white_house_* images are scored 0.5
    **dict.fromkeys(
        ["white_house_green_cat", "white_house_green_unknown",
         "white_house_pink_cow", "white_house_pink_unknown",
         "white_house_purple_dog", "white_house_purple_horse",
         "white_house_red_duck", "white_house_red_unknown",
         "white_house_yellow_unknown"],
        0.5,
    ),
}
image_names = list(image_difficulty)

model_type = "VQA"

# Per-question criteria columns, the subset treated as "major" by the
# difficulty score, and the columns that do not depend on the model.
criteria = [
    "image_type",
    "used_language",
    "specificity",
    "question_length",
    "complexity",
    "image_relatedness",
]
major_criteria = ["image_difficulty", "used_language", "image_relatedness"]
fixed_cols = ["question", "answer_gt", *criteria]

In [3]:
# Display the list of fixed (model-independent) column names.
fixed_cols

['question',
 'answer_gt',
 'image_type',
 'used_language',
 'specificity',
 'question_length',
 'complexity',
 'image_relatedness']

# Define functions

In [4]:
def T_1(logits):
  """Return the spread of `logits` divided by ln(number of logits).

  Computes (max(logits) - min(logits)) / ln(len(logits)).
  """
  spread = np.max(logits) - np.min(logits)
  return spread/np.log(len(logits))

def T_2(logits_list, epsilon):
  """Return the smallest (max - min)/ln(1 + epsilon) over all logit vectors.

  `logits_list` is a sequence of entries, each itself a sequence of logit
  vectors. The minimum is taken per entry first, then across entries.
  """
  per_entry_minima = [
      np.min([(np.max(logit) - np.min(logit))/np.log(1+epsilon) for logit in logits])
      for logits in logits_list
  ]
  return np.min(per_entry_minima)

def softmax_stable(x, T=1):
  """Softmax of `x` at temperature `T`.

  The maximum of `x` is subtracted before exponentiating, so the largest
  exponent argument is 0.
  """
  scaled = (x - np.max(x))/T
  weights = np.exp(scaled)
  return weights / weights.sum()

def D_to_T(D=0, D_min=0, D_max=1, T_min=1, T_max=1000):
  """Map `D` linearly from [D_min, D_max] onto [T_min, T_max].

  D == D_min gives T_min and D == D_max gives T_max.
  """
  d_offset = D - D_min
  t_span = T_max - T_min
  d_span = D_max - D_min
  # Multiply before dividing so results match the original rounding exactly.
  return d_offset*t_span/d_span + T_min

def Diff_score(question_scores=dict(), image_scores=dict(), relatedness_scores=dict(), major=list(), type_=1):
  """Combine image, question and relatedness criteria into one difficulty value.

  The image score is always computed with `criteria_score`. The question and
  relatedness scores are added only when both of those dicts are non-empty.

  type_ == 1: mean of the collected partial scores.
  type_ == 2: 1 when the largest partial score equals 1, otherwise 0.
  Any other type_: 0.
  """
  partial_scores = [criteria_score(image_scores, major)]
  if question_scores and relatedness_scores:
    partial_scores.append(criteria_score(question_scores, major))
    partial_scores.append(criteria_score(relatedness_scores, major))

  if type_ == 2 and np.max(partial_scores) == 1:
    return 1
  if type_ == 1:
    return np.mean(partial_scores)
  return 0

def criteria_score(criteria_dict=dict(), major=list()):
  """Score a group of criteria.

  Args:
    criteria_dict: mapping of criterion name -> score.
    major: names of criteria that force the result to 1 when their score is 1.

  Returns:
    1 as soon as a criterion listed in `major` is met with score 1 while
    iterating; otherwise the mean of all scores. An empty `criteria_dict`
    (the declared default) yields 0.0 instead of raising ZeroDivisionError.
  """
  if not criteria_dict:
    return 0.0
  total = 0  # named `total` so the builtin `sum` is not shadowed
  for item, score in criteria_dict.items():
    if (item in major) and (score==1): return 1
    total += score
  return total/len(criteria_dict)

# Create logits file

In [7]:
# logits_dict = dict()

# for model_name in tqdm(model_names):
#   model_dict = dict()
#   for image_name in image_names:
#     results_path = f"./{model_name}/"
#     file_name = f"{model_type}_{model_name}_{image_name}"
#     results_path = results_path + file_name + ".npy"
#     if not os.path.isfile(results_path): f"!!! Warning: No file !!!\n{file_name} file is not available!"
#     l = np.load(results_path, allow_pickle=True)
#     model_dict[image_name] = l.copy()
#   logits_dict[model_name] = model_dict.copy()

In [8]:
# np_logits_file_path = f"./{model_type}_test_logits.npy"
# np.save(np_logits_file_path, logits_dict)

# Read logits file

In [9]:
# Load the logits dict from its .npy file. np.load returns a 0-d object array
# for a pickled dict, and `.item()` unwraps it back into the dict.
# NOTE(review): allow_pickle=True unpickles the file contents -- only load files you trust.
np_logits_file_path = f"./{model_type}_test_logits.npy"
logits_dict = np.load(np_logits_file_path, allow_pickle=True).item()
# Show which models have logits stored.
logits_dict.keys()

dict_keys(['Vilt', 'Blip_base', 'Blip_large', 'GiT_base', 'GiT_large'])

In [10]:
# N per model: the length of the first logit vector of the first entry stored
# for image "Gaussian_5_2" (presumably the model's output vocabulary size --
# TODO confirm). A comprehension leaves no loop variable behind, so no `del`
# is needed and an empty `model_names` no longer raises NameError.
N_dict = {
    model_name: len(logits_dict[model_name]["Gaussian_5_2"][0][0])
    for model_name in model_names
}

print(N_dict)

{'Vilt': 3129, 'Blip_large': 30524, 'GiT_base': 30522, 'GiT_large': 30522}


# Create results df and file

In [31]:
# model_type = "VQA"

# all_results_df = pd.DataFrame()

# for image_name in tqdm(image_names):
#   image_df = pd.DataFrame()
#   new_image = True
  
#   for model_name in model_names:
#     results_path = f".//{model_name}/"
#     file_name = f"{model_type}_{model_name}_{image_name}"
#     results_path = results_path + file_name + ".xlsx"
#     if not os.path.isfile(results_path): f"!!! Warning: No file !!!\n{file_name} sheet is not available!"
#     usecols="B:M" if new_image else "J:M"
#     df = pd.read_excel(results_path, sheet_name='answers', usecols=usecols)
#     for c in df.columns:
#       # if ("question" in c) or ("gt" in c): new_col_name = c
#       if c in set(fixed_cols): new_col_name = c
#       else: new_col_name = f"{model_name}_{c}"
#       image_df[new_col_name] = df[c]
#     new_image = False
#     # columns = [f"{model_name}_{c}" for c in df.columns]
#     # df.columns = columns.copy()
#     # image_df.concat([image_df, df], axis=1)
#   image_name_col = [image_name for _ in range(len(image_df))]
#   image_df.insert(loc=0, column='image_name', value=image_name_col)
#   all_results_df = pd.concat([all_results_df, image_df], ignore_index = True)

# all_results_df.to_excel(f"./{model_type}_test_answers.xlsx", sheet_name=f"{model_type}_answers")

100%|██████████| 17/17 [00:02<00:00,  5.98it/s]


In [11]:
# Read the combined answers sheet (all images, all models) from the Excel file.
all_results_df = pd.read_excel(f"./all_excels/{model_type}_test_answers.xlsx", sheet_name=f"{model_type}_answers")
# Row count, followed by a preview of the first rows.
print(len(all_results_df))
all_results_df.head()

3569


Unnamed: 0.1,Unnamed: 0,image_name,image_type,used_language,specificity,question_length,complexity,image_relatedness,question,answer_gt,...,Blip_large_correct,Blip_large_no_tokens,GiT_base_full_answer,GiT_base_clean_answer,GiT_base_correct,GiT_base_no_tokens,GiT_large_full_answer,GiT_large_clean_answer,GiT_large_correct,GiT_large_no_tokens
0,0,Gaussian_5_2,1,0,1,0.0,0.0,0.0,what is in the image?,random+noise+nothing+t know+not sure+unknown+c...,...,0,2,[CLS] wallpaper [SEP],wallpaper,0,3,[CLS] nothing [SEP],nothing,1,2
1,1,Gaussian_5_2,1,0,1,0.0,0.0,0.0,what is the dominant color of the image?,color+gray+grey+nothing+t know+not sure+unknow...,...,1,2,[CLS] gray [SEP],gray,1,2,[CLS] gray [SEP],gray,1,2
2,2,Gaussian_5_2,1,0,1,0.0,0.0,0.0,what does the image represent?,random+noise+nothing+t know+not sure+unknown+c...,...,0,2,[CLS] wallpaper [SEP],wallpaper,0,3,[CLS] no [SEP],no,0,2
3,3,Gaussian_5_2,1,0,1,0.0,0.0,0.0,why is the image random?,random+t know+not sure+unknown+can't tell+none...,...,0,5,[CLS] it's not [SEP],it's not,0,5,[CLS] it's not [SEP],it's not,0,5
4,4,Gaussian_5_2,1,0,1,0.0,0.0,0.0,why aren't there any objects in the image?,random+noise+t know+not sure+unknown+can't tel...,...,0,3,[CLS] they are in this picture [SEP],they are in this picture,0,6,[CLS] they are not [SEP],they are not,0,4


# Create T_max_2 dict

In [12]:
# # T_max_2 per model, do them outside the loop
# epsilons = [i/100 for i in range(1, 201)]
# T_max_2_dict = dict()
# for model_name in tqdm(model_names):
#     T_max_2_dict[model_name] = list()
#     T_max_2 = list()
#     model_epsilons = epsilons.copy()
#     model_epsilons.append(N_dict[model_name])
#     for epsilon in model_epsilons:
#         for image_name in image_names:
#             logits_list = logits_dict[model_name][image_name]
#             T_max_2.append(T_2(logits_list, epsilon))
#         T_max_2_dict[model_name].append(np.min(T_max_2))

# np.save("./T_max_2_dict.npy", T_max_2_dict)

# Read T_max_2 dict

In [13]:
# Labels of the epsilon settings: 0.01 .. 2.00 in steps of 0.01, followed by "N".
epsilons = [i/100 for i in range(1, 201)]
epsilons.append("N")
###########################
# Keep only the last setting (epsilon=N). Slice instead of indexing so that
# `epsilons` stays a list of labels: indexing left the bare string "N", and
# later `for epsilon in epsilons` loops would then walk it character by
# character (which only worked because the label is one character long).
epsilons = epsilons[-1:]
###########################

# Load the T_max_2 values from their .npy file; `.item()` unwraps the 0-d
# object array into the dict, which is indexed by model name.
# NOTE(review): allow_pickle=True unpickles the file contents -- only load files you trust.
T_max_2_dict = np.load("./T_max_2_dict.npy", allow_pickle=True).item()
# Show which models have T_max_2 values stored.
T_max_2_dict.keys()

dict_keys(['Vilt', 'Blip_base', 'Blip_large', 'GiT_base', 'GiT_large'])

In [14]:
# Print the smallest stored T_max_2 value of each model, in dict order.
for v in T_max_2_dict.values():
    print(np.min(v))

1.7380787111952216
1.2383381355547283
1.2383381355547283
1.1705838358703073
1.2085129044555991


# Create extended df

In [15]:
# Keep only the image name plus the model-independent columns.
# The resulting column order (image_name, question, answer_gt, image_type,
# used_language, ...) is relied on positionally by the row-building loop later.
# Note: `full_df_columns` is reassigned in a later cell with a different column set.
full_df_columns = ["image_name"] + fixed_cols
all_results_df_sub = all_results_df[full_df_columns]
all_results_df_sub

Unnamed: 0,image_name,question,answer_gt,image_type,used_language,specificity,question_length,complexity,image_relatedness
0,Gaussian_5_2,what is in the image?,random+noise+nothing+t know+not sure+unknown+c...,1,0,1,0.0,0.0,0.0
1,Gaussian_5_2,what is the dominant color of the image?,color+gray+grey+nothing+t know+not sure+unknow...,1,0,1,0.0,0.0,0.0
2,Gaussian_5_2,what does the image represent?,random+noise+nothing+t know+not sure+unknown+c...,1,0,1,0.0,0.0,0.0
3,Gaussian_5_2,why is the image random?,random+t know+not sure+unknown+can't tell+none...,1,0,1,0.0,0.0,0.0
4,Gaussian_5_2,why aren't there any objects in the image?,random+noise+t know+not sure+unknown+can't tel...,1,0,1,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
3564,white_house_yellow_unknown,"i generated this image with an online tool, i ...",no dragon+no talk+t know+not sure+unknown+can'...,0,0,1,1.0,1.0,1.0
3565,white_house_yellow_unknown,"i generated this image with an online tool, i ...",no books+t know+not sure+unknown+can't tell+no...,0,0,1,1.0,1.0,1.0
3566,white_house_yellow_unknown,mi van a kepen?,t know+not sure+unknown+can't tell+none+no ide...,0,1,0,0.0,0.0,0.0
3567,white_house_yellow_unknown,"tekintettel arra, hogy ilyen képet kap, kérem,...",t know+not sure+unknown+can't tell+none+no ide...,0,1,0,0.5,0.0,0.0


In [16]:
# Inspection only: print every row of one image ("Gaussian_5_2") as a plain list
# to check the positional layout of the columns.
df = all_results_df_sub.loc[(all_results_df_sub['image_name']=="Gaussian_5_2")]
for idx, row in df.iterrows():
    print(f"{idx} > {row.tolist()}")

0 > ['Gaussian_5_2', 'what is in the image?', "random+noise+nothing+t know+not sure+unknown+can't tell+none+no idea+can't say", 1, 0, 1, 0.0, 0.0, 0.0]
1 > ['Gaussian_5_2', 'what is the dominant color of the image?', "color+gray+grey+nothing+t know+not sure+unknown+can't tell+none+no idea+can't say", 1, 0, 1, 0.0, 0.0, 0.0]
2 > ['Gaussian_5_2', 'what does the image represent?', "random+noise+nothing+t know+not sure+unknown+can't tell+none+no idea+can't say", 1, 0, 1, 0.0, 0.0, 0.0]
3 > ['Gaussian_5_2', 'why is the image random?', "random+t know+not sure+unknown+can't tell+none+no idea+can't say", 1, 0, 1, 0.0, 0.0, 0.0]
4 > ['Gaussian_5_2', "why aren't there any objects in the image?", "random+noise+t know+not sure+unknown+can't tell+none+no idea+can't say", 1, 0, 1, 0.0, 0.0, 0.0]


In [17]:
# Inspection only: show the rows of image "Gaussian_5_2" as a table.
all_results_df_sub.loc[(all_results_df_sub['image_name']=="Gaussian_5_2")]

Unnamed: 0,image_name,question,answer_gt,image_type,used_language,specificity,question_length,complexity,image_relatedness
0,Gaussian_5_2,what is in the image?,random+noise+nothing+t know+not sure+unknown+c...,1,0,1,0.0,0.0,0.0
1,Gaussian_5_2,what is the dominant color of the image?,color+gray+grey+nothing+t know+not sure+unknow...,1,0,1,0.0,0.0,0.0
2,Gaussian_5_2,what does the image represent?,random+noise+nothing+t know+not sure+unknown+c...,1,0,1,0.0,0.0,0.0
3,Gaussian_5_2,why is the image random?,random+t know+not sure+unknown+can't tell+none...,1,0,1,0.0,0.0,0.0
4,Gaussian_5_2,why aren't there any objects in the image?,random+noise+t know+not sure+unknown+can't tel...,1,0,1,0.0,0.0,0.0


In [18]:
# Names of the columns holding "+"-joined per-token values, and of the numeric
# columns holding the corresponding sequence-level products.
str_cols = ["x_max_str", "x_min_str", "Prob_str", "T_1_max_str", "T_1_str", "P_T_1_str"]
num_cols = ["Prob", "P_T_1"]

# One group of T_2 columns per epsilon label: three string columns and one numeric column.
str_cols_epsilon, num_cols_epsilon = [], []
for epsilon in epsilons:
    str_cols_epsilon += [f"T_2_max_{epsilon}_str", f"T_2_{epsilon}_str", f"P_T_2_{epsilon}_str"]
    num_cols_epsilon += [f"P_T_2_{epsilon}"]
len(str_cols_epsilon), len(num_cols_epsilon)

(3, 1)

In [19]:
# Display the numeric T_2 column names.
num_cols_epsilon

['P_T_2_N']

In [20]:
# Display the fixed column names again for reference.
fixed_cols

['question',
 'answer_gt',
 'image_type',
 'used_language',
 'specificity',
 'question_length',
 'complexity',
 'image_relatedness']

In [21]:
# Column layout of the full results frame, in the order the row-building loop emits values.
full_df_columns = (
    # question / image metadata
    ["image_name", "example_question", "question", "answer_gt", "used_language", "specificity",
     "question_length", "complexity", "image_relatedness", "image_difficulty", "difficulty"]
    # per-model answer fields
    + ["model_name", "full_answer", "clean_answer", "no_tokens", "correct", "N"]
    # numeric sequence-level values: Prob | P_T_1 | P_T_2_<epsilon> ...
    + num_cols + num_cols_epsilon
    # "+"-joined per-token strings:
    # x_max_str | x_min_str | Prob_str | T_1_max_str | T_1_str | P_T_1_str | T_2_max_<epsilon>_str | T_2_<epsilon>_str | P_T_2_<epsilon>_str ...
    + str_cols + str_cols_epsilon
)
print(len(full_df_columns))
full_df_columns

29


['image_name',
 'example_question',
 'question',
 'answer_gt',
 'used_language',
 'specificity',
 'question_length',
 'complexity',
 'image_relatedness',
 'image_difficulty',
 'difficulty',
 'model_name',
 'full_answer',
 'clean_answer',
 'no_tokens',
 'correct',
 'N',
 'Prob',
 'P_T_1',
 'P_T_2_N',
 'x_max_str',
 'x_min_str',
 'Prob_str',
 'T_1_max_str',
 'T_1_str',
 'P_T_1_str',
 'T_2_max_N_str',
 'T_2_N_str',
 'P_T_2_N_str']

# Create full results df and file

In [22]:
# Read the original (template) questions; the row-building loop later picks
# the `example_question` for each row from this frame by image_type and position.
original_qsts = pd.read_excel("./qsts_original.xlsx", sheet_name="qsts_original")
original_qsts.head()

Unnamed: 0,image_type,used_language,specificity,question_length,complexity,image_relatedness,question,answer_gt
0,0,0,0,0.0,0.0,0.0,what is the color1 object?,object1
1,0,0,0,0.0,0.0,0.0,what is in the image with the object1?,object2+sky
2,0,0,0,0.0,0.0,0.0,what is in the image with the object2?,object1+sky
3,0,0,0,0.0,0.0,0.0,what is the object1's color in the image?,color1
4,0,0,0,0.0,0.0,0.0,what is the object2's color in the image?,color2


In [23]:
def _join_values(values):
    """Serialise a sequence as one "+"-separated string."""
    return "+".join(map(str, values))


def _sequence_prob(token_probs, drop_last):
    """Product of the per-token probabilities, optionally ignoring the last ([SEP]) entry."""
    kept = token_probs[:-1] if drop_last else token_probs
    return np.prod(kept)


# Build one output row per (image, model, question). Each row follows the
# layout of `full_df_columns`: metadata, answer fields, numeric sequence-level
# values, then the "+"-joined per-token strings.
D_type = 1  # forwarded to Diff_score as `type_` (1 -> mean of the partial difficulty scores)
all_rows = list()
for image_name in tqdm(image_names):
    df = all_results_df_sub.loc[(all_results_df_sub['image_name']==image_name)]
    image_diff = image_difficulty[image_name]
    image_type = 1 if "Gaussian" in image_name else 0

    # Per-image invariants, computed once instead of once per row:
    # the answers of this image and the template questions of its image type.
    image_results = all_results_df.loc[(all_results_df['image_name']==image_name)]
    example_questions = original_qsts.loc[(original_qsts['image_type']==image_type)]["question"].values

    for model_name in model_names:
        logits_list = logits_dict[model_name][image_name]
        # Blip and GiT: the trailing [SEP] is excluded from token counts and sequence probabilities.
        has_sep = ("Blip" in model_name) or ("GiT" in model_name)
        t_2_maxs = [T_max_2_dict[model_name][-1]]  # only epsilon=N (last stored entry)

        for idx in range(len(df)):
            row = df.iloc[idx].values.tolist()
            example_question = example_questions[idx]
            question, answer_gt = row[1:3]
            used_lang, specificity, question_len, complexity, image_relatedness = row[4:]  # skip image_type; replaced by image_difficulty
            question_scores = {"used_language":used_lang, "specificity":specificity, "question_length":question_len, "complexity":complexity}
            image_scores = {"image_difficulty":image_diff}
            relatedness_scores = {"image_relatedness":image_relatedness}
            diff = Diff_score(question_scores, image_scores, relatedness_scores, major=major_criteria, type_=D_type)

            # One lookup for all four answer columns: the first row of this
            # image whose question text matches.
            answer_rows = image_results.loc[(image_results['question']==question)]
            full_answer = answer_rows[f"{model_name}_full_answer"].values[0]
            clean_answer = answer_rows[f"{model_name}_clean_answer"].values[0]
            no_tokens = answer_rows[f"{model_name}_no_tokens"].values[0]
            if has_sep: no_tokens = no_tokens-1  # exclude [SEP]
            correct = answer_rows[f"{model_name}_correct"].values[0]

            # step 1
            # add the following columns' values
            # | image_name | example_question | question | answer_gt | used_language | specificity | question_length | complexity | image_relatedness | ...
            #   image_difficulty | difficulty | model_name | full_answer | clean_answer | no_tokens | correct | N |
            one_row = [image_name, example_question, question, answer_gt, used_lang, specificity, question_len, complexity, image_relatedness]
            one_row.extend([image_diff, diff, model_name, full_answer, clean_answer, no_tokens, correct, N_dict[model_name]])

            # step 2
            # add the following columns' values
            # | x_max_str | x_min_str | Prob_str | T_1_max_str | T_1_str | P_T_1_str |
            # | Prob | P_T_1 |
            logits = logits_list[idx]
            # Sanity check: report rows whose token count disagrees with the number of logit vectors.
            if has_sep and (no_tokens!=len(logits)-1): print(f"{model_name} -> {image_name} -> {idx}")

            x_maxs, x_mins, probs, t_1_maxs, t_1s, p_T_1s = [], [], [], [], [], []
            for logit in logits:
                # decoded token (do not ignore stopwords)
                x_maxs.append(np.max(logit))
                x_mins.append(np.min(logit))
                probs.append(np.max(softmax_stable(logit, T=1)))
                T_1_max = T_1(logit)
                t_1_maxs.append(T_1_max)
                t_1 = D_to_T(D=diff, T_max=T_1_max)
                t_1s.append(t_1)
                p_T_1s.append(np.max(softmax_stable(logit, T=t_1)))

            str_list = [_join_values(item) for item in (x_maxs, x_mins, probs, t_1_maxs, t_1s, p_T_1s)]
            num_list = [_sequence_prob(probs, has_sep), _sequence_prob(p_T_1s, has_sep)]

            # TODO:
            # - An answer with no token probabilities gets np.prod([]) == 1, i.e. it is reported with confidence 1.
            # - For GiT, check whether [CLS] (or [SEP]) is already absent from the logits.
            # - Stretch check: does [CLS] represent GiT's confidence?
            # - "Rubber Ducky" in GiT is a good failure example: questions with the same D,
            #   one right and one wrong, get exactly the same confidence.

            # step 3
            # add the following columns' values
            # | T_2_max_<epsilon>_str | T_2_<epsilon>_str | P_T_2_<epsilon>_str | P_T_2_<epsilon> | ... |
            for T_2_max in t_2_maxs:
                t_2 = D_to_T(D=diff, T_max=T_2_max)
                p_T_2s = [np.max(softmax_stable(logit, T=t_2)) for logit in logits]
                str_list.extend([str(T_2_max), str(t_2), _join_values(p_T_2s)])
                num_list.append(_sequence_prob(p_T_2s, has_sep))

            one_row.extend(num_list)
            one_row.extend(str_list)
            all_rows.append(one_row)

100%|██████████| 17/17 [00:47<00:00,  2.80s/it]


In [24]:
# Number of output columns; each row in `all_rows` is expected to have this many values.
len(full_df_columns)

29

In [25]:
# Assemble the full results frame from the collected rows.
# Each row mixes strings and numbers, so np.array(all_rows) produces a string
# array and every column arrives as text; the numeric columns are converted
# back with pd.to_numeric in a later cell.
# NOTE(review): pd.DataFrame(all_rows, columns=full_df_columns) would keep the
# native dtypes -- confirm nothing downstream relies on the string round-trip.
full_df = pd.DataFrame(np.array(all_rows), columns=full_df_columns)
full_df.head()

Unnamed: 0,image_name,example_question,question,answer_gt,used_language,specificity,question_length,complexity,image_relatedness,image_difficulty,...,P_T_2_N,x_max_str,x_min_str,Prob_str,T_1_max_str,T_1_str,P_T_1_str,T_2_max_N_str,T_2_N_str,P_T_2_N_str
0,Gaussian_5_2,what is in the image?,what is in the image?,random+noise+nothing+t know+not sure+unknown+c...,0,1,0.0,0.0,0.0,1,...,0.035396315,-2.3610375,-16.713715,0.07764137,1.7832804974941396,1.3263668739558916,0.033901606,1.7380787111952216,1.3075327963313423,0.035396315
1,Gaussian_5_2,what is the dominant color of the image?,what is the dominant color of the image?,color+gray+grey+nothing+t know+not sure+unknow...,0,1,0.0,0.0,0.0,1,...,0.16670989,-0.49326575,-27.210875,0.20900321,3.319589136322892,1.966495473467872,0.08871711,1.7380787111952216,1.3075327963313423,0.16670989
2,Gaussian_5_2,what does the image represent?,what does the image represent?,random+noise+nothing+t know+not sure+unknown+c...,0,1,0.0,0.0,0.0,1,...,0.03933237,-2.180196,-17.993324,0.08220834,1.964737514053651,1.4019739641890212,0.031976696,1.7380787111952216,1.3075327963313423,0.03933237
3,Gaussian_5_2,why is the image random?,why is the image random?,random+t know+not sure+unknown+can't tell+none...,0,1,0.0,0.0,0.0,1,...,0.05141516,-2.5192337,-19.845095,0.12248334,2.152690347564782,1.4802876448186593,0.034029774,1.7380787111952216,1.3075327963313423,0.05141516
4,Gaussian_5_2,why aren't there any objects in the image?,why aren't there any objects in the image?,random+noise+t know+not sure+unknown+can't tel...,0,1,0.0,0.0,0.0,1,...,0.02913933,-3.1362438,-18.810205,0.06400901,1.9474464197595112,1.394769341566463,0.024084808,1.7380787111952216,1.3075327963313423,0.02913933


In [26]:
# Columns of `full_df` that hold numbers and are converted with pd.to_numeric
# in the next cell: criteria scores, difficulty values, token counts,
# correctness flags, N, and the sequence-level values.
numeric_cols = ["used_language", "specificity", "question_length", "complexity", "image_relatedness", "image_difficulty", "difficulty", \
                "no_tokens", "correct", "N"]
numeric_cols = numeric_cols + num_cols + num_cols_epsilon
numeric_cols

['used_language',
 'specificity',
 'question_length',
 'complexity',
 'image_relatedness',
 'image_difficulty',
 'difficulty',
 'no_tokens',
 'correct',
 'N',
 'Prob',
 'P_T_1',
 'P_T_2_N']

In [27]:
# Convert the numeric columns to numeric dtypes, column by column.
full_df[numeric_cols] = full_df[numeric_cols].apply(pd.to_numeric)

In [28]:
# full_df.to_excel(f"./{model_type}_full_results_D_type_{D_type}_manual.xlsx", sheet_name=f"{model_type}_D_type_{D_type}_results", index_label='ID')