## Import stuff

In [1]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shutil
import random
from PIL import Image
import logging
import time
import sklearn.metrics as metrics


## Load validation data from csv


In [11]:
# Per-extractor anchor distances (one row per sample) and the labels file.
val_data = pd.read_csv("val_distances.csv", index_col=0)
val_labels = pd.read_csv("val_labels.txt", header=None)

# Collapse every left/right distance pair into one signed feature
# (left distance minus right distance) named "<extractor>_<i>".
raw_distance_columns = []
for extractor in ("xl", "resnet", "clip", "vit"):
    for i in range(1, 4):
        suffix = f"{extractor}_{i}"
        left_col = f"anchor_left_distance_{suffix}"
        right_col = f"anchor_right_distance_{suffix}"
        val_data[suffix] = val_data[left_col] - val_data[right_col]
        raw_distance_columns += [left_col, right_col]

# The raw distances are no longer needed once the differences exist; dropping
# them in one go leaves the new columns in xl, resnet, clip, vit order.
val_data = val_data.drop(columns=raw_distance_columns)

X_val = np.array(val_data.values, dtype=float)
Y_val = np.array(val_labels.values, dtype=float).reshape(-1)

print("X shape: ", X_val.shape)
print("Y shape: ", Y_val.shape)

val_data

X shape:  (7471, 12)
Y shape:  (7471,)


Unnamed: 0,xl_1,xl_2,xl_3,resnet_1,resnet_2,resnet_3,clip_1,clip_2,clip_3,vit_1,vit_2,vit_3
0,-0.068664,-0.041754,-0.014346,-0.090454,-0.013032,0.058454,-0.230397,-0.209917,-0.209141,-0.087971,-0.036976,-0.188392
1,0.005716,-0.018974,-0.022985,0.032172,-0.141101,-0.023593,0.048286,-0.079686,-0.064621,-0.035327,0.067278,0.006873
2,-0.056310,-0.006234,-0.006093,0.003603,-0.044759,-0.087855,-0.096871,-0.101507,-0.097258,0.037173,-0.026207,-0.025088
3,-0.172882,-0.244676,-0.158502,-0.224878,-0.063581,-0.113187,-0.211265,-0.129867,-0.274124,-0.584619,-0.503904,-0.409068
4,0.110826,0.201579,0.119889,0.073123,0.029605,0.065464,0.101040,0.069663,0.048476,-0.188351,-0.130856,-0.286965
...,...,...,...,...,...,...,...,...,...,...,...,...
7466,-0.114149,-0.197882,-0.062034,-0.054414,0.007887,-0.040145,-0.035834,-0.031924,-0.060561,-0.008923,-0.092040,-0.112462
7467,-0.118582,-0.121366,-0.130768,-0.106617,-0.012099,-0.077673,-0.102473,0.012539,0.037888,-0.104456,-0.035895,0.015354
7468,-0.114087,-0.143940,-0.098739,-0.265105,-0.241483,-0.238572,-0.119350,-0.062712,-0.141130,0.020443,-0.274030,0.016123
7469,0.072685,0.096396,0.024189,0.158151,0.145758,0.029370,0.068423,0.052202,0.067914,0.141706,0.096333,-0.007368


# Weighted ensemble
The weights would be hard to estimate in practice; tuning them would take several server submissions. Here the two weaker extractors (xl and vit) are simply given half the weight of the other two (resnet and clip).

In [13]:
# Sum the three difference features of each extractor, column-wise.
# Column layout of X_val (as built by the loading cell):
# 0-2 xl, 3-5 resnet, 6-8 clip, 9-11 vit.
xl_sum = X_val[:, 0] + X_val[:, 1] + X_val[:, 2]
resnet_sum = X_val[:, 3] + X_val[:, 4] + X_val[:, 5]
clip_sum = X_val[:, 6] + X_val[:, 7] + X_val[:, 8]
vit_sum = X_val[:, 9] + X_val[:, 10] + X_val[:, 11]

# resnet and clip are divided by 3, xl and vit by 6 (i.e. half the weight).
score_left = xl_sum / 6 + resnet_sum / 3 + clip_sum / 3 + vit_sum / 6

# A sample is predicted positive when its weighted score is strictly positive.
pred = score_left > 0
accuracy = metrics.accuracy_score(Y_val, pred)
accuracy

0.7181100254316691

# Majority vote
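Performs worse than the weighted ensemble above. (Note: the accuracy saved below is higher than the weighted ensemble's 0.718 and the cell has no execution count, so re-run it to confirm.)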

In [None]:
# Only the first five feature columns take part in the vote; every other
# column of X_val is ignored by this cell.
# NOTE(review): confirm that restricting the vote to these five is intended.
voter_columns = [0, 1, 2, 3, 4]

# One vote per column whose value is strictly positive.
positive_votes = (X_val[:, voter_columns] > 0).sum(axis=1)

# Encoding kept as-is: 0 when at least three of the five votes are positive,
# 1 otherwise.
pred = np.where(positive_votes > 2, 0, 1)

# `pred < 0.5` maps the 0/1 encoding back to booleans that are True exactly
# where the majority of votes was positive.
accuracy = metrics.accuracy_score(Y_val, pred < 0.5)
accuracy

0.778744478650783

In [9]:
# Reload the distance table from disk and display it.
# Note: this rebinds `val_data` to the frame read from the CSV, replacing
# whatever was previously bound to that name in the kernel.
val_data = pd.read_csv("val_distances.csv",index_col=0)
val_data
# Disabled code below: it would drop the two swin distance columns and write
# the frame back over the same CSV file.
# NOTE(review): presumably a one-off cleanup — confirm before re-enabling,
# since it overwrites the input file in place.
# val_data.drop("anchor_left_distance_swin",inplace=True,axis=1)
# val_data.drop("anchor_right_distance_swin",inplace=True,axis=1)
# val_data.to_csv("val_distances.csv")

Unnamed: 0,anchor_left_distance_clip_1,anchor_right_distance_clip_1,anchor_left_distance_clip_2,anchor_right_distance_clip_2,anchor_left_distance_clip_3,anchor_right_distance_clip_3,anchor_left_distance_xl_1,anchor_right_distance_xl_1,anchor_left_distance_xl_2,anchor_right_distance_xl_2,...,anchor_left_distance_resnet_2,anchor_right_distance_resnet_2,anchor_left_distance_resnet_3,anchor_right_distance_resnet_3,anchor_left_distance_vit_1,anchor_right_distance_vit_1,anchor_left_distance_vit_2,anchor_right_distance_vit_2,anchor_left_distance_vit_3,anchor_right_distance_vit_3
0,0.323175,0.553571,0.308628,0.518545,0.378845,0.587986,0.546659,0.615324,0.534716,0.576471,...,0.587014,0.600046,0.541205,0.482751,0.750026,0.837997,0.814891,0.851867,0.725180,0.913572
1,0.504326,0.456040,0.425435,0.505120,0.580654,0.645276,0.736648,0.730932,0.691018,0.709993,...,0.594866,0.735967,0.569175,0.592768,1.011418,1.046745,1.050378,0.983100,1.031830,1.024957
2,0.386311,0.483183,0.360702,0.462209,0.356006,0.453264,0.704422,0.760732,0.712232,0.718466,...,0.488985,0.533744,0.547760,0.635615,1.064129,1.026956,0.996132,1.022339,1.016380,1.041468
3,0.293137,0.504402,0.294002,0.423869,0.344702,0.618826,0.520046,0.692928,0.494008,0.738684,...,0.456094,0.519675,0.502059,0.615246,0.643607,1.228226,0.604704,1.108608,0.673361,1.082429
4,0.485143,0.384103,0.421579,0.351915,0.557889,0.509414,0.653479,0.542652,0.675509,0.473930,...,0.422551,0.392946,0.450284,0.384820,0.803268,0.991620,0.942835,1.073691,0.770493,1.057458
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7466,0.301527,0.337360,0.317738,0.349662,0.405924,0.466484,0.566413,0.680562,0.395655,0.593537,...,0.473178,0.465291,0.425257,0.465402,0.923516,0.932439,0.919171,1.011211,0.935307,1.047768
7467,0.431211,0.533683,0.391561,0.379022,0.595596,0.557708,0.550971,0.669553,0.441854,0.563221,...,0.440714,0.452812,0.436168,0.513841,1.061792,1.166249,1.088620,1.124515,1.031335,1.015981
7468,0.507565,0.626915,0.502994,0.565706,0.567106,0.708236,0.579985,0.694071,0.527039,0.670979,...,0.363306,0.604790,0.335091,0.573663,0.970762,0.950319,0.776288,1.050318,1.016313,1.000190
7469,0.500502,0.432079,0.492935,0.440733,0.571393,0.503479,0.687616,0.614931,0.725043,0.628648,...,0.594050,0.448292,0.578646,0.549276,1.111962,0.970256,1.144136,1.047803,1.069952,1.077320
