In [None]:
import numpy as np
import pandas as pd

import os
# Sanity check: print the entries found in the dataset directory.
print(os.listdir("./track3-data"))

In [None]:
from collections import defaultdict
from glob import glob
from random import choice, sample
from keras.preprocessing import image
import cv2
import numpy as np
import pandas as pd
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract,Add
from keras.models import Model
from keras.optimizers import Adam
from keras.backend import sqrt
import tensorflow as tf
from keras import backend as K
import keras
from tqdm import tqdm_notebook, tqdm

In [None]:
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace

In [None]:
def get_train_val(familly_name):
    """Split relationship pairs and face images into train and validation sets.

    Images are globbed from ``./track3-data/train-val-faces/*/*/*.jpg`` and
    keyed by ``"<family>/<member>"`` (the two directory names above the file).
    An image belongs to the validation split when ``familly_name`` occurs as a
    substring of its path; a relationship pair belongs to the validation split
    when ``familly_name`` occurs in its first person key.

    Args:
        familly_name: Substring identifying the held-out families (e.g. "F01").

    Returns:
        A tuple ``(train, val, train_person_to_images_map,
        val_person_to_images_map)`` where ``train`` / ``val`` are lists of
        ``(p1, p2)`` tuples read from ``train-val-data.xlsx`` (restricted to
        people that have at least one image on disk) and the two maps are
        ``defaultdict(list)`` objects from person key to image paths.
    """
    train_file_path = "./track3-data/train-val-data.xlsx"
    train_folders_path = "./track3-data/train-val-faces/"

    # Normalise Windows separators so the "/"-based splitting below works.
    all_images = [p.replace('\\', '/') for p in glob(train_folders_path + "*/*/*.jpg")]

    def person_key(path):
        # ".../F0001/MID1/P00001_face0.jpg" -> "F0001/MID1"
        parts = path.split("/")
        return parts[-3] + "/" + parts[-2]

    train_person_to_images_map = defaultdict(list)
    val_person_to_images_map = defaultdict(list)
    # A set gives O(1) membership tests when filtering the relationship list;
    # the original list made that filter scan every image for every pair.
    known_people = set()

    for path in all_images:
        key = person_key(path)
        known_people.add(key)
        if familly_name in path:
            val_person_to_images_map[key].append(path)
        else:
            train_person_to_images_map[key].append(path)

    relationships = pd.read_excel(train_file_path)
    pairs = [
        pair
        for pair in zip(relationships.p1.values, relationships.p2.values)
        if pair[0] in known_people and pair[1] in known_people
    ]

    train = [pair for pair in pairs if familly_name not in pair[0]]
    val = [pair for pair in pairs if familly_name in pair[0]]
    return train, val, train_person_to_images_map, val_person_to_images_map

In [None]:
def focal_loss(gamma=2., alpha=.25):
    """Create a binary focal-loss function usable as a Keras ``loss``.

    Args:
        gamma: Exponent applied to the modulating factor.
        alpha: Weight of the positive-class term; negatives use ``1 - alpha``.

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` returning the summed
        focal loss over all elements.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Predictions at positive labels; all other positions are set to 1.
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        # Predictions at negative labels; all other positions are set to 0.
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

        positive_term = K.sum(
            alpha * K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1)
        )
        negative_term = K.sum(
            (1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon())
        )
        return -positive_term - negative_term

    return focal_loss_fixed

In [None]:
def signed_sqrt(x):
    """Return ``sign(x) * sqrt(|x| + 1e-9)`` computed with Keras backend ops."""
    backend = keras.backend
    sign = backend.sign(x)
    # The small constant is added inside the square root, as in the formula above.
    magnitude = backend.sqrt(backend.abs(x) + 1e-9)
    return sign * magnitude

In [None]:
def read_img(path):
    """Load an image and return it as a preprocessed float array.

    Args:
        path: Path of the image file to load.

    Returns:
        The result of ``preprocess_input(..., version=2)`` applied to the
        image loaded at 224x224 and converted to a float64 array.
    """
    img = image.load_img(path, target_size=(224, 224))
    # np.float was only an alias of the builtin float (float64) and was removed
    # in NumPy 1.24, so the explicit dtype keeps the same values on any version.
    img = np.array(img).astype(np.float64)
    return preprocess_input(img, version=2)

def gen(list_tuples, person_to_images_map, batch_size=16):
    """Yield endless batches of image pairs with binary labels.

    Each batch holds ``batch_size // 2`` pairs sampled from ``list_tuples``
    (label 1) and is filled up to ``batch_size`` with random pairs of distinct
    people that do not appear in ``list_tuples`` in either order (label 0).

    Args:
        list_tuples: List of hashable ``(p1, p2)`` person-key tuples.
        person_to_images_map: Mapping from person key to a list of image paths.
        batch_size: Number of pairs per batch.

    Yields:
        ``([X1, X2], labels)`` where ``X1`` / ``X2`` are arrays of images read
        with ``read_img`` and ``labels`` is a list of 0/1 ints.
    """
    ppl = list(person_to_images_map.keys())
    # Built once: set lookups replace a linear scan of list_tuples for every
    # candidate negative pair.
    related = set(list_tuples)
    n_positive = batch_size // 2

    while True:
        batch_tuples = sample(list_tuples, n_positive)
        labels = [1] * len(batch_tuples)

        # Rejection-sample negatives until the batch is full.
        while len(batch_tuples) < batch_size:
            p1 = choice(ppl)
            p2 = choice(ppl)

            if p1 != p2 and (p1, p2) not in related and (p2, p1) not in related:
                batch_tuples.append((p1, p2))
                labels.append(0)

        # Diagnostic: print any first-person key that has no images.
        for x in batch_tuples:
            if not len(person_to_images_map[x[0]]):
                print(x[0])

        X1 = [choice(person_to_images_map[x[0]]) for x in batch_tuples]
        X1 = np.array([read_img(x) for x in X1])

        X2 = [choice(person_to_images_map[x[1]]) for x in batch_tuples]
        X2 = np.array([read_img(x) for x in X2])

        yield [X1, X2], labels


def baseline_model():
    """Build and compile the two-input face-pair classifier.

    Both inputs are 224x224x3 images passed through one shared VGGFace
    ResNet50 backbone (``include_top=False``). The first branch is reduced with
    global max pooling and the second with global average pooling; three
    element-wise combinations of the pooled vectors are concatenated and fed
    to Dense(128, relu) -> Dropout(0.02) -> Dense(1, sigmoid).

    The model is compiled with ``focal_loss(alpha=.25, gamma=2)``, the 'acc'
    metric and ``Adam(0.00003)``. Its summary is printed as a side effect.

    NOTE(review): the feature-extraction cells look layers up by their
    auto-generated names ('input_1', 'global_max_pooling2d_1', ...), so the
    order in which layers are created here should not be changed casually.

    Returns:
        The compiled Keras ``Model`` taking ``[input_1, input_2]``.
    """
    input_1 = Input(shape=(224, 224, 3))
    input_2 = Input(shape=(224, 224, 3))

    base_model = VGGFace(model='resnet50', include_top=False)

    # Set trainable=True on every backbone layer except the last three,
    # whose flag is left untouched.
    for x in base_model.layers[:-3]:
        x.trainable = True

    # The same backbone instance (shared weights) processes both inputs.
    x1 = base_model(input_1)
    x2 = base_model(input_2)

    # Note the asymmetric pooling: max for branch 1, average for branch 2.
    x1=GlobalMaxPool2D()(x1)
    x2=GlobalAvgPool2D()(x2)
    
    # (x-y)^2 + (x^2-y^2) + xy
    x3 = Subtract()([x1, x2])
    x3 = Multiply()([x3, x3])
    x1_ = Multiply()([x1, x1])
    x2_ = Multiply()([x2, x2])
    x4 = Subtract()([x1_, x2_])
    x5 = Multiply()([x1, x2])
    
    # Alternative feature combination kept for reference (disabled):
    # (x-y) + (x+y) + xy
#     x3 = Subtract()([x1, x2])
#     x4 = Add()([x1,x2])
#     x5 = Multiply()([x1, x2])
    
    x = Concatenate(axis=-1)([x3, x4, x5])

    x = Dense(128, activation="relu")(x)
    x = Dropout(0.02)(x)
    out = Dense(1, activation="sigmoid")(x)

    model = Model([input_1, input_2], out)

    # Alternative loss / learning-rate settings kept for reference (disabled).
#     model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adam(0.00001))
    model.compile(loss=[focal_loss(alpha=.25, gamma=2)], metrics=['acc'], optimizer=Adam(0.00003))
#     model.compile(loss=[focal_loss(alpha=.25, gamma=2)], metrics=['acc'], optimizer=Adam(0.00001))
    model.summary()

    return model

In [None]:
# Build the network once; later cells train it and load checkpoints into it.
model = baseline_model()

In [None]:
# Family-name patterns passed to get_train_val, one per cross-validation fold.
val_famillies_list = [
    "F01",
    "F03",
    "F05",
    "F07",
    "F09",
]
# Number of folds, i.e. number of checkpoints written and later ensembled.
n_val_famillies_list = len(val_famillies_list)

In [None]:
# Train one model per validation fold and keep the best checkpoint of each.
for i in tqdm_notebook(range(n_val_famillies_list)):
    train, val, train_person_to_images_map, val_person_to_images_map = get_train_val(val_famillies_list[i])

    # Start every fold from a freshly built model. Re-using one instance across
    # folds means fold i has already been trained on fold i's validation
    # families during earlier folds, which leaks validation data into training
    # and inflates val_acc. clear_session() also resets Keras' layer-name
    # counters, so the rebuilt model keeps the same auto-generated layer names.
    K.clear_session()
    model = baseline_model()

    file_path = f"vgg_face_{i}.h5"
    checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    reduce_on_plateau = ReduceLROnPlateau(monitor="val_acc", mode="max", factor=0.3, patience=30, verbose=1)
#     reduce_on_plateau = ReduceLROnPlateau(monitor="val_acc", mode="max", factor=0.2, patience=20, verbose=1)   
    callbacks_list = [checkpoint, reduce_on_plateau]

    history = model.fit_generator(gen(train, train_person_to_images_map, batch_size=16), 
                                  use_multiprocessing=False,
                                  validation_data=gen(val, val_person_to_images_map, batch_size=16), 
                                  epochs=66, verbose=2,
                                  workers=1, callbacks=callbacks_list, 
                                  steps_per_epoch=300, validation_steps=200)

In [None]:
import re

def sort_key(s):
    """Return the first run of digits in ``s`` as an int, for numeric sorting.

    Args:
        s: A name such as a file or folder name.

    Returns:
        The integer value of the first digit run, or -1 when ``s`` is empty,
        is not a string, or contains no digits. Always returning an int keeps
        the keys comparable with each other during sorting.
    """
    if not s or not isinstance(s, str):
        return -1
    match = re.search(r'\d+', s)
    return int(match.group()) if match else -1

In [None]:
# test multi models: average the predictions of all fold checkpoints
test_gallery_path = "./track3-data/test-data/gallery/gallery_faces/"
test_probes_path = "./track3-data/test-data/probes/probe_subjects/"

probes_id = os.listdir(test_probes_path)
probes_id.sort(key=sort_key)

# Create the output folder up front so a missing directory does not make
# to_csv fail only after the predictions have been computed.
os.makedirs("./track3-res/sim", exist_ok=True)


def chunker(seq, size=64):
    """Return a generator over consecutive slices of ``seq`` of at most ``size`` items."""
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


for k in tqdm(range(len(probes_id))):
    print("test---"+probes_id[k])

    submission = pd.read_csv("./track3-data/test-data/track3-test/track3-test-{}.csv".format(probes_id[k]))

    # Original author's example of a row: face0.jpg-P07920_face1.jpg, s0
    # (img_pair is "<gallery image>-<probe image>").

    preds_for_sub = np.zeros(submission.shape[0])

    for i in tqdm_notebook(range(n_val_famillies_list)):
        file_path = f"./track3-models/01357-resnet50-bce-1/vgg_face_{i}.h5"
        model.load_weights(file_path)
        # Get the predictions
        predictions = []

        for batch in tqdm_notebook(chunker(submission.img_pair.values)):
            X1 = [x.split("-")[0] for x in batch]
            X1 = np.array([read_img(test_gallery_path + x) for x in X1])

            X2 = [x.split("-")[1] for x in batch]
            X2 = np.array([read_img(test_probes_path + probes_id[k] + '/' + x) for x in X2])

            pred = model.predict([X1, X2]).ravel().tolist()
            predictions += pred
        # Each checkpoint contributes an equal share of the final score.
        preds_for_sub += np.array(predictions) / n_val_famillies_list

    submission['score'] = preds_for_sub
    submission.to_csv("./track3-res/sim/track3-sim-{}.csv".format(probes_id[k]), index=False)

In [None]:
# test single model: score every test pair with one checkpoint
test_gallery_path = "./track3-data/test-data/gallery/gallery_faces/"
test_probes_path = "./track3-data/test-data/probes/probe_subjects/"

probes_id = os.listdir(test_probes_path)
probes_id.sort(key=sort_key)

# The checkpoint is the same for every probe, so it is loaded once.
file_path = "./track3-models/13579-resnet50-bce-1/vgg_face_0.h5"
model.load_weights(file_path)

# Create the output folder up front so a missing directory does not make
# to_csv fail only after the predictions have been computed.
os.makedirs("./track3-preds/13579/sim", exist_ok=True)


def chunker(seq, size=256):
    """Return a generator over consecutive slices of ``seq`` of at most ``size`` items."""
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


for k in tqdm(range(len(probes_id))):
    print("test---"+probes_id[k])

    submission = pd.read_csv("./track3-data/test-data/track3-test/track3-test-{}.csv".format(probes_id[k]))

    # Original author's example of a row: face0.jpg-P07920_face1.jpg, s0
    # (img_pair is "<gallery image>-<probe image>").

    # Get the predictions
    predictions = []

    for batch in tqdm_notebook(chunker(submission.img_pair.values)):
        X1 = [x.split("-")[0] for x in batch]
        X1 = np.array([read_img(test_gallery_path + x) for x in X1])

        X2 = [x.split("-")[1] for x in batch]
        X2 = np.array([read_img(test_probes_path + probes_id[k] + '/' + x) for x in X2])

        pred = model.predict([X1, X2]).ravel().tolist()
        predictions += pred

    # Only one model is used, so its predictions are the final scores.
    preds_for_sub = np.array(predictions, dtype=float)

    submission['score'] = preds_for_sub
    submission.to_csv("./track3-preds/13579/sim/track3-sim-{}.csv".format(probes_id[k]), index=False)

In [None]:
import csv
import re
from collections import defaultdict

# Turn the per-pair scores of every probe subject into a gallery ranking.
test_gallery_path = "./track3-data/test-data/gallery/gallery_faces/"
test_probes_path = "./track3-data/test-data/probes/probe_subjects/"

# NOTE(review): the sort is disabled here, so probes are processed, and the rows
# of sim2_predictions.csv are written, in os.listdir() order. Confirm this is
# intended.
probes_id = os.listdir(test_probes_path)
# probes_id.sort(key=sort_key)

# Hard-coded dataset sizes. TODO confirm they match the data on disk.
nums_gallery = 3897
nums_probes_id = 190

for k in range(nums_probes_id):
    # Score column of this probe subject's sim CSV.
    sim_lines = pd.read_csv("./track3-preds/24689/sim/track3-sim-{}.csv".format(probes_id[k]))['score']
    # Number of score rows per gallery image (presumably one per probe image; TODO confirm).
    nums_probe_pre_id = len(sim_lines)//nums_gallery
    sim2_dict = {}
    for i in range(nums_gallery):
        gallery_img = 'face{}.jpg'.format(str(i))
        # Rows i*n .. i*n+n-1 (n = nums_probe_pre_id) are treated as the scores of gallery image i.
        sim_score_list = [float(sim_lines[i*nums_probe_pre_id+j]) for j in range(nums_probe_pre_id)]
        #================
        # Aggregate the scores of one gallery image by their mean.
        sim2_score = np.mean(sim_score_list)
        #================
        sim2_dict[gallery_img] = sim2_score
    # (name, score) pairs ordered from highest to lowest mean score.
    sim2_sorted = sorted(sim2_dict.items(), key=lambda x:x[1], reverse=True)
    
    # rank_dict: gallery image name -> 0-based position in sim2_sorted.
    rank_dict = {}
    for item in zip(sim2_sorted, list(range(len(sim2_sorted)))):
        name = item[0][0]
        rank = item[1]
        rank_dict[name] = rank
        
    # sort_dict pairs the n-th gallery image (dict insertion order) with the
    # number extracted from the name of the image at position n of sim2_sorted,
    # so the 'sort' column lists gallery numbers from best to worst score.
    sort_dict = {}
    for item in zip(sim2_dict, sim2_sorted):
        name = item[0]
        sort = re.findall(r"\d+?\d*", item[1][0])[0]
        sort_dict[name] = sort
    
    # Column-wise table written to one CSV per probe subject.
    sim2 = defaultdict(list)
    for item in sim2_dict:
        sim2['gallery_img'].append(item)
        sim2['score'].append(sim2_dict[item])
        sim2['rank'].append(rank_dict[item])
        sim2['sort'].append(sort_dict[item])
    pd.DataFrame(sim2).to_csv("./track3-preds/24689/sim2/track3-sim2-{}.csv".format(probes_id[k]))

# Collect the 'sort' column of every probe into one file, one row per probe.
with open('./track3-preds/24689/sim2_predictions.csv','w',newline='') as f:
    csv_write = csv.writer(f, dialect='excel')
    for k in range(nums_probes_id):
        sim2_lines = pd.read_csv("./track3-preds/24689/sim2/track3-sim2-{}.csv".format(probes_id[k]))['sort']
        csv_write.writerow(sim2_lines)



In [None]:
# extract features
import h5py

test_gallery_path = "./track3-data/test-data/gallery/gallery_faces/"
test_probes_path = "./track3-data/test-data/probes/probe_subjects/"

# Gallery file names and probe folder names, ordered by the first number in each name.
gallery_img = os.listdir(test_gallery_path)
gallery_img.sort(key=sort_key)
probes_id = os.listdir(test_probes_path)
probes_id.sort(key=sort_key)

# Hard-coded dataset sizes; nums_gallery sizes the averaged feature arrays below.
# TODO confirm they equal len(gallery_img) and len(probes_id).
nums_gallery = 3897
nums_probes_id = 190

def chunker(seq, size=256):
    """Return a generator over consecutive slices of ``seq`` holding at most ``size`` items."""
    starts = range(0, len(seq), size)
    return (seq[begin:begin + size] for begin in starts)

def l2norm(x, eps=1e-6):
    """Divide every row of ``x`` by its L2 norm plus ``eps``.

    ``eps`` is added to the norm so that all-zero rows do not divide by zero.
    """
    row_norms = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
    return x/(row_norms+eps)

# get gallery features
# The two sub-models reuse the layers of `model`, so they are built once and see
# each checkpoint's weights as soon as model.load_weights() has run.
model_base_max = Model(inputs=model.get_layer('input_1').output, outputs=model.get_layer('global_max_pooling2d_1').output)
model_base_avg = Model(inputs=model.get_layer('input_2').output, outputs=model.get_layer('global_average_pooling2d_1').output)

for i in range(5):
    file_path = "./track3-models/24689-resnet50-focalloss-1/vgg_face_{}.h5".format(i)
    model.load_weights(file_path)

    # Collect per-batch features in lists and concatenate once. The empty
    # float64 seed keeps the result float64 and gives shape (0, 2048) for an
    # empty gallery, matching the previous zero-row-sentinel behaviour.
    max_batches = [np.empty((0, 2048))]
    avg_batches = [np.empty((0, 2048))]

    for batch in tqdm_notebook(chunker(gallery_img)):
        X = np.array([read_img(test_gallery_path + x) for x in batch])
        F_max = model_base_max.predict(X)
        F_avg = model_base_avg.predict(X)
        max_batches.append(F_max)
        avg_batches.append(F_avg)

    F_max_raw = np.concatenate(max_batches, axis=0)
    F_avg_raw = np.concatenate(avg_batches, axis=0)

    # The joint feature is normalised after concatenating the raw features.
    F_max_avg_all = l2norm(np.concatenate((F_max_raw, F_avg_raw), axis=1))
    F_max_all = l2norm(F_max_raw)
    F_avg_all = l2norm(F_avg_raw)
    print(F_max_all.shape, F_avg_all.shape, F_max_avg_all.shape)

    with h5py.File("./track3-feats/24689/gallery/feat_gallery_{}.h5".format(i), 'w') as f:
        f.create_dataset('img_name', np.array(gallery_img).shape, dtype=h5py.special_dtype(vlen=str))[:] = np.array(gallery_img)
        f['feat_max'] = F_max_all
        f['feat_avg'] = F_avg_all
        f['feat_max_avg'] = F_max_avg_all

# get mean gallery features
# Accumulators for the sum over the five per-checkpoint feature files.
F_max_all_mean, F_avg_all_mean, F_max_avg_all_mean = (
    np.zeros((nums_gallery, width)) for width in (2048, 2048, 4096)
)
for i in range(5):
    fold_file = "./track3-feats/24689/gallery/feat_gallery_{}.h5".format(i)
    with h5py.File(fold_file, 'r') as f:
        F_max_all_mean += f['feat_max']
        F_avg_all_mean += f['feat_avg']
        F_max_avg_all_mean += f['feat_max_avg']

# Write the image names and the element-wise means.
with h5py.File("./track3-feats/24689/gallery/feat_gallery_mean.h5", 'w') as f:
    gallery_names = np.array(gallery_img)
    names_ds = f.create_dataset('img_name', gallery_names.shape, dtype=h5py.special_dtype(vlen=str))
    names_ds[:] = gallery_names
    f['feat_max'] = F_max_all_mean/5.0
    f['feat_avg'] = F_avg_all_mean/5.0
    f['feat_max_avg'] = F_max_avg_all_mean/5.0


In [None]:
# extract features
import h5py

test_gallery_path = "./track3-data/test-data/gallery/gallery_faces/"
test_probes_path = "./track3-data/test-data/probes/probe_subjects/"

# Gallery file names and probe folder names, ordered by the first number in each name.
gallery_img = os.listdir(test_gallery_path)
gallery_img.sort(key=sort_key)
probes_id = os.listdir(test_probes_path)
probes_id.sort(key=sort_key)

# Hard-coded dataset sizes; nums_probes_id bounds the probe loop and sizes the
# averaged feature arrays below. TODO confirm it equals len(probes_id).
nums_gallery = 3897
nums_probes_id = 190

def chunker(seq, size=256):
    """Return a generator over consecutive slices of ``seq`` holding at most ``size`` items."""
    starts = range(0, len(seq), size)
    return (seq[begin:begin + size] for begin in starts)

def l2norm(x, eps=1e-6):
    """Divide every row of ``x`` by its L2 norm plus ``eps``.

    ``eps`` is added to the norm so that all-zero rows do not divide by zero.
    """
    row_norms = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
    return x/(row_norms+eps)

# get probes features
# The two sub-models reuse the layers of `model`, so they are built once and see
# each checkpoint's weights as soon as model.load_weights() has run.
model_base_max = Model(inputs=model.get_layer('input_1').output, outputs=model.get_layer('global_max_pooling2d_1').output)
model_base_avg = Model(inputs=model.get_layer('input_2').output, outputs=model.get_layer('global_average_pooling2d_1').output)

for i in range(5):
    file_path = "./track3-models/24689-resnet50-focalloss-1/vgg_face_{}.h5".format(i)
    model.load_weights(file_path)

    # One output folder per checkpoint; makedirs also creates missing parents
    # and does not fail when the folder already exists.
    fold_dir = "./track3-feats/24689/probes/feat_probes_{}".format(i)
    os.makedirs(fold_dir, exist_ok=True)

    # Per-subject mean features, one (1, dim) row per probe subject. The empty
    # float64 seeds keep dtype and empty-input shape stable.
    id_max_rows = [np.empty((0, 2048))]
    id_avg_rows = [np.empty((0, 2048))]
    id_max_avg_rows = [np.empty((0, 4096))]

    for k in tqdm_notebook(range(nums_probes_id)):
        probes_img = os.listdir(test_probes_path + probes_id[k])

        max_batches = [np.empty((0, 2048))]
        avg_batches = [np.empty((0, 2048))]

        for batch in chunker(probes_img):
            X = np.array([read_img(test_probes_path + probes_id[k] + '/' + x) for x in batch])
            F_max = model_base_max.predict(X)
            F_avg = model_base_avg.predict(X)
            max_batches.append(F_max)
            avg_batches.append(F_avg)

        F_max_raw = np.concatenate(max_batches, axis=0)
        F_avg_raw = np.concatenate(avg_batches, axis=0)

        # The joint feature is normalised after concatenating the raw features.
        F_max_avg_all = l2norm(np.concatenate((F_max_raw, F_avg_raw), axis=1))
        F_max_all = l2norm(F_max_raw)
        F_avg_all = l2norm(F_avg_raw)

        # Per-image features of this probe subject.
        with h5py.File(fold_dir + "/feat_probes_{}.h5".format(probes_id[k]), 'w') as f:
            f.create_dataset('img_name', np.array(probes_img).shape, dtype=h5py.special_dtype(vlen=str))[:] = np.array(probes_img)
            f['feat_max'] = F_max_all
            f['feat_avg'] = F_avg_all
            f['feat_max_avg'] = F_max_avg_all

        # Subject-level feature = mean over the subject's normalised image features.
        id_max_rows.append(np.mean(F_max_all, axis=0)[np.newaxis, :])
        id_avg_rows.append(np.mean(F_avg_all, axis=0)[np.newaxis, :])
        id_max_avg_rows.append(np.mean(F_max_avg_all, axis=0)[np.newaxis, :])

    F_max_id = np.concatenate(id_max_rows, axis=0)
    F_avg_id = np.concatenate(id_avg_rows, axis=0)
    F_max_avg_id = np.concatenate(id_max_avg_rows, axis=0)

    with h5py.File("./track3-feats/24689/probes/feat_probes_{}.h5".format(i), 'w') as f:
        f.create_dataset('probes_id', np.array(probes_id).shape, dtype=h5py.special_dtype(vlen=str))[:] = np.array(probes_id)
        f['feat_max'] = F_max_id
        f['feat_avg'] = F_avg_id
        f['feat_max_avg'] = F_max_avg_id
    
# get mean probes features
# Accumulators for the sum over the five per-checkpoint feature files.
F_max_id_mean, F_avg_id_mean, F_max_avg_id_mean = (
    np.zeros((nums_probes_id, width)) for width in (2048, 2048, 4096)
)
for i in range(5):
    fold_file = "./track3-feats/24689/probes/feat_probes_{}.h5".format(i)
    with h5py.File(fold_file, 'r') as f:
        F_max_id_mean += f['feat_max']
        F_avg_id_mean += f['feat_avg']
        F_max_avg_id_mean += f['feat_max_avg']

# Write the probe ids and the element-wise means.
with h5py.File("./track3-feats/24689/probes/feat_probes_mean.h5", 'w') as f:
    probe_names = np.array(probes_id)
    ids_ds = f.create_dataset('probes_id', probe_names.shape, dtype=h5py.special_dtype(vlen=str))
    ids_ds[:] = probe_names
    f['feat_max'] = F_max_id_mean/5.0
    f['feat_avg'] = F_avg_id_mean/5.0
    f['feat_max_avg'] = F_max_avg_id_mean/5.0
    

In [None]:
import h5py
import pandas as pd
import re

# Test-set directories. Only test_probes_path is referenced in this cell.
test_gallery_path = "./track3-data/test-data/gallery/gallery_faces/"
test_probes_path = "./track3-data/test-data/probes/probe_subjects/"

def sort_key(s):
    """Return the first run of digits in ``s`` as an int, for numeric sorting.

    Args:
        s: A name such as a file or folder name.

    Returns:
        The integer value of the first digit run, or -1 when ``s`` is empty,
        is not a string, or contains no digits. Always returning an int keeps
        the keys comparable with each other during sorting.
    """
    if not s or not isinstance(s, str):
        return -1
    match = re.search(r'\d+', s)
    return int(match.group()) if match else -1

# Probe subject folder names, ordered by the first number in each name.
probes_id = os.listdir(test_probes_path)
probes_id.sort(key=sort_key)
    
# Hard-coded dataset sizes. TODO confirm they match the data on disk.
nums_gallery = 3897
nums_probes_id = 190

def read_feature(filepath):
    """Read the three feature datasets stored in an HDF5 file.

    Args:
        filepath: Path of an HDF5 file containing the datasets 'feat_max',
            'feat_avg' and 'feat_max_avg'.

    Returns:
        A tuple ``(feat_max, feat_avg, feat_max_avg)`` with the full contents
        of the three datasets.
    """
    # The context manager closes the file even if one of the reads raises.
    with h5py.File(filepath, 'r') as h5f:
        feat_max = h5f['feat_max'][:]
        feat_avg = h5f['feat_avg'][:]
        feat_max_avg = h5f['feat_max_avg'][:]
    return feat_max, feat_avg, feat_max_avg

def calu_sim(feat_probes, feat_gallery):
    """Return the dot-product similarity matrix ``feat_probes . feat_gallery^T``."""
    gallery_t = feat_gallery.T
    return np.dot(feat_probes, gallery_t)

# Rank the gallery for every probe using the checkpoint-averaged features.
gallery_mean_feats = read_feature("./track3-feats/24689/gallery/feat_gallery_mean.h5")
probes_mean_feats = read_feature("./track3-feats/24689/probes/feat_probes_mean.h5")
feat_gallery_max, feat_gallery_avg, feat_gallery_max_avg = gallery_mean_feats
feat_probes_max, feat_probes_avg, feat_probes_max_avg = probes_mean_feats

# One similarity matrix per feature type (probe features times gallery features transposed).
feat_max_sim, feat_avg_sim, feat_max_avg_sim = (
    calu_sim(probe_feat, gallery_feat)
    for probe_feat, gallery_feat in zip(probes_mean_feats, gallery_mean_feats)
)

# Negating the similarities makes argsort order each row from most to least similar.
feat_max_sort, feat_avg_sort, feat_max_avg_sort = (
    np.argsort(-sim, axis=1)
    for sim in (feat_max_sim, feat_avg_sim, feat_max_avg_sim)
)

for ranking, csv_path in (
    (feat_max_sort, "./track3-feats/24689/feat_max_pred_mean.csv"),
    (feat_avg_sort, "./track3-feats/24689/feat_avg_pred_mean.csv"),
    (feat_max_avg_sort, "./track3-feats/24689/feat_max_avg_pred_mean.csv"),
):
    pd.DataFrame(ranking).to_csv(csv_path)


In [None]:
import h5py
import pandas as pd
import re

# Test-set directories. Only test_probes_path is referenced in this cell.
test_gallery_path = "./track3-data/test-data/gallery/gallery_faces/"
test_probes_path = "./track3-data/test-data/probes/probe_subjects/"

def sort_key(s):
    """Return the first run of digits in ``s`` as an int, for numeric sorting.

    Args:
        s: A name such as a file or folder name.

    Returns:
        The integer value of the first digit run, or -1 when ``s`` is empty,
        is not a string, or contains no digits. Always returning an int keeps
        the keys comparable with each other during sorting.
    """
    if not s or not isinstance(s, str):
        return -1
    match = re.search(r'\d+', s)
    return int(match.group()) if match else -1

# Probe subject folder names, ordered by the first number in each name.
probes_id = os.listdir(test_probes_path)
probes_id.sort(key=sort_key)
    
# Hard-coded dataset sizes; they fix the shape of the averaged similarity
# matrices below. TODO confirm they match the stored features.
nums_gallery = 3897
nums_probes_id = 190

def read_feature(filepath):
    """Read the three feature datasets stored in an HDF5 file.

    Args:
        filepath: Path of an HDF5 file containing the datasets 'feat_max',
            'feat_avg' and 'feat_max_avg'.

    Returns:
        A tuple ``(feat_max, feat_avg, feat_max_avg)`` with the full contents
        of the three datasets.
    """
    # The context manager closes the file even if one of the reads raises.
    with h5py.File(filepath, 'r') as h5f:
        feat_max = h5f['feat_max'][:]
        feat_avg = h5f['feat_avg'][:]
        feat_max_avg = h5f['feat_max_avg'][:]
    return feat_max, feat_avg, feat_max_avg

def calu_sim(feat_probes, feat_gallery):
    """Return the dot-product similarity matrix ``feat_probes . feat_gallery^T``."""
    gallery_t = feat_gallery.T
    return np.dot(feat_probes, gallery_t)

# Running averages of the per-checkpoint similarity matrices.
feat_max_sim_mean, feat_avg_sim_mean, feat_max_avg_sim_mean = (
    np.zeros((nums_probes_id, nums_gallery)) for _ in range(3)
)

for i in range(5):
    gallery_feats = read_feature("./track3-feats/24689/gallery/feat_gallery_{}.h5".format(i))
    probes_feats = read_feature("./track3-feats/24689/probes/feat_probes_{}.h5".format(i))
    feat_gallery_max, feat_gallery_avg, feat_gallery_max_avg = gallery_feats
    feat_probes_max, feat_probes_avg, feat_probes_max_avg = probes_feats

    # Similarities between subject-level probe features and gallery features.
    feat_max_sim = calu_sim(feat_probes_max, feat_gallery_max)
    feat_avg_sim = calu_sim(feat_probes_avg, feat_gallery_avg)
    feat_max_avg_sim = calu_sim(feat_probes_max_avg, feat_gallery_max_avg)

    # Each checkpoint contributes one fifth to the averaged similarities.
    feat_max_sim_mean += feat_max_sim/5.0
    feat_avg_sim_mean += feat_avg_sim/5.0
    feat_max_avg_sim_mean += feat_max_avg_sim/5.0

    # Per-checkpoint rankings, most similar gallery index first.
    feat_max_sort = np.argsort(-feat_max_sim, axis=1)
    feat_avg_sort = np.argsort(-feat_avg_sim, axis=1)
    feat_max_avg_sort = np.argsort(-feat_max_avg_sim, axis=1)

    for ranking, csv_path in (
        (feat_max_sort, "./track3-feats/24689/sub1/feat_max_pred_{}.csv".format(i)),
        (feat_avg_sort, "./track3-feats/24689/sub1/feat_avg_pred_{}.csv".format(i)),
        (feat_max_avg_sort, "./track3-feats/24689/sub1/feat_max_avg_pred_{}.csv".format(i)),
    ):
        pd.DataFrame(ranking).to_csv(csv_path)

# Rankings from the averaged similarities.
feat_max_mean_sort = np.argsort(-feat_max_sim_mean, axis=1)
feat_avg_mean_sort = np.argsort(-feat_avg_sim_mean, axis=1)
feat_max_avg_mean_sort = np.argsort(-feat_max_avg_sim_mean, axis=1)

for ranking, csv_path in (
    (feat_max_mean_sort, "./track3-feats/24689/sub1/feat_max_pred.csv"),
    (feat_avg_mean_sort, "./track3-feats/24689/sub1/feat_avg_pred.csv"),
    (feat_max_avg_mean_sort, "./track3-feats/24689/sub1/feat_max_avg_pred.csv"),
):
    pd.DataFrame(ranking).to_csv(csv_path)

In [None]:
import h5py
import pandas as pd
import re

# Test-set directories. Only test_probes_path is referenced in this cell.
test_gallery_path = "./track3-data/test-data/gallery/gallery_faces/"
test_probes_path = "./track3-data/test-data/probes/probe_subjects/"

def sort_key(s):
    """Return the first run of digits in ``s`` as an int, for numeric sorting.

    Args:
        s: A name such as a file or folder name.

    Returns:
        The integer value of the first digit run, or -1 when ``s`` is empty,
        is not a string, or contains no digits. Always returning an int keeps
        the keys comparable with each other during sorting.
    """
    if not s or not isinstance(s, str):
        return -1
    match = re.search(r'\d+', s)
    return int(match.group()) if match else -1

# Probe subject folder names, ordered by the first number in each name.
probes_id = os.listdir(test_probes_path)
probes_id.sort(key=sort_key)
    
# Hard-coded dataset sizes; they bound the probe loop and fix the shape of the
# averaged similarity matrices below. TODO confirm they match the stored features.
nums_gallery = 3897
nums_probes_id = 190

def read_feature(filepath):
    """Read the three feature datasets stored in an HDF5 file.

    Args:
        filepath: Path of an HDF5 file containing the datasets 'feat_max',
            'feat_avg' and 'feat_max_avg'.

    Returns:
        A tuple ``(feat_max, feat_avg, feat_max_avg)`` with the full contents
        of the three datasets.
    """
    # The context manager closes the file even if one of the reads raises.
    with h5py.File(filepath, 'r') as h5f:
        feat_max = h5f['feat_max'][:]
        feat_avg = h5f['feat_avg'][:]
        feat_max_avg = h5f['feat_max_avg'][:]
    return feat_max, feat_avg, feat_max_avg

def calu_sim(feat_probes, feat_gallery):
    """Return the dot-product similarity matrix ``feat_probes . feat_gallery^T``."""
    gallery_t = feat_gallery.T
    return np.dot(feat_probes, gallery_t)

# Running averages of the per-checkpoint similarity matrices.
feat_max_sim_mean = np.zeros((nums_probes_id, nums_gallery))
feat_avg_sim_mean = np.zeros((nums_probes_id, nums_gallery))
feat_max_avg_sim_mean = np.zeros((nums_probes_id, nums_gallery))

for idx in range(5):
    feat_gallery_max, feat_gallery_avg, feat_gallery_max_avg = read_feature("./track3-feats/24689/gallery/feat_gallery_{}.h5".format(idx))

    # One (1, nums_gallery) row per probe subject, concatenated once after the
    # loop. The empty seed keeps the shape check and the empty-input result.
    max_rows = [np.empty((0, nums_gallery))]
    avg_rows = [np.empty((0, nums_gallery))]
    max_avg_rows = [np.empty((0, nums_gallery))]

    for k in range(nums_probes_id):
        feat_probes_max, feat_probes_avg, feat_probes_max_avg = read_feature("./track3-feats/24689/probes/feat_probes_{}/feat_probes_{}.h5".format(idx, probes_id[k]))
        # Similarity of every probe image to every gallery image, averaged
        # over the subject's images.
        max_rows.append(np.mean(calu_sim(feat_probes_max, feat_gallery_max), axis=0)[np.newaxis, :])
        avg_rows.append(np.mean(calu_sim(feat_probes_avg, feat_gallery_avg), axis=0)[np.newaxis, :])
        max_avg_rows.append(np.mean(calu_sim(feat_probes_max_avg, feat_gallery_max_avg), axis=0)[np.newaxis, :])

    feat_max_sim = np.concatenate(max_rows, axis=0)
    feat_avg_sim = np.concatenate(avg_rows, axis=0)
    feat_max_avg_sim = np.concatenate(max_avg_rows, axis=0)

    # Each checkpoint contributes one fifth to the averaged similarities.
    feat_max_sim_mean += feat_max_sim/5.0
    feat_avg_sim_mean += feat_avg_sim/5.0
    feat_max_avg_sim_mean += feat_max_avg_sim/5.0

    # Per-checkpoint rankings, most similar gallery index first.
    feat_max_sort = np.argsort(-feat_max_sim, axis=1)
    feat_avg_sort = np.argsort(-feat_avg_sim, axis=1)
    feat_max_avg_sort = np.argsort(-feat_max_avg_sim, axis=1)

    # All three files use the loop index `idx`. The avg and max_avg files were
    # previously formatted with a stale `i` from another cell, so every
    # checkpoint overwrote the same file.
    pd.DataFrame(feat_max_sort).to_csv("./track3-feats/24689/sub2/feat_max_pred_{}.csv".format(idx))
    pd.DataFrame(feat_avg_sort).to_csv("./track3-feats/24689/sub2/feat_avg_pred_{}.csv".format(idx))
    pd.DataFrame(feat_max_avg_sort).to_csv("./track3-feats/24689/sub2/feat_max_avg_pred_{}.csv".format(idx))

# Rankings from the averaged similarities.
feat_max_mean_sort = np.argsort(-feat_max_sim_mean, axis=1)
feat_avg_mean_sort = np.argsort(-feat_avg_sim_mean, axis=1)
feat_max_avg_mean_sort = np.argsort(-feat_max_avg_sim_mean, axis=1)

pd.DataFrame(feat_max_mean_sort).to_csv("./track3-feats/24689/sub2/feat_max_pred.csv")
pd.DataFrame(feat_avg_mean_sort).to_csv("./track3-feats/24689/sub2/feat_avg_pred.csv")
pd.DataFrame(feat_max_avg_mean_sort).to_csv("./track3-feats/24689/sub2/feat_max_avg_pred.csv")