In [1]:
from mart_controller import MART_Evaluator
from PIL import Image
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import joblib

In [2]:
# Input locations: the image folder and the folder holding one CSV per split.
IMG_DIR = '/mnt/sda/hong01-data/MART_DATA/OUTPUT_MERGED/AUTOGRAPHER'
PANDAS_DIR = '/mnt/sda/hong01-data/MART_DATA/OUTPUT_MERGED/PANDAS'

# Settings describing the checkpoint handed to MART_Evaluator.
MODEL_NAME = 'EFFICIENT-B4'
CROP_SIZE = 224
CHECKPOINT = 'RUN_0_Unfreeze/EFFICIENT-B4-18082020-020739.pth.tar'
HIDDEN_SIZE = 1024
BATCH_NORM = False

# Same keys and insertion order as filling the dict one field at a time.
model_info = {
    'model_name': MODEL_NAME,
    'crop_size': CROP_SIZE,
    'checkpoint': CHECKPOINT,
    'hidden_size': HIDDEN_SIZE,
    'batch_norm': BATCH_NORM,
}

evaluator = MART_Evaluator(model_info)

# Sorted listing of every entry in IMG_DIR; the prediction loop filters it per task.
all_img_files = os.listdir(IMG_DIR)
all_img_files.sort()

Loaded pretrained weights for efficientnet-b4
LOAD PRETRAINED MODEL AT RUN_0_Unfreeze/EFFICIENT-B4-18082020-020739.pth.tar


## TRY THE CODE

In [24]:
# Text file passed to the evaluator together with the image directory.
TXT_FILE = 'dataset/val.txt'

# evaluate() yields two values, unpacked here as loss and acc; the last line displays them.
loss, acc = evaluator.evaluate(IMG_DIR, TXT_FILE)
(loss, acc)

(0.8546260260045528, 0.9017341040462428)

In [25]:
# Run the evaluator on one sample image; the call returns a predicted label and a score vector.
img_path = f"{IMG_DIR}/1005_trainB_act17_4.jpg"
pred_lbl, probs_lbl = evaluator.test_image(img_path, props=True) # remember act = label+1

In [26]:
# Predicted label for the sample image (activity number = label + 1).
pred_lbl

16

In [27]:
# Score vector returned alongside the predicted label.
probs_lbl

array([2.1052938e-07, 3.3188870e-05, 8.3061364e-05, 1.2945433e-06,
       8.5043384e-06, 5.5669909e-07, 2.3099426e-05, 1.2240148e-06,
       3.4870263e-06, 6.4895437e-07, 5.6167302e-04, 2.2210679e-06,
       1.0234877e-01, 6.5412465e-04, 1.4623778e-05, 7.7470195e-06,
       8.5550719e-01, 3.9434277e-02, 9.8404140e-05, 1.2157230e-03],
      dtype=float32)

In [28]:
# Label indices ordered from highest to lowest score, and the scores in that same order.
ranking_lbl = np.flip(np.argsort(probs_lbl))
ranking_score = np.take(probs_lbl, ranking_lbl)

In [18]:
# Label indices, highest score first.
ranking_lbl

array([16, 12, 17, 19, 13, 10, 18,  2,  1,  6, 14,  4, 15,  8, 11,  3,  7,
        9,  5,  0])

In [19]:
# Scores in descending order, aligned with ranking_lbl.
ranking_score

array([8.5550719e-01, 1.0234877e-01, 3.9434277e-02, 1.2157230e-03,
       6.5412465e-04, 5.6167302e-04, 9.8404140e-05, 8.3061364e-05,
       3.3188870e-05, 2.3099426e-05, 1.4623778e-05, 8.5043384e-06,
       7.7470195e-06, 3.4870263e-06, 2.2210679e-06, 1.2945433e-06,
       1.2240148e-06, 6.4895437e-07, 5.5669909e-07, 2.1052938e-07],
      dtype=float32)

In [29]:
# Length of the score vector, i.e. how many labels the evaluator scores.
probs_lbl.shape

(20,)

## MAKE THE PREDICTION 

In [7]:
# Choose the split; its name selects the CSV read from PANDAS_DIR and is reused when matching image names.
dataset = 'trainA' # trainA, trainB, test
data = pd.read_csv(f"{PANDAS_DIR}/{dataset}.csv")

In [8]:
# Preview the first three rows of the loaded table.
data.head(3)

Unnamed: 0.1,Unnamed: 0,sub_id,event_id,source,data_HR_activity_median,data_HR_activity_min,data_HR_activity_max,data_HR_activity_average,data_HR_activity_std,data_HR_activity_len,...,"data_AUTOGRAPHER_RESNET_max_buckeye, horse chestnut, conker",data_AUTOGRAPHER_RESNET_max_coral fungus,data_AUTOGRAPHER_RESNET_max_agaric,data_AUTOGRAPHER_RESNET_max_gyromitra,"data_AUTOGRAPHER_RESNET_max_stinkhorn, carrion fungus",data_AUTOGRAPHER_RESNET_max_earthstar,"data_AUTOGRAPHER_RESNET_max_hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa",data_AUTOGRAPHER_RESNET_max_bolete,"data_AUTOGRAPHER_RESNET_max_ear, spike, capitulum","data_AUTOGRAPHER_RESNET_max_toilet tissue, toilet paper, bathroom tissue"
0,0,1001,act01,trainA,84.507042,76.923077,100.0,84.577495,3.933112,90,...,0.000728,9e-05,0.00092,4.8e-05,0.0001,0.000287,0.000317,0.00029,0.001594,0.001767
1,1,1001,act02,trainA,83.333333,71.428571,100.0,82.63142,5.642256,89,...,0.001809,0.000228,0.002908,0.000167,0.000224,0.000654,0.000971,0.000937,0.007302,0.013306
2,2,1001,act03,trainA,83.920188,76.923077,90.909091,84.207147,2.827778,90,...,0.000996,0.000154,0.002684,0.000104,0.000201,0.000685,0.00062,0.000566,0.006093,0.00552


In [5]:
# Number of rows in the loaded table.
len(data)

140

In [6]:
# Build one "<sub_id>_<event_id>" identifier per table row.
sub_id = data['sub_id'].tolist()
event_id = data['event_id'].tolist()
task_id = [f"{subject}_{event}" for subject, event in zip(sub_id, event_id)]
# Keep only the first ten identifiers for a quick trial run.
task_id = task_id[:10]
task_id[:5]

['1001_act01', '1001_act02', '1001_act03', '1001_act04', '1001_act05']

In [9]:
NUM_CLASSES = 20  # length of the score vector returned by evaluator.test_image

# One row per task: the mean score vector over all images that belong to the task.
list_probs = np.zeros((len(task_id), NUM_CLASSES))
# start loop
# Wrap the list itself (not the enumerate object) so tqdm knows the total and draws a real bar.
for idx_task, task in enumerate(tqdm(task_id)):
    # Train image names carry the split name: "<sub_id>_<split>_<event_id>_<n>.jpg".
    if dataset in ('trainA', 'trainB'):
        parts = task.split('_')
        task_key = f"{parts[0]}_{dataset}_{parts[1]}"
    else:
        task_key = task
    task_imgs = [name for name in all_img_files if task_key in name]
    if not task_imgs:
        # Averaging zero rows would quietly produce NaN (numpy only warns); keep the
        # NaN row but say which task it is so the gap is not missed downstream.
        tqdm.write(f"WARNING: no images found for task {task_key!r}; row {idx_task} set to NaN")
        list_probs[idx_task] = np.nan
        continue
    task_probs = np.zeros((len(task_imgs), NUM_CLASSES))
    for idx, img in enumerate(task_imgs):
        img_path = f"{IMG_DIR}/{img}"
        pred_lbl, probs_lbl = evaluator.test_image(img_path, props=True)  # remember act = label+1
        task_probs[idx] = probs_lbl
    task_mean_probs = np.mean(task_probs, axis=0)
    list_probs[idx_task] = task_mean_probs
# end loop

10it [00:06,  1.57it/s]


## GENERATE SUBMISSION FILE

In [3]:
import joblib
import os
import numpy as np
import pandas as pd

RUN = 'test'  # one of: trainA, trainB, test
PANDAS_DIR = '/mnt/sda/hong01-data/MART_DATA/OUTPUT_MERGED/PANDAS'

# Per-row identifiers "<sub_id>_<event_id>" taken from the CSV of the chosen run.
data = pd.read_csv(f"{PANDAS_DIR}/{RUN}.csv")
sub_id = data['sub_id'].tolist()
event_id = data['event_id'].tolist()
task_id = [f"{subject}_{event}" for subject, event in zip(sub_id, event_id)]
task_id_np = np.array(task_id)

# NOTE(review): joblib.load unpickles the file, so only load artifacts you trust.
probs = joblib.load(f'joblib_files/act_probs_{RUN}_RUN_0_Unfreeze.joblib')

In [6]:
# (rows, columns) of the loaded score matrix; the submission cell indexes columns as activities.
probs.shape

(140, 20)

In [6]:
N_SUBJECTS = 7           # number of subject chunks in the loaded run
EVENTS_PER_SUBJECT = 20  # consecutive rows that belong to each subject


def _rank_rows_for_act(probs_act, n_subjects, rows_per_subject):
    """Return row indices ranked for one activity column.

    The best-scoring row of every subject comes first (those rows ordered by
    descending score), followed by all remaining rows, also by descending score.

    Parameters
    ----------
    probs_act : 1-D array with one score per row. Rows are assumed to be grouped
        by subject in contiguous chunks of ``rows_per_subject``.
    n_subjects : number of chunks to take a top row from.
    rows_per_subject : length of each chunk.

    Returns
    -------
    1-D integer array containing every row index exactly once.
    """
    # argsort(...)[-1] rather than argmax, so ties resolve exactly as a reversed argsort would.
    top_rows = np.array(
        [
            start + np.argsort(probs_act[start:start + rows_per_subject])[-1]
            for start in range(0, n_subjects * rows_per_subject, rows_per_subject)
        ],
        dtype=int,
    )
    # Ascending indices of every row that is not some subject's top row.
    other_rows = np.setdiff1d(np.arange(len(probs_act)), top_rows)
    top_sorted = top_rows[np.argsort(probs_act[top_rows])[::-1]]
    other_sorted = other_rows[np.argsort(probs_act[other_rows])[::-1]]
    return np.concatenate((top_sorted, other_sorted))


# The chunked ranking is only meaningful if the CSV rows and the score matrix line up
# with the expected layout; fail loudly instead of writing a mis-ranked file.
assert probs.shape[0] == len(task_id_np) == N_SUBJECTS * EVENTS_PER_SUBJECT, (
    f"expected {N_SUBJECTS * EVENTS_PER_SUBJECT} rows, got "
    f"{probs.shape[0]} score rows and {len(task_id_np)} task ids"
)

# Collect lines and join once instead of growing a string inside the loops.
submission_lines = ["group_id: group50 ouroboros"]
for act in range(probs.shape[1]):
    act_str = f"act{act + 1:02d}"  # column 0 -> "act01", column 19 -> "act20"
    ranked_rows = _rank_rows_for_act(probs[:, act], N_SUBJECTS, EVENTS_PER_SUBJECT)
    submission_lines.extend(f"{act_str} {task}" for task in task_id_np[ranked_rows])
submission = "\n".join(submission_lines) + "\n"

# Create the output folder if needed; the context manager closes the file even if the write fails.
os.makedirs("Submission", exist_ok=True)
with open(f"Submission/submission_{RUN}.txt", "w") as submission_file:
    submission_file.write(submission)

print('DONE')

DONE


## DEBUG

In [1]:
import joblib
# Load the RUN_5 feature files for trainA (a) and trainB (b).
# NOTE(review): joblib.load unpickles its input — only use trusted files.
a = joblib.load('joblib_files/autographer_embeded_ft_trainA_RUN_5_Unfreeze.joblib')
b = joblib.load('joblib_files/autographer_embeded_ft_trainB_RUN_5_Unfreeze.joblib')

In [2]:
# Merge both mappings into one dict; on a shared key the entry from b replaces the one from a.
c = dict(a.items())
c.update(b.items())

In [3]:
# Number of keys in the merged dict.
len(c)

280

In [4]:
# Persist the merged dict under a single "train" file name.
joblib.dump(c, 'joblib_files/autographer_embeded_ft_train_RUN_5_Unfreeze.joblib')

['joblib_files/autographer_embeded_ft_train_RUN_5_Unfreeze.joblib']

In [5]:
# Load the tabular feature file (joblib.load unpickles — trusted files only).
d = joblib.load('joblib_files/tabular_embeded_ft.joblib')

In [6]:
# Displays the whole object, which produces very large output.
d

{'1001_trainA_act01': array([[ 0.09277251, -0.04616039, -0.0547361 , -0.03465628,  0.13491987,
          0.11870143,  0.06134478,  0.08354767,  0.06161736, -0.01724561,
         -0.02058645, -0.01805881,  0.17465614, -0.07092412, -0.03513316,
         -0.02070904, -0.00699046,  0.05262494, -0.01424418, -0.03174572,
          0.02530257, -0.11413883,  0.08635848, -0.05502121,  0.03365318,
         -0.12806678, -0.02966033, -0.02477345,  0.03810642, -0.03811815,
          0.05405489, -0.06960399,  0.08490444, -0.19096373,  0.00778631,
         -0.01110382, -0.05193844, -0.06407069,  0.0357406 ,  0.25014937,
         -0.00696521, -0.11165149,  0.11203633, -0.05338116, -0.00982048,
         -0.14931193,  0.00251607, -0.0328962 , -0.00949273, -0.04552748,
         -0.03007336,  0.1883127 ,  0.05953154, -0.16885118,  0.10259778,
          0.10271023, -0.0107867 , -0.02256508,  0.07029059, -0.04558254,
          0.13657506, -0.09711993, -0.06363953,  0.0389973 ,  0.0302557 ,
          0.17513

In [11]:
# AUTOGRAPHER entry for one task: a dict with 'images' and 'features'.
c['1001_trainA_act01']

{'images': ['1001_trainA_act01_0.jpg',
  '1001_trainA_act01_1.jpg',
  '1001_trainA_act01_2.jpg',
  '1001_trainA_act01_3.jpg',
  '1001_trainA_act01_4.jpg',
  '1001_trainA_act01_5.jpg',
  '1001_trainA_act01_6.jpg',
  '1001_trainA_act01_7.jpg'],
 'features': array([[-1.23040652, -0.87231106,  0.12411231, ...,  1.0644933 ,
         -0.17552121,  1.41689181],
        [-0.66738701, -0.90858388, -0.13840082, ...,  1.07988214,
         -0.70690656,  1.49018538],
        [-0.24927108, -0.75093663, -0.35898921, ...,  1.30117667,
         -1.23503482,  1.76461554],
        ...,
        [-0.74176228, -1.3586998 ,  0.15384108, ...,  1.3134861 ,
         -0.47269285,  1.57379663],
        [ 0.14448754, -1.14572382,  0.82036394, ...,  1.24496675,
         -0.59968328,  0.87706167],
        [ 0.05223045, -1.00261128,  0.80742979, ...,  1.28873229,
         -0.78592575,  0.95537794]])}

In [9]:
# TABULAR entry for the same task key: a 2-D array.
d['1001_trainA_act01']

array([[ 0.09277251, -0.04616039, -0.0547361 , -0.03465628,  0.13491987,
         0.11870143,  0.06134478,  0.08354767,  0.06161736, -0.01724561,
        -0.02058645, -0.01805881,  0.17465614, -0.07092412, -0.03513316,
        -0.02070904, -0.00699046,  0.05262494, -0.01424418, -0.03174572,
         0.02530257, -0.11413883,  0.08635848, -0.05502121,  0.03365318,
        -0.12806678, -0.02966033, -0.02477345,  0.03810642, -0.03811815,
         0.05405489, -0.06960399,  0.08490444, -0.19096373,  0.00778631,
        -0.01110382, -0.05193844, -0.06407069,  0.0357406 ,  0.25014937,
        -0.00696521, -0.11165149,  0.11203633, -0.05338116, -0.00982048,
        -0.14931193,  0.00251607, -0.0328962 , -0.00949273, -0.04552748,
        -0.03007336,  0.1883127 ,  0.05953154, -0.16885118,  0.10259778,
         0.10271023, -0.0107867 , -0.02256508,  0.07029059, -0.04558254,
         0.13657506, -0.09711993, -0.06363953,  0.0389973 ,  0.0302557 ,
         0.17513303, -0.05318628,  0.1393955 , -0.0