# Aggregate evaluation metrics across random seeds

In [1]:
import os
import torch
import numpy as np

# Experiment outputs are read from the 'output' folder that sits next to the
# current working directory (i.e. inside the parent of the cwd).
proj_dir = os.path.split(os.getcwd())[0]
exp_dir = os.path.join(proj_dir, 'output')

In [2]:
def aggregate_results_from_training(exp_name, exp_dir, target_name):
    """Print mean +- std over random seeds of the last-epoch validation metrics.

    Every entry of ``exp_dir`` whose name starts with ``'{exp_name}_rs'`` is
    treated as one run. For each run the file
    ``<entry>/training/{exp_name}_rs{seed}_metrics.pt`` is loaded and the last
    element of its ``'val_reports'`` list is read for ``target_name``.

    IoU, F1 and contour F1 are printed as percentages (mean and ``np.std``
    over the seeds), followed by the seed with the highest average of F1 and
    contour F1. When ``target_name`` is ``'target_multitemp'`` the three
    metrics are printed for ``'overall_gray'``, ``'overall_rgb'`` and
    ``'overall'``, and the best seed is selected on ``'overall'`` only.

    Args:
        exp_name: experiment name without the ``_rs{seed}`` suffix.
        exp_dir: directory containing one sub-directory per run.
        target_name: key of the target to read from each validation report.

    Returns:
        None. All results are printed.

    Raises:
        KeyError: if a loaded report lacks one of the expected keys.
        Any error raised by ``os.listdir`` / ``torch.load`` for missing paths.
    """
    prefix = '{}_rs'.format(exp_name)
    report_list = []
    rs_list = []
    # sorted(): os.listdir returns entries in arbitrary order, which would make
    # the print order and the argmax tie-breaking depend on the file system.
    for entry in sorted(os.listdir(exp_dir)):
        if not entry.startswith(prefix):
            continue
        # The seed is everything after the prefix (not just the last
        # character), so seeds with more than one digit are handled.
        random_seed = os.path.splitext(entry)[0][len(prefix):]
        print('random seed {}'.format(random_seed))
        this_exp_name = '{}{}'.format(prefix, random_seed)
        metrics_fn = os.path.join(exp_dir, entry, 'training',
                                  '{}_metrics.pt'.format(this_exp_name))
        # NOTE: torch.load unpickles the file; only use on trusted outputs.
        report = torch.load(metrics_fn, map_location='cpu')
        val_reports = report['val_reports']
        print('{} epochs'.format(len(val_reports)))
        report_list.append(val_reports[-1][target_name])
        rs_list.append(random_seed)

    if not report_list:
        print('no experiments found')
        return

    def percentages(group, metric, color_set=None):
        # Collect `metric` of `group` for every seed, scaled to percent.
        if color_set is None:
            return [r[group][metric] * 100 for r in report_list]
        try:
            return [r[group][color_set][metric] * 100 for r in report_list]
        except KeyError:
            # alternative report layout: metrics nested under an 'F' key
            return [r[group][color_set]['F'][metric] * 100 for r in report_list]

    def summarize(color_set=None):
        # Print iou / f1 / f1c statistics; return the per-seed f1 and f1c lists.
        f1_list = percentages('seg', 'f1-score', color_set)
        iou_list = percentages('seg', 'iou', color_set)
        f1c_list = percentages('seg_contours', 'f1-score', color_set)
        print('iou \t{:0.1f} +- {:0.1f}'.format(np.mean(iou_list), np.std(iou_list)))
        print('f1: \t{:0.1f} +- {:0.1f}'.format(np.mean(f1_list), np.std(f1_list)))
        print('f1c: \t{:0.1f} +- {:0.1f}'.format(np.mean(f1c_list), np.std(f1c_list)))
        return f1_list, f1c_list

    def print_best_seed(f1_list, f1c_list):
        # The best seed maximises the average of region F1 and contour F1.
        aggr_metric_list = [(f1 + f1c) / 2 for f1, f1c in zip(f1_list, f1c_list)]
        best_rs = rs_list[int(np.argmax(aggr_metric_list))]
        print('best random seed: {}'.format(best_rs))

    print(exp_name)
    if target_name == 'target_multitemp':
        for color_set in ['overall_gray', 'overall_rgb', 'overall']:
            print(color_set)
            f1_list, f1c_list = summarize(color_set)
            if color_set == 'overall':
                print_best_seed(f1_list, f1c_list)
    else:
        f1_list, f1c_list = summarize()
        print_best_seed(f1_list, f1c_list)

In [3]:
def aggregate_results_from_inference(exp_name, exp_dir, epoch, set, target_name):
    """Print mean +- std over random seeds of metrics saved by inference runs.

    Every entry of ``exp_dir`` whose name starts with ``'{exp_name}_rs'`` is
    treated as one run. For each run the file
    ``<entry>/inference/epoch_{epoch}/{set}/{exp_name}_rs{seed}_metrics.pt``
    is loaded and the report for ``target_name`` is read from its
    ``'val_reports'`` entry or, failing that, from its ``'reports'`` entry.

    IoU, F1 and contour F1 are printed as percentages (mean and ``np.std``
    over the seeds), followed by the seed with the highest average of F1 and
    contour F1. When ``target_name`` is ``'target_multitemp'`` the three
    metrics are printed for ``'overall_gray'``, ``'overall_rgb'`` and
    ``'overall'``; a colour set missing from the reports prints ``Not found``
    and is skipped, and the best seed is selected on ``'overall'`` only.

    Args:
        exp_name: experiment name without the ``_rs{seed}`` suffix.
        exp_dir: directory containing one sub-directory per run.
        epoch: epoch number used in the ``epoch_{epoch}`` folder name.
        set: name of the evaluated set, used as a folder name. (The parameter
            shadows the builtin; the name is kept for existing callers.)
        target_name: key of the target to read from each report.

    Returns:
        None. All results are printed.

    Raises:
        KeyError: if a report lacks ``target_name`` or, for targets other than
            ``'target_multitemp'``, one of the expected metric keys.
        Any error raised by ``os.listdir`` / ``torch.load`` for missing paths.
    """
    prefix = '{}_rs'.format(exp_name)
    epoch_dirname = 'epoch_{}'.format(epoch)
    report_list = []
    rs_list = []
    # sorted(): os.listdir returns entries in arbitrary order, which would make
    # the print order and the argmax tie-breaking depend on the file system.
    for entry in sorted(os.listdir(exp_dir)):
        if not entry.startswith(prefix):
            continue
        # The seed is everything after the prefix (not just the last
        # character), so seeds with more than one digit are handled.
        random_seed = os.path.splitext(entry)[0][len(prefix):]
        print('random seed {}'.format(random_seed))
        this_exp_name = '{}{}'.format(prefix, random_seed)
        metrics_fn = os.path.join(exp_dir, entry, 'inference', epoch_dirname, set,
                                  '{}_metrics.pt'.format(this_exp_name))
        # NOTE: torch.load unpickles the file; only use on trusted outputs.
        report = torch.load(metrics_fn, map_location='cpu')
        try:
            report_list.append(report['val_reports'][target_name])
        except KeyError:
            report_list.append(report['reports'][target_name])
        rs_list.append(random_seed)

    if not report_list:
        print('no experiments found')
        return

    def percentages(group, metric, color_set=None):
        # Collect `metric` of `group` for every seed, scaled to percent.
        if color_set is None:
            return [r[group][metric] * 100 for r in report_list]
        try:
            return [r[group][color_set][metric] * 100 for r in report_list]
        except KeyError:
            # alternative report layout: metrics nested under an 'F' key
            return [r[group][color_set]['F'][metric] * 100 for r in report_list]

    def summarize(color_set=None):
        # Print iou / f1 / f1c statistics; return the per-seed f1 and f1c lists.
        f1_list = percentages('seg', 'f1-score', color_set)
        iou_list = percentages('seg', 'iou', color_set)
        f1c_list = percentages('seg_contours', 'f1-score', color_set)
        print('iou \t{:0.1f} +- {:0.1f}'.format(np.mean(iou_list), np.std(iou_list)))
        print('f1: \t{:0.1f} +- {:0.1f}'.format(np.mean(f1_list), np.std(f1_list)))
        print('f1c: \t{:0.1f} +- {:0.1f}'.format(np.mean(f1c_list), np.std(f1c_list)))
        return f1_list, f1c_list

    def print_best_seed(f1_list, f1c_list):
        # The best seed maximises the average of region F1 and contour F1.
        aggr_metric_list = [(f1 + f1c) / 2 for f1, f1c in zip(f1_list, f1c_list)]
        best_rs = rs_list[int(np.argmax(aggr_metric_list))]
        print('best random seed: {}'.format(best_rs))

    print(exp_name)
    if target_name == 'target_multitemp':
        for color_set in ['overall_gray', 'overall_rgb', 'overall']:
            print(color_set)
            try:
                f1_list, f1c_list = summarize(color_set)
            except KeyError:
                print('Not found')
                # Do not rank seeds on lists left over from another colour set.
                continue
            if color_set == 'overall':
                print_best_seed(f1_list, f1c_list)
    else:
        f1_list, f1c_list = summarize()
        # The aggregate metric must be computed here as well; it was previously
        # referenced without being defined in this branch.
        print_best_seed(f1_list, f1c_list)

In [4]:
# Epoch folders read by the inference aggregation cells below:
# `pretrain_epoch` for the Unet pretraining runs, `test_epoch` for the
# temporal (GRU-based) runs.
pretrain_epoch, test_epoch = 19, 39

# Names of the evaluated sets (used as folder names by the inference cells).
val_set, test_set = 'mylabels_val', 'mylabels_test'

# GRU vs IrregGRU

## validation

In [5]:
# Last-epoch validation metrics of the three temporal architectures
# (same training configuration, different recurrent module).
exp_name_list = [
    'NRUnet7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
    'GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
]
for exp_name in exp_name_list:
    aggregate_results_from_training(exp_name, exp_dir, target_name='target_multitemp')

random seed 0
40 epochs
random seed 1
40 epochs
random seed 2


40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
NRUnet7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	74.4 +- 0.7
f1: 	85.3 +- 0.5
f1c: 	63.1 +- 3.0
overall_rgb
iou 	86.8 +- 0.1
f1: 	93.0 +- 0.0
f1c: 	79.7 +- 0.8
overall
iou 	79.1 +- 0.4
f1: 	88.3 +- 0.3
f1c: 	70.0 +- 2.0
best random seed: 2
random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	79.9 +- 1.0
f1: 	88.8 +- 0.6
f1c: 	78.2 +- 0.3
overall_rgb
iou 	87.3 +- 0.4
f1: 	93.2 +- 0.3
f1c: 	82.3 +- 0.3
overall
iou 	82.7 +- 0.8
f1: 	90.5 +- 0.5
f1c: 	79.8 +- 0.2
best random seed: 0
random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	78.3 +- 1.8
f1: 	87.8 +- 1.1


## test

In [6]:
# Test-set metrics (inference at `test_epoch`) of the three temporal architectures.
exp_name_list = [
    'NRUnet7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
    'GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
]
for exp_name in exp_name_list:
    aggregate_results_from_inference(
        exp_name, exp_dir, epoch=test_epoch, set=test_set, target_name='target_multitemp')

random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
NRUnet7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	73.5 +- 1.4
f1: 	84.7 +- 0.9
f1c: 	72.1 +- 2.4
overall_rgb
iou 	87.5 +- 0.4
f1: 	93.3 +- 0.2
f1c: 	81.6 +- 0.3
overall
iou 	79.1 +- 0.7
f1: 	88.4 +- 0.4
f1c: 	75.5 +- 1.6
best random seed: 2
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	76.9 +- 1.9
f1: 	86.9 +- 1.2
f1c: 	80.1 +- 0.8
overall_rgb
iou 	85.5 +- 0.3
f1: 	92.2 +- 0.2
f1c: 	81.5 +- 0.2
overall
iou 	80.2 +- 1.3
f1: 	89.0 +- 0.8
f1c: 	80.6 +- 0.6
best random seed: 1
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	76.0 +- 1.2
f1: 	86.3 +- 0.8
f1c: 	81.0 +- 0.5
overall_rgb
iou 	83.0 +- 1.9
f1: 	90.7 +- 1.1
f1c: 	80.6 +- 0.8
overal

# Pretraining (Unet)

## validation, TLM

In [11]:
# Last-epoch validation metrics on the 'target_tlm' target for the three
# Unet pretraining variants.
for exp_name in (
    'Unet_SI2020gray_100cm',
    'Unet_SI2020_100cm_noaugment',
    'Unet_SI2020_100cm_grayaugment',
):
    aggregate_results_from_training(exp_name, exp_dir, target_name='target_tlm')

random seed 0
20 epochs
random seed 1
20 epochs
random seed 2
20 epochs
random seed 3
20 epochs
random seed 4
20 epochs
Unet_SI2020gray_100cm
iou 	76.0 +- 0.4
f1: 	86.4 +- 0.3
f1c: 	68.5 +- 1.1
best random seed: 4
random seed 0
20 epochs
random seed 1
20 epochs
random seed 2
20 epochs
random seed 3
20 epochs
random seed 4
20 epochs
Unet_SI2020_100cm_noaugment
iou 	82.9 +- 0.7
f1: 	90.7 +- 0.4
f1c: 	79.3 +- 0.7
best random seed: 4
random seed 0
20 epochs
random seed 1
20 epochs
random seed 2
20 epochs
random seed 3
20 epochs
random seed 4
20 epochs
Unet_SI2020_100cm_grayaugment
iou 	81.5 +- 0.4
f1: 	89.8 +- 0.2
f1c: 	76.6 +- 0.6
best random seed: 1


## validation

In [12]:
# Validation-set metrics on 'target_multitemp' for the three Unet pretraining
# variants, read from the inference outputs at `pretrain_epoch`.
for exp_name in (
    'Unet_SI2020gray_100cm',
    'Unet_SI2020_100cm_noaugment',
    'Unet_SI2020_100cm_grayaugment',
):
    aggregate_results_from_inference(
        exp_name, exp_dir, epoch=pretrain_epoch, set=val_set, target_name='target_multitemp')

random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
Unet_SI2020gray_100cm
overall_gray
iou 	67.1 +- 1.5
f1: 	80.3 +- 1.1
f1c: 	51.4 +- 3.0
overall_rgb
iou 	80.7 +- 1.3
f1: 	89.3 +- 0.8
f1c: 	64.2 +- 2.7
overall
iou 	72.2 +- 1.0
f1: 	83.8 +- 0.6
f1c: 	56.5 +- 2.4
best random seed: 4
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
Unet_SI2020_100cm_noaugment
overall_gray
iou 	71.1 +- 0.9
f1: 	83.1 +- 0.6
f1c: 	57.3 +- 2.5
overall_rgb
iou 	82.6 +- 0.9
f1: 	90.5 +- 0.5
f1c: 	71.6 +- 1.3
overall
iou 	75.4 +- 0.9
f1: 	86.0 +- 0.6
f1c: 	63.0 +- 1.6
best random seed: 4
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
Unet_SI2020_100cm_grayaugment
overall_gray
iou 	72.2 +- 1.2
f1: 	83.9 +- 0.8
f1c: 	53.4 +- 3.5
overall_rgb
iou 	84.2 +- 0.7
f1: 	91.4 +- 0.4
f1c: 	72.1 +- 0.4
overall
iou 	76.8 +- 0.5
f1: 	86.9 +- 0.3
f1c: 	61.1 +- 1.9
best random seed: 4


## test

In [13]:
# Test-set metrics on 'target_multitemp' for the three Unet pretraining
# variants, read from the inference outputs at `pretrain_epoch`.
for exp_name in (
    'Unet_SI2020gray_100cm',
    'Unet_SI2020_100cm_noaugment',
    'Unet_SI2020_100cm_grayaugment',
):
    aggregate_results_from_inference(
        exp_name, exp_dir, epoch=pretrain_epoch, set=test_set, target_name='target_multitemp')

random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
Unet_SI2020gray_100cm
overall_gray
iou 	66.5 +- 2.2
f1: 	79.8 +- 1.6
f1c: 	60.1 +- 3.5
overall_rgb
iou 	80.2 +- 0.9
f1: 	89.0 +- 0.6
f1c: 	62.5 +- 2.1
overall
iou 	71.9 +- 1.6
f1: 	83.6 +- 1.1
f1c: 	60.9 +- 2.9
best random seed: 4
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
Unet_SI2020_100cm_noaugment
overall_gray
iou 	68.6 +- 0.9
f1: 	81.4 +- 0.6
f1c: 	66.3 +- 1.6
overall_rgb
iou 	86.3 +- 0.6
f1: 	92.6 +- 0.4
f1c: 	76.9 +- 1.0
overall
iou 	75.7 +- 0.3
f1: 	86.2 +- 0.2
f1c: 	70.0 +- 0.9
best random seed: 0
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
Unet_SI2020_100cm_grayaugment
overall_gray
iou 	67.2 +- 2.1
f1: 	80.3 +- 1.5
f1c: 	62.2 +- 3.0
overall_rgb
iou 	86.0 +- 1.0
f1: 	92.5 +- 0.6
f1c: 	74.7 +- 1.0
overall
iou 	74.8 +- 0.9
f1: 	85.6 +- 0.6
f1c: 	66.7 +- 1.7
best random seed: 4


# Temporal losses

## GRUUnet

### validation

In [14]:
# Last-epoch validation metrics of the GRU model for three temporal-loss settings.
exp_name_list = [
    'GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_ltemp2_0',
    'GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_graddot_ltemp1_0_1_0',
    'GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
]
for exp_name in exp_name_list:
    aggregate_results_from_training(exp_name, exp_dir, target_name='target_multitemp')

random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_ltemp2_0
overall_gray
iou 	80.4 +- 0.8
f1: 	89.1 +- 0.5
f1c: 	73.8 +- 2.3
overall_rgb
iou 	88.6 +- 0.2
f1: 	93.9 +- 0.1
f1c: 	81.7 +- 1.1
overall
iou 	83.5 +- 0.5
f1: 	91.0 +- 0.3
f1c: 	76.9 +- 1.6
best random seed: 3
random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3


40 epochs
random seed 4
40 epochs
GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_graddot_ltemp1_0_1_0
overall_gray
iou 	80.0 +- 1.4
f1: 	88.9 +- 0.8
f1c: 	73.6 +- 2.1
overall_rgb
iou 	86.5 +- 1.0
f1: 	92.7 +- 0.6
f1c: 	79.0 +- 1.9
overall
iou 	82.4 +- 1.0
f1: 	90.4 +- 0.6
f1c: 	75.7 +- 2.0
best random seed: 2
random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	79.9 +- 1.0
f1: 	88.8 +- 0.6
f1c: 	78.2 +- 0.3
overall_rgb
iou 	87.3 +- 0.4
f1: 	93.2 +- 0.3
f1c: 	82.3 +- 0.3
overall
iou 	82.7 +- 0.8
f1: 	90.5 +- 0.5
f1c: 	79.8 +- 0.2
best random seed: 0


### test

In [15]:
# Test-set metrics (inference at `test_epoch`) of the GRU model for three
# temporal-loss settings.
exp_name_list = [
    'GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_ltemp2_0',
    'GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_graddot_ltemp1_0_1_0',
    'GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
]
for exp_name in exp_name_list:
    aggregate_results_from_inference(
        exp_name, exp_dir, epoch=test_epoch, set=test_set, target_name='target_multitemp')

random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_ltemp2_0
overall_gray
iou 	79.4 +- 0.5
f1: 	88.5 +- 0.3
f1c: 	78.7 +- 1.2
overall_rgb
iou 	87.0 +- 0.8
f1: 	93.0 +- 0.5
f1c: 	81.1 +- 0.6
overall
iou 	82.4 +- 0.6
f1: 	90.3 +- 0.4
f1c: 	79.5 +- 1.0
best random seed: 3
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_graddot_ltemp1_0_1_0
overall_gray
iou 	75.2 +- 1.8
f1: 	85.9 +- 1.1
f1c: 	75.6 +- 1.6
overall_rgb
iou 	85.4 +- 0.6
f1: 	92.1 +- 0.3
f1c: 	78.6 +- 1.8
overall
iou 	79.2 +- 1.3
f1: 	88.4 +- 0.8
f1c: 	76.6 +- 1.6
best random seed: 0
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
GRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	76.9 +- 1.9
f1: 	86.9 +- 1.2
f1c: 	80.1 +- 0.8
overall_rgb
iou 	85.5 +- 0.3
f1: 	92.2 +- 0.2
f1c: 	81.5 +- 0.2
overall
iou 	80.2 +- 1.3
f1: 	89.0

## IrregGRUUnet

### validation

In [16]:
# Last-epoch validation metrics of the NIrregGRU model for five temporal-loss settings.
exp_name_list = [
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_ltemp2_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_ltemp2_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossnone_asymgraddot_ltemp0_0_2_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_asymgraddot_ltemp1_0_1_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
]
for exp_name in exp_name_list:
    aggregate_results_from_training(exp_name, exp_dir, target_name='target_multitemp')

random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_ltemp2_0
overall_gray
iou 	70.6 +- 1.2
f1: 	82.7 +- 0.8
f1c: 	55.3 +- 3.6
overall_rgb
iou 	82.3 +- 8.2
f1: 	90.0 +- 5.3
f1c: 	75.0 +- 3.5
overall
iou 	75.0 +- 3.8
f1: 	85.6 +- 2.5
f1c: 	63.7 +- 2.0
best random seed: 2
random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_ltemp2_0
overall_gray
iou 	74.2 +- 0.8
f1: 	85.2 +- 0.6
f1c: 	60.1 +- 1.8
overall_rgb
iou 	87.8 +- 0.6
f1: 	93.5 +- 0.3
f1c: 	79.7 +- 1.2
overall
iou 	79.4 +- 0.7
f1: 	88.5 +- 0.5
f1c: 	68.4 +- 1.6
best random seed: 2
random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossnone_asymgraddot_ltemp0_0_2_0
overa

### test

In [17]:
# Test-set metrics (inference at `test_epoch`) of the NIrregGRU model for five
# temporal-loss settings.
exp_name_list = [
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_ltemp2_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_ltemp2_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossnone_asymgraddot_ltemp0_0_2_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_asymgraddot_ltemp1_0_1_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
]
for exp_name in exp_name_list:
    aggregate_results_from_inference(
        exp_name, exp_dir, epoch=test_epoch, set=test_set, target_name='target_multitemp')

random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossMSE_ltemp2_0
overall_gray
iou 	67.8 +- 2.0
f1: 	80.8 +- 1.4
f1c: 	64.7 +- 3.2
overall_rgb
iou 	82.2 +- 8.6
f1: 	90.0 +- 5.6
f1c: 	76.8 +- 3.3
overall
iou 	73.6 +- 4.5
f1: 	84.7 +- 3.1
f1c: 	69.2 +- 1.6
best random seed: 2
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_ltemp2_0
overall_gray
iou 	72.7 +- 1.2
f1: 	84.2 +- 0.8
f1c: 	69.3 +- 2.0
overall_rgb
iou 	87.2 +- 0.4
f1: 	93.2 +- 0.2
f1c: 	80.3 +- 0.7
overall
iou 	78.5 +- 0.8
f1: 	88.0 +- 0.5
f1c: 	73.3 +- 1.5
best random seed: 2
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossnone_asymgraddot_ltemp0_0_2_0
overall_gray
iou 	68.5 +- 4.9
f1: 	81.2 +- 3.5
f1c: 	78.2 +- 1.4
overall_rgb
iou 	73.7 +- 7.2
f1: 	84.6 +- 4.8
f1c: 	77.2 +- 1.8
overall
iou 	70.5 +- 5.5
f

# tCA ablation

## validation

In [18]:
# Last-epoch validation metrics for the ablation: 'asymgraddot' vs
# 'unscaledgraddot' variants of the NIrregGRU model.
exp_name_list = [
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_unscaledgraddot_ltemp1_0_1_0',
]
for exp_name in exp_name_list:
    aggregate_results_from_training(exp_name, exp_dir, target_name='target_multitemp')

random seed 0


40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	78.3 +- 1.8
f1: 	87.8 +- 1.1
f1c: 	78.9 +- 0.5
overall_rgb
iou 	85.5 +- 1.5
f1: 	92.2 +- 0.9
f1c: 	82.7 +- 0.4
overall
iou 	81.0 +- 1.7
f1: 	89.5 +- 1.0
f1c: 	80.3 +- 0.4
best random seed: 1
random seed 0
40 epochs
random seed 1
40 epochs
random seed 2
40 epochs
random seed 3
40 epochs
random seed 4
40 epochs
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_unscaledgraddot_ltemp1_0_1_0
overall_gray
iou 	77.5 +- 1.4
f1: 	87.3 +- 0.9
f1c: 	75.8 +- 3.3
overall_rgb
iou 	86.0 +- 1.0
f1: 	92.5 +- 0.6
f1c: 	82.3 +- 0.8
overall
iou 	80.7 +- 1.0
f1: 	89.3 +- 0.6
f1c: 	78.4 +- 2.0
best random seed: 1


## test

In [19]:
# Test-set metrics (inference at `test_epoch`) for the ablation: 'asymgraddot'
# vs 'unscaledgraddot' variants of the NIrregGRU model.
exp_name_list = [
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0',
    'NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_unscaledgraddot_ltemp1_0_1_0',
]
for exp_name in exp_name_list:
    aggregate_results_from_inference(
        exp_name, exp_dir, epoch=test_epoch, set=test_set, target_name='target_multitemp')

random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_asymgraddot_ltemp1_0_1_0
overall_gray
iou 	76.0 +- 1.2
f1: 	86.3 +- 0.8
f1c: 	81.0 +- 0.5
overall_rgb
iou 	83.0 +- 1.9
f1: 	90.7 +- 1.1
f1c: 	80.6 +- 0.8
overall
iou 	78.7 +- 1.4
f1: 	88.1 +- 0.8
f1c: 	80.9 +- 0.3
best random seed: 1
random seed 0
random seed 1
random seed 2
random seed 3
random seed 4
NIrregGRU7df_bwrd_freeze0_lrfe1em06_lrtemp0_0001_tlossCE_unscaledgraddot_ltemp1_0_1_0
overall_gray
iou 	76.0 +- 2.3
f1: 	86.3 +- 1.5
f1c: 	79.2 +- 2.2
overall_rgb
iou 	83.3 +- 1.1
f1: 	90.9 +- 0.6
f1c: 	80.0 +- 0.6
overall
iou 	78.9 +- 1.7
f1: 	88.2 +- 1.1
f1c: 	79.5 +- 1.5
best random seed: 2
