In [1]:
%reload_ext autoreload
%autoreload 2

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import numpy as np
import pandas as pd

log_dir = "../logs/emb/"

In [2]:
def display_wrapper(model_name, epoc, log_dir=log_dir, test_seq_len='1 3 5 9 11 19', _return=False):
    emb_dir = os.path.join(log_dir, model_name, str(epoc))
    print(model_name, epoc)
    if _return:
        return display_results(emb_dir, test_seq_len, _return=True)
    else:
        display_results(emb_dir, test_seq_len, _return=False)

def display_results(emb_dir, test_seq_len='1 3 5 9 11 19', _return=False):

    assert os.path.isdir(emb_dir), "emb_dir not found: {}".format(emb_dir)

    test_seq_len = np.asarray(list(map(int, test_seq_len.split())))

    raw_score = np.load(os.path.join(emb_dir, "raw_score.npy"))

    top1_exact, top1_near, top3_exact, top10_exact = np.split(raw_score, 4, axis=1)
    top1_exact_rate = 100. * np.mean(top1_exact, axis=0)
    top1_near_rate = 100. * np.mean(top1_near, axis=0)
    top3_exact_rate = 100. * np.mean(top3_exact, axis=0)
    top10_exact_rate = 100. * np.mean(top10_exact, axis=0)
    all_metrics = np.stack((top1_exact_rate, top1_near_rate, top3_exact_rate, top10_exact_rate), axis=0)

    df = pd.DataFrame(all_metrics, columns=test_seq_len, index=["top1_exact", "top1_near", "top3_exact", "top10_exact"])

    print(df)
    print()
    if _return:
        return df, all_metrics

## Baseline

### 93k dummy_db

In [11]:
model_name = "640_lamb"
epoc = 101
display_wrapper(model_name, epoc)

model_name = "replication"
epoc = 100
display_wrapper(model_name, epoc)

640_lamb 101
                1      3      5      9      11     19
top1_exact   70.60  87.80  93.65  96.95  97.70  98.95
top1_near    72.30  88.55  93.65  96.95  97.70  98.95
top3_exact   76.00  90.10  94.55  97.70  98.25  99.45
top10_exact  78.95  90.80  95.10  98.00  98.55  99.50

replication 100
                1      3      5      9      11    19
top1_exact   68.45  88.95  94.30  97.50  98.05  99.1
top1_near    73.20  89.90  94.55  97.55  98.05  99.1
top3_exact   76.20  91.60  95.75  98.25  98.65  99.7
top10_exact  79.95  92.30  96.10  98.40  98.85  99.7



### 100k dummy_db

In [4]:
model_name = "640_lamb"
epoc = 101
display_wrapper(model_name, epoc, log_dir='../logs/emb_100k/', test_seq_len='1 3 5')

model_name = "replication"
epoc = 100
display_wrapper(model_name, epoc, log_dir='../logs/emb_100k/', test_seq_len='1 3 5')

640_lamb 101
                 1      3      5
top1_exact   53.95  68.80  73.70
top1_near    55.30  69.35  73.70
top3_exact   74.30  89.30  94.20
top10_exact  78.50  90.50  95.05

replication 100
                 1      3      5
top1_exact   51.10  67.20  72.55
top1_near    54.25  67.95  72.75
top3_exact   74.55  91.25  95.60
top10_exact  79.95  92.35  96.15



## FMA Trained Models

### Full Precision Models

#### Initial Models

In [4]:
model_name = "replication"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "dB_normalized_1"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "essentia_mel"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "essentia_mel-norm_audio"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "store_bg_ir"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "correct_conv"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "speed"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "speed1"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "single_init"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "better_shuffle"
epoc = "47"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "better_shuffle"
epoc = "6000"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "much_better_shuffle"
epoc = "37"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "long_train"
epoc = "43"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "long_train"
epoc = "200"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "schedule_adjust"
epoc = "41"
print(model_name, epoc)
display_results(model_name, epoc)
print()

model_name = "schedule_adjust"
epoc = "200"
print(model_name, epoc)
display_results(model_name, epoc)
print()

replication 100
                1     3      5      9      11    19
top1_exact   75.85  92.0  96.70  98.55  98.95  99.6
top1_near    82.60  93.4  97.05  98.60  98.95  99.6
top3_exact   84.30  94.9  97.80  99.15  99.30  99.8
top10_exact  87.20  95.9  98.30  99.25  99.45  99.8

dB_normalized_1 100
                1      3      5      9      11     19
top1_exact   77.30  93.40  97.05  98.85  99.15  99.55
top1_near    82.60  94.05  97.40  98.95  99.20  99.65
top3_exact   85.20  96.25  98.05  99.35  99.55  99.85
top10_exact  88.05  96.55  98.45  99.50  99.65  99.90

essentia_mel 100
                1      3      5      9      11     19
top1_exact   76.55  93.45  97.25  99.05  99.20  99.60
top1_near    82.85  94.50  97.50  99.05  99.20  99.60
top3_exact   84.90  96.15  98.50  99.40  99.55  99.85
top10_exact  87.90  97.05  98.85  99.45  99.60  99.85

essentia_mel-norm_audio 100
                1     3      5      9      11     19
top1_exact   76.75  93.6  96.85  98.90  99.25  99.60
top1_near 

#### These are the models with a bug that resulted in using less data.

In [6]:
# These models are full of bugs. Don't use them.

model_name = "fma_120"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc, test_seq_len)
print()

model_name = "fma_128_50msec_alpha_06"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc, test_seq_len)
print()

model_name = "fma_384"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc, test_seq_len)
print()

fma_120 100
                1      3      5      9      11    19
top1_exact   59.10  82.65  90.60  95.20  96.60  98.7
top1_near    64.55  84.00  91.10  95.60  96.80  98.7
top3_exact   68.00  86.60  92.85  96.50  97.45  99.0
top10_exact  72.05  87.70  93.40  96.65  97.50  99.0

fma_128_50msec_alpha_06 100
                1      3      5      9      11     19
top1_exact   74.80  90.90  96.05  98.25  98.95  99.45
top1_near    78.70  92.15  96.25  98.30  98.95  99.45
top3_exact   81.75  93.85  97.25  98.90  99.40  99.70
top10_exact  84.90  94.95  97.90  98.95  99.40  99.70

fma_384 100
                1      3      5      9      11     19
top1_exact   64.40  85.75  92.65  96.10  97.00  98.75
top1_near    69.75  87.30  93.35  96.20  97.15  98.80
top3_exact   72.40  89.50  94.70  97.25  97.85  99.20
top10_exact  75.70  90.55  95.20  97.45  98.00  99.25



#### Fine models

In [4]:
model_name = "replication"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "fma-bsz_128-seg_58"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "fma-bsz_384-seg_58"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "fma-lr_1e4-no_sched"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "fma-old_sched-val_shift"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "fma-old_sched-200_epochs"
epoc = "200"
display_wrapper(model_name, epoc)

model_name = "fma-ir_250msec"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "fma-ir_500msec"
epoc = "100"
display_wrapper(model_name, epoc)

replication 100
                1      3      5      9      11    19
top1_exact   68.45  88.95  94.30  97.50  98.05  99.1
top1_near    73.20  89.90  94.55  97.55  98.05  99.1
top3_exact   76.20  91.60  95.75  98.25  98.65  99.7
top10_exact  79.95  92.30  96.10  98.40  98.85  99.7

fma-bsz_128-seg_58 100
                1      3      5      9      11     19
top1_exact   68.05  87.75  93.50  97.00  98.30  99.20
top1_near    71.40  88.40  93.65  97.00  98.35  99.20
top3_exact   74.90  90.30  95.00  98.20  98.90  99.65
top10_exact  78.85  91.25  95.60  98.35  98.95  99.65

fma-bsz_384-seg_58 100
                1      3      5     9      11     19
top1_exact   70.30  89.00  94.15  97.7  98.25  99.20
top1_near    73.50  89.70  94.40  97.7  98.25  99.20
top3_exact   76.95  91.45  95.55  98.4  99.00  99.55
top10_exact  80.10  92.25  96.10  98.6  99.20  99.65

fma-lr_1e4-no_sched 100
                1      3      5      9      11    19
top1_exact   69.30  88.50  94.25  97.05  98.10  99.5
top1_

### Mixed Precision Models

In [5]:
model_name = "fma-mixed_precision-bsz_384"
epoc = "100"
precision = "mixed_precision"
print(model_name, epoc, precision)
emb_dir = os.path.join(log_dir, model_name, epoc, precision)
display_results(emb_dir)
print()

model_name = "fma-mixed_precision-bsz_384"
epoc = "100"
precision = "float32"
print(model_name, epoc, precision)
emb_dir = os.path.join(log_dir, model_name, epoc, precision)
display_results(emb_dir)
print()

model_name = "fma-mixed_precision-bsz_512"
epoc = "100"
precision = "float32"
print(model_name, epoc, precision)
emb_dir = os.path.join(log_dir, model_name, epoc, precision)
display_results(emb_dir)
print()

model_name = "fma-mixed_precision-bsz_640"
epoc = "100"
precision = "float32"
print(model_name, epoc, precision)
emb_dir = os.path.join(log_dir, model_name, epoc, precision)
display_results(emb_dir)
print()

model_name = "fma-mixed_precision-bsz_768"
epoc = "100"
precision = "mixed_precision"
print(model_name, epoc, precision)
emb_dir = os.path.join(log_dir, model_name, epoc, precision)
display_results(emb_dir)
print()

model_name = "fma-mixed_precision-bsz_768"
epoc = "100"
precision = "float32"
print(model_name, epoc, precision)
emb_dir = os.path.join(log_dir, model_name, epoc, precision)
display_results(emb_dir)
print()

# model_name = "fma-LAMB"
# epoc = "100"
# precision = "float32"
# print(model_name, epoc, precision)
# emb_dir = os.path.join(log_dir, model_name, epoc, precision)
# display_results(emb_dir)
# print()

model_name = "fma-BG_IR_not_normalized"
epoc = "100"
precision = "float32"
print(model_name, epoc, precision)
emb_dir = os.path.join(log_dir, model_name, epoc, precision)
display_results(emb_dir)
print()

fma-mixed_precision-bsz_384 100 mixed_precision
                1      3     5      9      11     19
top1_exact   70.25  89.40  94.6  97.45  98.60  99.45
top1_near    73.50  90.25  94.8  97.45  98.60  99.45
top3_exact   77.05  92.40  96.0  98.40  99.10  99.70
top10_exact  80.10  93.00  96.5  98.60  99.25  99.70

fma-mixed_precision-bsz_384 100 float32
                1     3      5      9      11     19
top1_exact   70.50  89.7  94.75  97.65  98.65  99.45
top1_near    73.70  90.5  94.95  97.65  98.65  99.45
top3_exact   77.25  92.4  96.05  98.50  99.15  99.70
top10_exact  80.25  93.2  96.55  98.75  99.30  99.70

fma-mixed_precision-bsz_512 100 float32
                1      3      5      9      11     19
top1_exact   72.00  89.25  94.70  97.40  98.45  99.35
top1_near    75.30  89.90  94.70  97.40  98.45  99.35
top3_exact   78.45  92.00  95.85  98.45  99.10  99.70
top10_exact  81.55  92.85  96.55  98.65  99.30  99.70

fma-mixed_precision-bsz_640 100 float32
                1      3     

In [17]:
model_name = "replication"
epoc = 100
_, m_r = display_wrapper(model_name, epoc, _return=True)

model_name = "fma-mixed_precision-bsz_768"
epoc = "100"
precision = "float32"
print(model_name, epoc, precision)
emb_dir = os.path.join(log_dir, model_name, epoc, precision)
_, m_ours = display_results(emb_dir, _return=True)
print()

model_name = "640_lamb"
epoc = 101
_, m_theirs = display_wrapper(model_name, epoc, _return=True)

replication 100
                1      3      5      9      11    19
top1_exact   68.45  88.95  94.30  97.50  98.05  99.1
top1_near    73.20  89.90  94.55  97.55  98.05  99.1
top3_exact   76.20  91.60  95.75  98.25  98.65  99.7
top10_exact  79.95  92.30  96.10  98.40  98.85  99.7

fma-mixed_precision-bsz_768 100 float32
                1      3      5      9      11     19
top1_exact   72.70  89.50  94.95  97.60  98.10  99.20
top1_near    75.95  90.30  95.10  97.60  98.10  99.20
top3_exact   78.95  92.45  96.15  98.45  98.85  99.55
top10_exact  81.65  93.25  96.55  98.60  99.05  99.55


640_lamb 101
                1      3      5      9      11     19
top1_exact   70.60  87.80  93.65  96.95  97.70  98.95
top1_near    72.30  88.55  93.65  96.95  97.70  98.95
top3_exact   76.00  90.10  94.55  97.70  98.25  99.45
top10_exact  78.95  90.80  95.10  98.00  98.55  99.50



In [24]:
m_diff = m_ours - m_theirs

x = np.asarray(list(map(int, '1 3 5 9 11 19'.split())))

df = pd.DataFrame(m_diff, columns=x, index=["top1_exact", "top1_near", "top3_exact", "top10_exact"])

print(df)

               1     3     5     9    11    19
top1_exact   2.10  1.70  1.30  0.65  0.4  0.25
top1_near    3.65  1.75  1.45  0.65  0.4  0.25
top3_exact   2.95  2.35  1.60  0.75  0.6  0.10
top10_exact  2.70  2.45  1.45  0.60  0.5  0.05


## Discotube Models

### Trained on discotube (clever segmenting) evaluated on disctoube

In [11]:
model_name = "discotube_float16"
epoc = "49"
print(model_name, epoc)
display_results(model_name, epoc, test_seq_len)
print()

model_name = "discotube_float16"
epoc = "67"
print(model_name, epoc)
display_results(model_name, epoc, test_seq_len)
print()

model_name = "discotube_float16"
epoc = "100"
print(model_name, epoc)
display_results(model_name, epoc, test_seq_len)
print()

discotube_float16 49
                    1          3          5          9          11         19
top1_exact   51.386826  71.804509  79.388142  85.802563  87.537215  91.244862
top1_near    65.234041  82.753692  88.167032  92.332405  93.392030  95.483988
top3_exact   66.583886  84.416883  89.376871  93.211190  94.170603  96.000050
top10_exact  72.101604  86.360710  90.601503  93.999346  94.864172  96.496320

discotube_float16 67
                    1          3          5          9          11         19
top1_exact   89.181447  97.327603  98.530358  99.255387  99.404560  99.652903
top1_near    90.328992  97.461983  98.581611  99.271846  99.414352  99.655820
top3_exact   92.060102  97.910751  98.802244  99.349974  99.472687  99.680821
top10_exact  93.604539  98.128259  98.912249  99.408310  99.526231  99.716030

discotube_float16 100
                    1          3          5          9          11         19
top1_exact   90.358993  97.691784  98.770576  99.390809  99.519356  99.72686

### Trained on discotube eval on fma

#### Clever(!) Segmenting

In [16]:
model_name = "discotube-bsz_384-lr_1e4"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "discotube-bsz_384-old_sched"
epoc = "100"
display_wrapper(model_name, epoc)

discotube-bsz_384-lr_1e4 100
                1      3      5      9      11     19
top1_exact   32.55  59.85  71.80  82.85  85.50  92.10
top1_near    35.95  61.30  72.80  83.55  86.20  92.30
top3_exact   39.15  63.65  74.75  85.45  87.30  93.50
top10_exact  45.00  65.65  76.50  86.35  88.35  94.15

discotube-bsz_384-old_sched 100
                1      3     5      9      11     19
top1_exact   33.05  58.25  72.0  83.00  86.05  93.35
top1_near    36.55  59.70  73.3  83.40  86.65  93.60
top3_exact   38.85  62.70  75.3  84.80  88.20  94.25
top10_exact  44.45  64.70  76.6  86.05  89.20  94.95



#### Chunk Sampling

In [17]:
model_name = "discotube_chunk-old_sched"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "discotube_chunk-bsz_128"
epoc = "100"
display_wrapper(model_name, epoc)

model_name = "discotube_chunk-bsz_384"
epoc = "100"
display_wrapper(model_name, epoc)

discotube_chunk-old_sched 100
               1      3      5      9      11     19
top1_exact   32.9  62.00  74.65  85.95  88.60  93.85
top1_near    35.2  62.95  75.30  86.10  88.75  93.85
top3_exact   39.3  65.50  77.40  87.10  89.95  95.15
top10_exact  44.6  67.60  78.45  88.00  90.70  95.60

discotube_chunk-bsz_128 100
               1      3      5      9      11     19
top1_exact   27.0  57.10  70.75  82.75  86.00  91.95
top1_near    28.8  57.95  71.25  82.95  86.20  92.05
top3_exact   33.3  61.15  73.20  84.50  87.65  93.65
top10_exact  37.9  63.30  74.50  85.55  88.40  94.05

discotube_chunk-bsz_384 100
                1      3      5      9      11     19
top1_exact   29.50  58.30  72.15  83.75  87.15  92.95
top1_near    31.35  59.20  72.90  83.90  87.20  93.00
top3_exact   35.30  62.05  74.75  85.00  88.80  94.25
top10_exact  40.90  64.55  76.05  86.40  89.70  94.75

