In [8]:
# Install the project's dependencies. `%pip` (rather than `!pip`) installs into
# the environment of the running kernel instead of whatever `pip` is on PATH.
%pip install -r requirements.txt



In [25]:
import os
import sys
import librosa
# Make the local helper directories importable. Every directory is inserted at
# index 1, so the one added last ('./pytorch') ends up ahead of './utils'.
for local_package_dir in ('./utils', './pytorch'):
  sys.path.insert(1, os.path.join(sys.path[0], local_package_dir))

In [26]:
import pickle
import glob
import time
import numpy as np

In [27]:
import torch
import torch.utils.data

In [28]:
from functools import partial

In [29]:
from IPython.display import clear_output

In [30]:
from evaluate import Evaluator

In [31]:
from data_generator import GtzanDataset, TrainSampler, EvaluateSampler, collate_fn

In [32]:
from config import (sample_rate, classes_num, mel_bins, fmin, fmax, window_size, hop_size, window, pad_mode, center, ref, amin, top_db)

In [33]:
from models import Transfer_Cnn14, Cnn14

In [34]:
# Waveform HDF5 files for the training set and the public / private test sets,
# all located relative to the current working directory.
hdf5_train_path, hdf5_public_test_path, hdf5_private_test_path = (
    os.path.join(".", features_dir, 'waveform.h5')
    for features_dir in ('features_train', 'features_public_test', 'features_private_test')
)

# Echo the three paths, one per line, in the order they were defined.
print(hdf5_train_path, hdf5_public_test_path, hdf5_private_test_path, sep="\n")

./features_train/waveform.h5
./features_public_test/waveform.h5
./features_private_test/waveform.h5


# Config

In [35]:
# Augmentation setting that was used for training; it is part of the
# statistics/checkpoint paths built below. Choose "mixup" or "none" so that it
# matches the argument passed to the training run.
augmentation = "mixup"

In [36]:
# Best validation result of every fold's model, stored as (accuracy, model_fold).
score_info = []

# Checkpoint location; {model_fold} and {iteration} are filled in per model.
ckpt_path_template = "checkpoints/main/holdout_fold={model_fold}/Transfer_Cnn14/pretrain=True/loss_type=clip_nll/augmentation="+augmentation+"/batch_size=32/freeze_base=False/{iteration}_iterations.pth"

for model_fold in range(1, 11):
  pickle_path = f"statistics/main/holdout_fold={model_fold}/Transfer_Cnn14/pretrain=True/loss_type=clip_nll/augmentation={augmentation}/batch_size=32/freeze_base=False/statistics.pickle"
  # NOTE: pickle.load can execute arbitrary code; only open statistics files
  # written by your own training runs. The `with` block closes the file handle.
  with open(pickle_path, "rb") as statistics_file:
    validate_statistics = pickle.load(statistics_file)["validate"]

  # Only iterations that are multiples of 50 are eligible
  # (presumably the iterations at which a checkpoint was saved - TODO confirm).
  candidates = [entry for entry in validate_statistics if entry["iteration"] % 50 == 0]
  if not candidates:
    raise ValueError(f"no validation entry at a multiple of 50 iterations in {pickle_path}")
  # max() keeps the first entry among equal accuracies, exactly like taking the
  # head of a stable descending sort.
  best = max(candidates, key=lambda entry: entry.get("accuracy"))
  best_iteration = best["iteration"]
  print(model_fold, best, best_iteration)
  score_info.append((best["accuracy"],model_fold))

1 {'accuracy': 0.9416666666666667, 'loss': 0.043955818, 'cm': array([[19,  0,  1,  0,  0,  0],
       [ 1, 18,  1,  0,  0,  0],
       [ 0,  3, 17,  0,  0,  0],
       [ 0,  0,  0, 20,  0,  0],
       [ 0,  0,  0,  0, 20,  0],
       [ 0,  0,  1,  0,  0, 19]]), 'iteration': 500} 500


FileNotFoundError: [Errno 2] No such file or directory: 'statistics/main/holdout_fold=2/Transfer_Cnn14/pretrain=True/loss_type=clip_nll/augmentation=mixup/batch_size=32/freeze_base=False/statistics.pickle'

In [12]:
# Rank the (accuracy, model_fold) pairs from best to worst; equal accuracies
# are ordered by descending fold number because whole tuples are compared.
sorted_score_info = list(score_info)
sorted_score_info.sort(reverse=True)
sorted_score_info

[(0.9666666666666667, 1),
 (0.9583333333333334, 5),
 (0.95, 9),
 (0.9333333333333333, 4),
 (0.925, 8),
 (0.925, 7),
 (0.925, 3),
 (0.9083333333333333, 2),
 (0.907563025210084, 10),
 (0.9, 6)]

In [25]:
# Evaluate each fold's best checkpoint on every data fold of the public and the
# private test set. `all_ret` receives one list per model; each of those lists
# holds one raw result per (data_fold, test set) combination, in loop order.
all_ret = []
start_time = time.time()
ith_run = 0
model_folds = range(1, 11)
data_folds = range(1, 11)
# Total number of (model_fold, data_fold) runs, used for the time estimate.
total_runs = len(model_folds) * len(data_folds)

for model_fold in model_folds:
  pickle_path = f"statistics/main/holdout_fold={model_fold}/Transfer_Cnn14/pretrain=True/loss_type=clip_nll/augmentation={augmentation}/batch_size=32/freeze_base=False/statistics.pickle"
  # NOTE: pickle.load can execute arbitrary code; only open statistics files
  # written by your own training runs. The `with` block closes the file handle.
  with open(pickle_path, "rb") as statistics_file:
    validate_statistics = pickle.load(statistics_file)["validate"]

  # Only iterations that are multiples of 50 are eligible
  # (presumably the iterations at which a checkpoint was saved - TODO confirm).
  candidates = [entry for entry in validate_statistics if entry["iteration"] % 50 == 0]
  if not candidates:
    raise ValueError(f"no validation entry at a multiple of 50 iterations in {pickle_path}")
  # max() keeps the first entry among equal accuracies, exactly like taking the
  # head of a stable descending sort.
  best = max(candidates, key=lambda entry: entry.get("accuracy"))
  best_iteration = best["iteration"]
  print(model_fold, best, best_iteration)
  # The trailing `6, False` are presumably classes_num and freeze_base
  # (TODO confirm against models.py).
  model = Transfer_Cnn14(sample_rate, window_size, hop_size, mel_bins, fmin, fmax, 6, False)

  ckpt_path = ckpt_path_template.format(model_fold=model_fold, iteration=best_iteration)
  print(ckpt_path)
  checkpoint = torch.load(ckpt_path)
  model.load_state_dict(checkpoint["model"])
  model.to("cuda")
  # Inference only: switch dropout / batch-norm layers to evaluation behaviour.
  # Harmless if Evaluator already does this itself.
  model.eval()

  model_fold_ret = []
  all_ret.append(model_fold_ret)
  for data_fold in data_folds:
    total_cost = time.time()-start_time
    clear_output()

    ith_run += 1
    # No estimate on the very first run: nothing has completed yet.
    if ith_run > 1:
      print("ith_run:", ith_run)
      print("total_cost:", total_cost)
      # average time per completed run * number of runs still to do
      print("estimate need time:", total_cost/(ith_run-1)*(total_runs+1-ith_run))

    print("model_fold, data_fold:", model_fold, data_fold)

    for hdf5_path in [hdf5_public_test_path, hdf5_private_test_path]:
      print(hdf5_path)
      dataset = GtzanDataset()
      # Arguments are presumably (hdf5 file, holdout fold, batch size) - TODO confirm.
      sampler = EvaluateSampler(hdf5_path, data_fold, 32)
      loader = torch.utils.data.DataLoader(dataset=dataset,
          batch_sampler=sampler, collate_fn=collate_fn,
          num_workers=20, pin_memory=True)

      evaluator = Evaluator(model=model)
      ret_stats, ret = evaluator.evaluate(loader, ALSO_RETURN_RAW=True)
      model_fold_ret.append(ret)

      # Drop the per-run objects before the next run builds new ones.
      del evaluator
      del loader
      del sampler
      del hdf5_path
      del dataset

  # Release this fold's model before the next one is constructed.
  del model

print("Done")

ith_run: 100
total_cost: 3678.798843860626
estimate need time: 37.159584281420464
model_fold, data_fold: 10 10
./features_public_test/waveform.h5
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
./features_private_test/waveform.h5
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
Done


In [12]:
import pickle
# Persist the raw evaluation results so the ensembling steps can be re-run
# without repeating the evaluation loop. The `with` block guarantees the file
# is flushed and closed.
with open("all_ret.pickle", "wb") as all_ret_file:
  pickle.dump(all_ret, all_ret_file)

NameError: name 'all_ret' is not defined

In [11]:
import pickle
# Reload the raw evaluation results saved in all_ret.pickle.
# NOTE: pickle.load can execute arbitrary code; only load a file you produced.
with open("all_ret.pickle", "rb") as all_ret_file:
  all_ret_load_from_pickle = pickle.load(all_ret_file)

In [12]:
from collections import defaultdict

In [13]:
# Inspect the first result of the first model; the displayed value is a dict
# whose 'audio_name' entry is an array of clip file names.
all_ret_load_from_pickle[0][0]

{'audio_name': array(['Other.public_00001.wav', 'Other.public_00011.wav',
        'Other.public_00021.wav', 'Other.public_00031.wav',
        'Other.public_00041.wav', 'Other.public_00051.wav',
        'Other.public_00061.wav', 'Other.public_00071.wav',
        'Other.public_00081.wav', 'Other.public_00091.wav',
        'Other.public_00101.wav', 'Other.public_00111.wav',
        'Other.public_00121.wav', 'Other.public_00131.wav',
        'Other.public_00141.wav', 'Other.public_00151.wav',
        'Other.public_00161.wav', 'Other.public_00171.wav',
        'Other.public_00181.wav', 'Other.public_00191.wav',
        'Other.public_00201.wav', 'Other.public_00211.wav',
        'Other.public_00221.wav', 'Other.public_00231.wav',
        'Other.public_00241.wav', 'Other.public_00251.wav',
        'Other.public_00261.wav', 'Other.public_00271.wav',
        'Other.public_00281.wav', 'Other.public_00291.wav',
        'Other.public_00301.wav', 'Other.public_00311.wav',
        'Other.public_0032

In [14]:
# Group the outputs by clip: DICT maps each audio file name to the list of
# clipwise outputs collected for it across all stored results.
DICT = defaultdict(list)
name_output_pairs = (
    pair
    for per_model_results in all_ret_load_from_pickle
    for result in per_model_results
    for pair in zip(result["audio_name"], result["clipwise_output"])
)
for audio_name, clipwise_output in name_output_pairs:
  DICT[audio_name].append(clipwise_output)

In [15]:
# import tensorflow as tf

In [16]:
# Two ways of ensembling the outputs collected for each clip:
#   softmaxed_then_mean_DICT - softmax every output, then average them;
#   mean_then_softmax_DICT   - average the raw outputs, then softmax once.
softmaxed_then_mean_DICT = defaultdict(list)
mean_then_softmax_DICT = defaultdict(list)
for name, values in DICT.items():
  # Stack the clip's outputs once into a single float32 tensor instead of
  # calling torch.Tensor(values) twice on a Python list (slow when the list
  # holds numpy arrays). Assumes every output of a clip has the same shape,
  # presumably (n_classes,) - TODO confirm.
  stacked_outputs = torch.as_tensor(np.asarray(values), dtype=torch.float32)
  softmaxed_then_mean_DICT[name] = torch.mean(torch.softmax(stacked_outputs, dim=-1), dim=0)
  mean_then_softmax_DICT[name] = torch.softmax(torch.mean(stacked_outputs, dim=0), dim=0)

In [17]:
# Spot-check a single clip under both ensembling strategies.
example_clip_number = 28
filename = "Other.private_{:05d}.wav".format(example_clip_number)
softmaxed_then_mean_DICT[filename], mean_then_softmax_DICT[filename]

(tensor([0.0331, 0.0210, 0.0240, 0.0282, 0.2929, 0.6008]),
 tensor([0.0266, 0.0183, 0.0222, 0.0288, 0.2854, 0.6187]))

In [18]:
# Display the clip name currently held in `filename`.
filename

'Other.private_00028.wav'

In [19]:
# One row per clip: the middle part of the file name (between the first and
# second '.') followed by the softmax-then-mean scores.
LIST = [
    [clip_name.split(".")[1], *clip_scores.numpy()]
    for clip_name, clip_scores in softmaxed_then_mean_DICT.items()
]

In [20]:
import pandas as pd

In [21]:
# Tabulate the rows: the clip identifier followed by one score column per class.
prediction_columns = ['Filename', 'Barking', 'Howling', 'Crying', 'COSmoke', 'GlassBreaking','Other']
df = pd.DataFrame(LIST, columns=prediction_columns)

In [22]:
# Order the rows by clip identifier; `df` itself is left untouched.
new_df = df.sort_values(by="Filename")

In [23]:
# Display the sorted prediction table.
new_df

Unnamed: 0,Filename,Barking,Howling,Crying,COSmoke,GlassBreaking,Other
1000,private_00001,0.012656,0.734690,0.221018,0.015686,0.006698,0.009251
4000,private_00002,0.018909,0.028831,0.910148,0.012934,0.009834,0.019343
7000,private_00003,0.094384,0.447172,0.226664,0.069268,0.049381,0.113130
10000,private_00004,0.569133,0.077300,0.049376,0.039140,0.100780,0.164271
13000,private_00005,0.013612,0.028547,0.011699,0.017702,0.022819,0.905620
...,...,...,...,...,...,...,...
15999,public_09996,0.116008,0.411473,0.086713,0.062300,0.079868,0.243637
18999,public_09997,0.047685,0.717235,0.120074,0.024553,0.023918,0.066535
21999,public_09998,0.034734,0.752641,0.066897,0.019786,0.017081,0.108862
24999,public_09999,0.176721,0.306467,0.325497,0.051665,0.050279,0.089371


In [24]:
# Write the sorted predictions to CSV, leaving out the DataFrame index column.
output_csv_path = "softmax_then_mean_from_panns_transfer_to_gtzan.csv"
new_df.to_csv(output_csv_path, index=False)

In [75]:
# champians = [1,5,9,4]

# champians_softmaxed_then_mean_DICT = defaultdict(list)
# champians_mean_then_softmax_DICT = defaultdict(list)

# for name, values in DICT.items():
#   champian_values = []
#   for itr, v in enumerate(values):
#     if itr+1 not in champians:
#       continue
    
#     champian_values.append(v)

#   champians_softmaxed_then_mean_DICT[name] = tf.reduce_mean(tf.math.softmax(np.array(champian_values),axis=-1), axis=0)
#   champians_mean_then_softmax_DICT[name] = tf.math.softmax(tf.reduce_mean(np.array(champian_values),axis=0),axis=0)

In [76]:
# champians_LIST = []
# for filename, values in champians_softmaxed_then_mean_DICT.items():
#   champians_LIST.append([filename.split(".")[1], *values.numpy()])

In [77]:
# champians_df = pd.DataFrame(data=champians_LIST, columns=['Filename', 'Barking', 'Howling', 'Crying', 'COSmoke', 'GlassBreaking','Other'])

In [78]:
# new_champians_df = champians_df.sort_values("Filename")

In [79]:
# new_champians_df

Unnamed: 0,Filename,Barking,Howling,Crying,COSmoke,GlassBreaking,Other
1000,private_00001,0.005678,0.779080,0.191068,0.013938,0.005418,0.004817
4000,private_00002,0.011510,0.023187,0.937773,0.008226,0.007107,0.012198
7000,private_00003,0.092910,0.408681,0.231952,0.094480,0.060650,0.111326
10000,private_00004,0.618621,0.073110,0.064102,0.032649,0.055303,0.156215
13000,private_00005,0.008307,0.014482,0.010860,0.011827,0.018847,0.935678
...,...,...,...,...,...,...,...
15999,public_09996,0.094949,0.330093,0.078828,0.086414,0.108063,0.301654
18999,public_09997,0.039045,0.774797,0.110686,0.016797,0.018654,0.040021
21999,public_09998,0.027528,0.820619,0.061985,0.013497,0.010477,0.065893
24999,public_09999,0.211755,0.295641,0.282837,0.051254,0.056513,0.101999


In [80]:
# new_champians_df.to_csv("4_champian_softmax_then_mean_from_panns_transfer_to_gtzan.csv", index=False)