In [17]:
import pickle
import os
import numpy as np
import csv
import argparse
import ast


In [26]:
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument("--model", type=str, default='ast', help="the model used")
parser.add_argument("--dataset", type=str, default="audioset", help="the dataset used", choices=["audioset","audioset_s", "esc50", "speechcommands"])
parser.add_argument("--n_mels", type=int, default=128, help="number of mel bins")
parser.add_argument("--dataset-portion", type=str, default='full', help="balance set or full set")

parser.add_argument("--exp-dir", type=str, default="", help="directory to dump experiments")
parser.add_argument('--lr', '--learning-rate', default="0.001", type=str, metavar='LR', help='initial learning rate')
parser.add_argument("--optim", type=str, default="adam", help="training optimizer", choices=["sgd", "adam"])
parser.add_argument('-b', '--batch-size', default=12, type=int, metavar='N', help='mini-batch size')
parser.add_argument('-w', '--num-workers', default=32, type=int, metavar='NW', help='# of workers for dataloading (default: 32)')
parser.add_argument("--n-epochs", type=int, default=1, help="number of maximum training epochs")
# not used in the formal experiments
parser.add_argument("--lr_patience", type=int, default=2, help="how many epoch to wait to reduce lr if mAP doesn't improve")

parser.add_argument("--n-print-steps", type=int, default=100, help="number of steps to print statistics")

parser.add_argument('--freqm', help='frequency mask max length', type=int, default=0)
parser.add_argument('--timem', help='time mask max length', type=int, default=0)
parser.add_argument("--mixup", type=float, default=0, help="how many (0-1) samples need to be mixup during training")
parser.add_argument("--bal", type=str, default=None, help="use balanced sampling or not")
# the stride used in patch spliting, e.g., for patch size 16*16, a stride of 16 means no overlapping, a stride of 10 means overlap of 6.
parser.add_argument("--fstride", type=int, default=10, help="soft split freq stride, overlap=patch_size-stride")
parser.add_argument("--tstride", type=int, default=10, help="soft split time stride, overlap=patch_size-stride")
parser.add_argument('--imagenet_pretrain', help='if use ImageNet pretrained audio spectrogram transformer model', type=ast.literal_eval, default='True')
parser.add_argument('--audioset_pretrain', help='if use ImageNet and audioset pretrained audio spectrogram transformer model', type=ast.literal_eval, default='False')
parser.add_argument('--suffix', type=str, default='')

args = parser.parse_args(args=['--model=ast','--dataset=audioset_s','--batch-size=448', '--lr=1e-5','--suffix=ast_debug0.21'])

In [27]:
exp_id = f"../egs/audioset/exp/{args.dataset}-{args.dataset_portion}-f{args.fstride}-t{args.tstride}-p{args.imagenet_pretrain}-b{args.batch_size}-lr{args.lr}-{args.suffix}"
print(exp_id)

../egs/audioset/exp/audioset_s-full-f10-t10-pTrue-b448-lr1e-5-ast_debug0.21


In [35]:
with open(os.path.join(exp_id, 'stats_2.pickle'), 'rb') as f:
    data = pickle.load(f, encoding='bytes')

In [36]:
print(data[0])

{'precisions': array([0.26408344, 0.4179454 , 0.55334599, 0.68399854]), 'recalls': array([1.        , 0.98740698, 0.94504865, 0.71284106]), 'AP': 0.7160138349751176, 'fpr': array([0.        , 0.18740761, 0.35929311, 0.86036823]), 'fnr': array([1.00000000e+00, 1.32035871e-01, 2.69032627e-02, 7.63213127e-04]), 'auc': 0.9055092133419932, 'acc': 0.4216071162351538}


In [41]:
final_mAP = 0
result_AP = []
result_auc = []
for item in data:
    result_AP.append(item['AP'])
    result_auc.append(item['auc'])
final_mAP = np.mean(result_AP)
final_auc = np.mean(result_auc)

In [42]:
print(final_mAP, final_auc)

0.2109605491562668 0.9303739362002054


In [43]:
for class_id, mAP in enumerate(zip(result_AP, result_auc)):
    print(class_id, mAP)

0 (0.7160138349751176, 0.9055092133419932)
1 (0.026878048292643977, 0.7808459542008894)
2 (0.06080615614749236, 0.9100072206628962)
3 (0.24148186259841598, 0.9490540184035371)
4 (0.1553336432947377, 0.9370869261825252)
5 (0.0440465815546555, 0.9140731695160246)
6 (0.2141787685648117, 0.9813133179236446)
7 (0.31550950314029613, 0.9750253368555716)
8 (0.15383801545775724, 0.8232528789230394)
9 (0.0687332569618843, 0.8324239552077419)
10 (0.05216148111112915, 0.9040160992872452)
11 (0.0943094408028417, 0.899040001708562)
12 (0.6523102177206566, 0.9963062187442888)
13 (0.3853704236201116, 0.989602335975012)
14 (0.12112028163389821, 0.9613673023463329)
15 (0.40250941555598146, 0.993511322723757)
16 (0.3653251504147195, 0.9506569252485769)
17 (0.3966943910704832, 0.982595756433744)
18 (0.07597857103715022, 0.9554442732408834)
19 (0.15546600004715783, 0.9609633820133247)
20 (0.2548715985424924, 0.9805574031976283)
21 (0.06673004360834317, 0.9456686437721178)
22 (0.2051354050541779, 0.96308337

In [32]:
with open(os.path.join(exp_id, 'predictions', 'target.csv'), 'rb') as csv_f:
#     csv_reader = csv.DictReader(csv_f)
    lines = csv_f.readlines()
    print(np.(lines[0]))
#     print(csv_reader)
#     for row in lines:
#         print(row)

0


In [31]:
with open(os.path.join(exp_id, 'progress.pkl'), 'rb') as f:
    progress = pickle.load(f, encoding='bytes')

epoch, global_step, best_epoch, best_mAP, time.time() - start_tim

In [32]:
[print(item) for item in progress]

[1, 4511, 1, 0.04908593594799577, 2950.064663171768]
[2, 9022, 2, 0.2109605491562668, 5912.413208246231]


[None, None]