In [34]:
import pickle
import os
import numpy as np
import csv
import argparse
import ast
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="3"


In [43]:
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument("--model", type=str, default='ast', help="the model used")
parser.add_argument("--dataset", type=str, default="audioset", help="the dataset used", choices=["audioset","audioset_s", "esc50", "speechcommands"])
parser.add_argument("--n_mels", type=int, default=128, help="number of mel bins")
parser.add_argument("--dataset-portion", type=str, default='full', help="balance set or full set")
parser.add_argument('--mean', default=-12.7508, type=float, help='normalizing mean')
parser.add_argument('--std', default=11.7584, type=float, help='normalizing std')

parser.add_argument("--exp-dir", type=str, default="", help="directory to dump experiments")
parser.add_argument('--lr', '--learning-rate', default="0.001", type=str, metavar='LR', help='initial learning rate')
parser.add_argument("--optim", type=str, default="adam", help="training optimizer", choices=["sgd", "adam"])
parser.add_argument('-b', '--batch-size', default=12, type=int, metavar='N', help='mini-batch size')
parser.add_argument('-w', '--num-workers', default=32, type=int, metavar='NW', help='# of workers for dataloading (default: 32)')
parser.add_argument("--n-epochs", type=int, default=1, help="number of maximum training epochs")
# not used in the formal experiments
parser.add_argument("--lr_patience", type=int, default=2, help="how many epoch to wait to reduce lr if mAP doesn't improve")

parser.add_argument("--n-print-steps", type=int, default=100, help="number of steps to print statistics")

parser.add_argument('--freqm', help='frequency mask max length', type=int, default=0)
parser.add_argument('--timem', help='time mask max length', type=int, default=0)
parser.add_argument("--mixup", type=float, default=0, help="how many (0-1) samples need to be mixup during training")
parser.add_argument("--bal", type=str, default=None, help="use balanced sampling or not")
# the stride used in patch spliting, e.g., for patch size 16*16, a stride of 16 means no overlapping, a stride of 10 means overlap of 6.
parser.add_argument("--fstride", type=int, default=10, help="soft split freq stride, overlap=patch_size-stride")
parser.add_argument("--tstride", type=int, default=10, help="soft split time stride, overlap=patch_size-stride")
parser.add_argument('--imagenet_pretrain', help='if use ImageNet pretrained audio spectrogram transformer model', type=ast.literal_eval, default='True')
parser.add_argument('--audioset_pretrain', help='if use ImageNet and audioset pretrained audio spectrogram transformer model', type=ast.literal_eval, default='False')
parser.add_argument('--suffix', type=str, default='')

args = parser.parse_args(args=['--model=ast','--fstride=6', '--tstride=6','--dataset=audioset_s','--batch-size=72', '--lr=1e-5', '--freqm=12', '--timem=60','--mixup=0.3','--mean=-29.686901','--std=40.898224','--n-epochs=10','--suffix=ast_challenge-40.8'])

In [39]:
exp_id = f"../egs/audioset/exp/{args.dataset}-{args.dataset_portion}-f{args.fstride}-t{args.tstride}-p{args.imagenet_pretrain}-b{args.batch_size}-lr{args.lr}-{args.suffix}"
print(exp_id)

../egs/audioset/exp/audioset_s-full-f6-t6-pTrue-b72-lr1e-5-ast_challenge-40.8


In [45]:
exp_id = f"../egs/audioset/exp/{args.dataset}-{args.dataset_portion}-f{args.fstride}-t{args.tstride}-p{args.imagenet_pretrain}-b{args.batch_size}-lr{args.lr}-fm{args.freqm}-tm{args.timem}-mix{args.mixup}-m{args.mean}-std{args.std}-epoch{args.n_epochs}-{args.suffix}"
print(exp_id)

../egs/audioset/exp/audioset_s-full-f6-t6-pTrue-b72-lr1e-5-fm12-tm60-mix0.3-m-29.686901-std40.898224-epoch10-ast_challenge-40.8


In [10]:
exp_id='../egs/audioset/exp/test-full-f10-t10-pTrue-b12-lr1e-5'
print(exp_id)

../egs/audioset/exp/test-full-f10-t10-pTrue-b12-lr1e-5


In [50]:
with open(os.path.join(exp_id, 'stats_6.pickle'), 'rb') as f:
    data = pickle.load(f, encoding='bytes')

In [51]:
print(data[0])

{'precisions': array([0.27638032, 0.46997106, 0.60491288, 0.70442253, 0.90780142]), 'recalls': array([1.        , 0.99141385, 0.95382561, 0.84487693, 0.04884564]), 'AP': 0.7963401053071197, 'fpr': array([0.        , 0.12484881, 0.24049187, 0.51404381]), 'fnr': array([1.        , 0.15512307, 0.03701584, 0.00362526]), 'auc': 0.9343514525678709, 'acc': 0.4580827908363564}


In [52]:
final_mAP = 0
result_AP = []
result_auc = []
for item in data:
    result_AP.append(item['AP'])
    result_auc.append(item['auc'])
final_mAP = np.mean(result_AP)
final_auc = np.mean(result_auc)

In [53]:
print(final_mAP, final_auc)

0.4081595729037591 0.9696046641238654


In [54]:
for class_id, mAP in enumerate(zip(result_AP, result_auc)):
    print(class_id, mAP[0], mAP[1])

0 0.7963401053071197 0.9343514525678709
1 0.06747012533036023 0.8189839212521165
2 0.07770179525388186 0.9343003964340602
3 0.36912320058647596 0.9641768215767994
4 0.21530760191513304 0.9738743790393594
5 0.09804318891463679 0.9520020269484456
6 0.4789505564144407 0.9962385179202076
7 0.470735591229055 0.9837578295037299
8 0.1676070624901316 0.9566298860595248
9 0.12877142792454604 0.9212705782175006
10 0.11748150202655609 0.9671763112861154
11 0.2285952153848548 0.9751017128334365
12 0.8051415772365549 0.9993682400438618
13 0.4370036546277616 0.9954302281147718
14 0.2966815005859783 0.984893959871938
15 0.8070639545451777 0.9977327216505787
16 0.3871006145089668 0.9765448472345023
17 0.6121491400796643 0.9918915297005512
18 0.19612371170619575 0.9798289541264564
19 0.28436771138143113 0.9821636843941584
20 0.3480836301676178 0.9900456126989937
21 0.19578181558080401 0.9782771768927877
22 0.5379422004370681 0.9896791772583029
23 0.5084599149869585 0.9851559754548158
24 0.2755356224408

In [26]:
with open(os.path.join(exp_id, 'predictions', 'target.csv'), 'rb') as csv_f:
#     csv_reader = csv.DictReader(csv_f)
    lines = csv_f.readlines()
    print(np.(lines[0]))
#     print(csv_reader)
#     for row in lines:
#         print(row)

SyntaxError: invalid syntax (<ipython-input-26-84708d29dc5c>, line 4)

In [55]:
with open(os.path.join(exp_id, 'progress.pkl'), 'rb') as f:
    progress = pickle.load(f, encoding='bytes')

epoch, global_step, best_epoch, best_mAP, time.time() - start_tim

In [56]:
[print(item) for item in progress]

[1, 28066, 1, 0.3520142285339701, 9182.559254407883]
[2, 56132, 2, 0.39191073545642585, 18358.36445760727]
[3, 84198, 3, 0.4060429672121134, 27479.976589679718]
[4, 112264, 4, 0.4077943627077529, 36141.80537772179]
[5, 140330, 4, 0.4077943627077529, 44815.40937042236]
[6, 168396, 6, 0.4081595729037591, 53496.31414580345]
[7, 196462, 6, 0.4081595729037591, 62167.2529091835]
[8, 224528, 6, 0.4081595729037591, 70837.60245680809]
[9, 252594, 6, 0.4081595729037591, 79511.00012993813]
[10, 280660, 6, 0.4081595729037591, 88202.09598326683]


[None, None, None, None, None, None, None, None, None, None]

In [57]:
with open(os.path.join(exp_id, 'args.pkl'), 'rb') as f:
    args = pickle.load(f, encoding='bytes')

In [58]:
print(args)

Namespace(audioset_pretrain=False, bal='bal', batch_size=72, data_eval='', data_train='/local/slurm-7047405/local/audio/data/datafiles/audioset_bal_unbal_train_data.json', data_val='/local/slurm-7047405/local/audio/data/datafiles/audioset_eval_data.json', dataset='audioset_s', exp_dir='./exp/audioset_s-full-f6-t6-pTrue-b72-lr1e-5-fm12-tm60-mix0.3-m-29.686901-std40.898224-epoch10-ast_challenge', freqm=12, fstride=6, imagenet_pretrain=True, label_csv='./data/class_labels_indices.csv', lr=1e-05, lr_patience=2, mean=-29.686901, mixup=0.3, model='ast', n_class=527, n_epochs=10, n_mels=64, n_print_steps=100, num_workers=32, optim='adam', save_model=True, std=40.898224, timem=60, tstride=6)
