In [1]:
from collections import defaultdict
import csv
from datetime import datetime
import os
import re

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from pytz import timezone
from sklearn.externals import joblib
import torch
from torch import nn
from tqdm import tqdm_notebook

from load_dataset import AudioDataset
from train_coarse import VGG_alt, device  #  TEST_LOADER
from train_branches import VGG_alt as VGG_alt_fine, get_label_range



# Config

In [2]:
DATE = datetime.now(timezone('US/Pacific')).strftime('%Y%m%d_%H%M%S')
print(f'DATE: {DATE}')

DATE: 20190610_231917


In [12]:
LINUX = True

#FINE = False
#COARSE = True

COARSE_WITH_FINE_TOP_LEVEL_MODEL = True

FINE_TO_COARSE_LOGIC = False

VGG_EMBED = True
L3_EMBED = False

if not LINUX:
    basepath = r"D:\repos\Data-Processing\audio\test"
else:
    basepath = '/dcase/spec_vgg/validate'                                  # Validate / compute metrics
    #basepath = '/dcase/datasets-dcase-2019-5/audio-eval/features/spec_vgg'  # Final Eval set
NUM_COARSE_LABELS = 8
NUM_TOTAL_LABELS = 37
NUM_FINE_LABELS = NUM_TOTAL_LABELS - NUM_COARSE_LABELS

COARSE_PREDICT_THRESHOLD = 0.5  # what else?

RESULT_CSV_COARSE_PATH = f'csvs/results-{DATE}-coarse.csv'
RESULT_CSV_FINE_PATH_TEMPLATE = f'csvs/results-{DATE}-fine-%d.csv' 
RESULT_CSV_FINAL_PATH = f'csvs/results-{DATE}-final.csv'

In [13]:
if not LINUX:
    COARSE_CHECKPOINT = "models/20190605_085047_best_epoch_1_val_loss=0.2655.ckpt"
    FINE_CHECKPOINT_BASE = 'models'
else:
    #COARSE_CHECKPOINT = '/dcase/trained-models-branches/20190607_085504_0.773.ckpt'
    #COARSE_CHECKPOINT =  '/dcase/trained-models-branches/20190610_083507_coarse=0.777_fine=0.644.ckpt'
    #COARSE_CHECKPOINT = '/dcase/models/20190610_083507_coarse=0.777_fine=0.644.ckpt'
    
    #Best Coarse from Daniel:
    #COARSE_CHECKPOINT = '/dcase/models/20190610_170308_coarse=0.787_fine=0.646.ckpt'
    
    # Best Fine form Daniel:
    COARSE_CHECKPOINT = '/dcase/models/20190609_230306_coarse=0.769_fine=0.656.ckpt'

    
    FINE_CHECKPOINT_BASE = '/dcase/trained-models-branches'
    #FINE_CHECKPOINT_BASE = '/dcase/output/models/best'

FINE_CKPT_REGEX = r'.+_coarse=(\d+)_best_.*\.ckpt'

## Find checkpoints for fine categories

Assumes there is at most 1 ckpt file for each coarse category

In [14]:
ckpt_paths_fine = defaultdict(str)
for f in os.listdir(FINE_CHECKPOINT_BASE):
    m = re.match(FINE_CKPT_REGEX, f)
    if m:
        coarse_idx = int(m.group(1))
        ckpt_paths_fine[coarse_idx] = os.path.join(FINE_CHECKPOINT_BASE, f)
ckpt_paths_fine

defaultdict(str,
            {6: '/dcase/trained-models-branches/20190607_042231_coarse=6_best_epoch_5_val_loss=0.1422.ckpt',
             4: '/dcase/trained-models-branches/20190607_042231_coarse=4_best_epoch_11_val_loss=0.2119.ckpt',
             1: '/dcase/trained-models-branches/20190607_042231_coarse=1_best_epoch_18_val_loss=0.0848.ckpt',
             3: '/dcase/trained-models-branches/20190607_042231_coarse=3_best_epoch_14_val_loss=0.2054.ckpt',
             0: '/dcase/trained-models-branches/20190607_042231_coarse=0_best_epoch_13_val_loss=0.2727.ckpt',
             5: '/dcase/trained-models-branches/20190607_042231_coarse=5_best_epoch_14_val_loss=0.3228.ckpt'})

## Set up label names

In [15]:
# re-orders output as [fine, coarse] to match demo .csv file (maybe not necessary)
def reorder_labels_for_submission(list_or_array):
    if type(list_or_array) == torch.Tensor:
        list_or_array = list(list_or_array.to('cpu').numpy())
    list_or_array = list(list_or_array)
    return list_or_array[8:] + list_or_array[0:8]

In [16]:
# process label names
label_names = joblib.load('label_order.pkl')
label_names = [re.sub('_presence', '', label_names[i]) for i in range(len(label_names))]

In [17]:
#label_names

In [20]:
def make_results_csv(csv_out_path_template, model_class, ckpt_path, use_fine_model=False, coarse_idx=None,
                    label_start=None, label_end=None, num_fine_classes=None):
    # Skip if path to checkpoint doesn't exit (happens for single-class fine models that don't exist)
    if not ckpt_path:
        return
    
    if use_fine_model:
        csv_out_path = csv_out_path_template % coarse_idx
        model = model_class(num_fine_classes)
    else:
        # Coarse (high-level) model.
        csv_out_path = csv_out_path_template 
        model = model_class()

    # Initialize model weights from checkpoint.
    ckpt = torch.load(ckpt_path)
    model.load_state_dict(ckpt)
    model.eval()    
        
    print(f'Making results file: {csv_out_path}')
    with open(csv_out_path, 'w') as c:
        writer = csv.writer(c, delimiter=',')

        header = ['audio_filename'] + reorder_labels_for_submission(label_names)
        writer.writerow(header)
        data_rows = []
        for filename in tqdm_notebook(os.listdir(basepath)):
            # Only eval on non-augmented validation set data. TODO: remove extraneous files from validation dataset
            if '_pitch_changes-0_volume_changes-0_background_changes-None-None' not in filename:
                continue
            audio_filename = filename[0:9] + '.wav'
            spectrogram, vgg, label = joblib.load(os.path.join(basepath, filename))
    #         print(spectrogram.shape, emb.shape, label.shape)
            spectrogram = np.expand_dims(spectrogram, axis=0)
            spectrogram = np.expand_dims(spectrogram, axis=0)

    #         print(vgg.flatten().shape)
            vgg = torch.from_numpy(vgg.flatten().reshape((1, 1280)))
            spectrogram = spectrogram.astype(np.float32)
            spectrogram = torch.from_numpy(spectrogram)
    #         print(spectrogram.shape, emb.shape, label.shape)
            in_data = (spectrogram, vgg)

            with torch.no_grad():
                results = model(in_data)
    #             print(len(results[0]))
                results = torch.sigmoid(results[0])
                #print(label)
                #print(results)
                #print()
                label_true = np.where(label == 1)[0]
                label_name = [label_names[i] for i in label_true]
    #             print(label_name)
    #             img_array = img_array.reshape((16, 16))
    #             plt.imshow(img_array, cmap='hot', interpolation='nearest')
    #             plt.show()
    #             print(results.shape)

                results = results.detach().numpy()
                if use_fine_model:
                    if FINE_TO_COARSE_LOGIC:
                        coarse_labels = []
                        fine_label_names = label_names[NUM_COARSE_LABELS:]
                        coarse_label_dict = {i: [] for i in range(NUM_COARSE_LABELS)}
                        for i, r in enumerate(results):
        #                     print(fine_label_names[i][0])
                            coarse_label_dict[int(fine_label_names[i][0])-1].append(r)
                        for i in range(NUM_COARSE_LABELS):
                            coarse_labels.append(max(coarse_label_dict[i]))
        #                 print('coarse labels', coarse_labels)
                        results = coarse_labels + list(results)
                    else:
                        full_results = np.zeros(NUM_TOTAL_LABELS)
                        full_results[label_start : label_end] = results
                        results = full_results
                        
                elif COARSE_WITH_FINE_TOP_LEVEL_MODEL:
                    results = list(results)
                else:
                    # Coarse model.
                    results = list(results) + [0 for i in range(NUM_FINE_LABELS)]

                results = reorder_labels_for_submission(results)
                #print (results)
                data_rows.append([audio_filename] + results)
    #             print([audio_filename] + results)
        writer.writerows(data_rows)
    
    # Clean up.
    del ckpt
    del model
    torch.cuda.empty_cache()

# Make Predictions

##### N.B: checkpoint must have been trained with same model architecture

## Make Coarse predictions

In [21]:
 make_results_csv(RESULT_CSV_COARSE_PATH, VGG_alt, COARSE_CHECKPOINT)

Making results file: csvs/results-20190610_231917-coarse.csv


HBox(children=(IntProgress(value=0, max=4125), HTML(value='')))

## Make Fine predictions

In [None]:
for coarse_idx in range(NUM_COARSE_LABELS):
    label_start, label_end, num_fine_classes = get_label_range(coarse_idx)
    make_results_csv(RESULT_CSV_FINE_PATH_TEMPLATE, VGG_alt_fine, ckpt_paths_fine[coarse_idx], 
                     use_fine_model=True, coarse_idx=coarse_idx, 
                     label_start=label_start, label_end=label_end, num_fine_classes=num_fine_classes)

## Combine separate prediction CSVs into final output CSV

1. Load coarse-model results CSV.
2. For each coarse class with a high score, copy in the fine-level results.

In [22]:
df = pd.read_csv(RESULT_CSV_COARSE_PATH)
print(RESULT_CSV_COARSE_PATH)
df.head()


csvs/results-20190610_231917-coarse.csv


Unnamed: 0,audio_filename,1-1_small-sounding-engine,1-2_medium-sounding-engine,1-3_large-sounding-engine,1-X_engine-of-uncertain-size,2-1_rock-drill,2-2_jackhammer,2-3_hoe-ram,2-4_pile-driver,2-X_other-unknown-impact-machinery,...,7-X_other-unknown-human-voice,8-1_dog-barking-whining,1_engine,2_machinery-impact,3_non-machinery-impact,4_powered-saw,5_alert-signal,6_music,7_human-voice,8_dog
0,00_001177.wav,0.003182,0.055715,0.343944,0.006606,0.0002088875,0.0007313141,9.954495e-05,7.954438e-06,0.003333,...,2e-05,0.000112,0.576407,0.044812,0.00823,0.001054,0.005202,2e-05,0.015133,0.000114
1,04_000468.wav,6e-06,0.000548,0.000127,5.5e-05,1.237619e-07,8.403098e-07,2.178314e-07,3.705563e-06,5.3e-05,...,0.003009,0.001009,0.002682,0.002562,0.000227,1.2e-05,0.009579,0.00607,0.909381,0.001002
2,04_000182.wav,0.001075,0.011721,0.019819,0.001747,3.884907e-05,0.0001390877,0.0008654617,0.002741874,0.007309,...,0.011229,0.001572,0.081388,0.149512,0.063902,0.00039,0.025009,0.021499,0.738031,0.001569
3,03_001573.wav,0.000122,0.00164,0.000623,0.005783,0.0001629591,0.0008093012,5.721565e-06,0.0002983857,6.2e-05,...,0.001544,0.086537,0.006376,0.007409,0.001108,0.002556,0.650215,0.115984,0.159117,0.091038
4,04_000588.wav,0.001325,0.106243,0.245586,0.006404,0.0001928769,0.002912376,1.654024e-05,4.136291e-08,0.000303,...,9e-06,0.000727,0.63974,0.021023,0.000196,0.000145,0.00648,2e-06,0.005945,0.000715


In [None]:
dfs_fine = defaultdict(lambda: None)
label_ranges = {}
for coarse_idx in range(NUM_COARSE_LABELS):
    csv_path = RESULT_CSV_FINE_PATH_TEMPLATE % coarse_idx
    label_start, label_end, _ = get_label_range(coarse_idx)
     # convert from starting fine labels at col 8 to starting at col 1
    label_start -= NUM_COARSE_LABELS - 1  # -1 is to account for audio_filename in col 0
    label_end -= NUM_COARSE_LABELS - 1
    label_ranges[coarse_idx] = (label_start, label_end)
    if os.path.exists(csv_path):
        dfs_fine[coarse_idx] = pd.read_csv(csv_path)

In [None]:
label_ranges

In [None]:
#dfs_fine[1].head()

In [None]:
df.values.shape

In [None]:
for i in tqdm_notebook(range(len(df))):
    # Find the coarse-predicitons that are over-threshold OR are max over all coarse categories.
    # Find argmax.
    coarse_preds = df.values[i][1 + NUM_FINE_LABELS:]
    best_coarse_idx = np.argmax(coarse_preds)
    for coarse_idx in range(NUM_COARSE_LABELS):
        start_idx, end_idx = label_ranges[coarse_idx]
        coarse_prob = df.values[i][1 + NUM_FINE_LABELS + coarse_idx]
        if coarse_prob > COARSE_PREDICT_THRESHOLD or coarse_idx == best_coarse_idx:  # + 1 is because of filename in col 0
            df_fine = dfs_fine[coarse_idx]
            if df_fine is None:
                assert end_idx - start_idx == 1
                df.iloc[i, start_idx] = coarse_prob
            else:
                # Copy fine-predictions over into the results data.
                df.iloc[i, start_idx:end_idx] = df_fine.values[i][start_idx:end_idx]
                

In [23]:
df.head()

Unnamed: 0,audio_filename,1-1_small-sounding-engine,1-2_medium-sounding-engine,1-3_large-sounding-engine,1-X_engine-of-uncertain-size,2-1_rock-drill,2-2_jackhammer,2-3_hoe-ram,2-4_pile-driver,2-X_other-unknown-impact-machinery,...,7-X_other-unknown-human-voice,8-1_dog-barking-whining,1_engine,2_machinery-impact,3_non-machinery-impact,4_powered-saw,5_alert-signal,6_music,7_human-voice,8_dog
0,00_001177.wav,0.003182,0.055715,0.343944,0.006606,0.0002088875,0.0007313141,9.954495e-05,7.954438e-06,0.003333,...,2e-05,0.000112,0.576407,0.044812,0.00823,0.001054,0.005202,2e-05,0.015133,0.000114
1,04_000468.wav,6e-06,0.000548,0.000127,5.5e-05,1.237619e-07,8.403098e-07,2.178314e-07,3.705563e-06,5.3e-05,...,0.003009,0.001009,0.002682,0.002562,0.000227,1.2e-05,0.009579,0.00607,0.909381,0.001002
2,04_000182.wav,0.001075,0.011721,0.019819,0.001747,3.884907e-05,0.0001390877,0.0008654617,0.002741874,0.007309,...,0.011229,0.001572,0.081388,0.149512,0.063902,0.00039,0.025009,0.021499,0.738031,0.001569
3,03_001573.wav,0.000122,0.00164,0.000623,0.005783,0.0001629591,0.0008093012,5.721565e-06,0.0002983857,6.2e-05,...,0.001544,0.086537,0.006376,0.007409,0.001108,0.002556,0.650215,0.115984,0.159117,0.091038
4,04_000588.wav,0.001325,0.106243,0.245586,0.006404,0.0001928769,0.002912376,1.654024e-05,4.136291e-08,0.000303,...,9e-06,0.000727,0.63974,0.021023,0.000196,0.000145,0.00648,2e-06,0.005945,0.000715


In [24]:
df.to_csv(RESULT_CSV_FINAL_PATH)

In [25]:
RESULT_CSV_FINAL_PATH

'csvs/results-20190610_231917-final.csv'

# Eval

In [26]:
# run a command similar to this on the command line to get results  # 20190609_031224
if not LINUX:
    !python evaluate_predictions.py results_2019-5-6-aug.csv "D:\DCASE_2019\annotations.csv" "D:\DCASE_2019\dcase-ust-taxonomy.yaml"
else:
    !python evaluate_predictions.py csvs/results-20190610_231917-final.csv /dcase/datasets-dcase-2019-5/annotations.csv /dcase/datasets-dcase-2019-5/dcase-ust-taxonomy.yaml

  thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).nonzero()[0][0]
Fine level evaluation:
 * Micro AUPRC:           0.6560867707337048
 * Micro F1-score (@0.5): 0.3982202447163515
 * Macro AUPRC:           0.40059566061982355
 * Coarse Tag AUPRC:
      - 1: 0.6405304300056462
      - 2: 0.21380721739093014
      - 3: 0.3336595526102611
      - 4: 0.2761679795550256
      - 5: 0.70229675965
      - 6: 0.1563059224638172
      - 7: 0.8819974232829082
      - 8: 0.0
  thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).nonzero()[0][0]
Coarse level evaluation:
 * Micro AUPRC:           0.7687132764444584
 * Micro F1-score (@0.5): 0.5328109696376102
 * Macro AUPRC:           0.5552582865865167
 * Coarse Tag AUPRC:
      - 1: 0.8178125638838614
      - 2: 0.42810526358580453
      - 3: 0.3332328931822347
      - 4: 0.6770075945049289
      - 5: 0.8674275089257218
      - 6: 0.37801009220794807
      - 7: 0.9404703764016337
      - 8: 0.0


In [None]:
# python extract_embedding.py "D:\DCASE_2019\annotations.csv" $SONYC_UST_PATH/data $SONYC_UST_PATH/features $SONYC_UST_PATH/vggish