Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.

In [1]:
import torch
import torch.optim as optim
import torch.utils.data as data_utils
import os
import numpy as np
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold
import csv
import pandas as pd
import matplotlib.pyplot as plt
import glob
import gc
import h5py
import pickle as pk

from utils import log_results, SaveBestModel, train, test
from utils import mel_sp_to_image

from models import VGG16_pool

In [17]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Set directories

In [2]:
# Working directories, all relative to the current working directory:
#   dataDir    - where the input HDF5 file is read from
#   resultsDir - where the experiment log is written
#   tempDir    - where per-fold model checkpoints are written
dataDir = 'processed'
resultsDir = 'Results'
tempDir = 'temp'

# exist_ok=True makes creation idempotent and avoids the check-then-create race
# of testing os.path.exists() first.
os.makedirs(resultsDir, exist_ok=True)
os.makedirs(tempDir, exist_ok=True)

# Load data

In [5]:
# Load the spectrogram array ('mel_spectr') and the metadata table ('info')
# that are stored together in one HDF5 file.
fname = 'birds_cornell_spectr_100_species_sr_32000_len_7_sec_New.h5'
# Original note: "19707 samples per class".
# NOTE(review): this may actually be the total sample count - confirm against the data.
fileLoc = os.path.join(dataDir,fname)

# The context manager closes the handle even when the read fails. Indexing with
# hf[...] raises a KeyError naming the missing dataset, instead of the opaque
# TypeError produced by subscripting the None that hf.get() returns.
with h5py.File(fileLoc, 'r') as hf:
    mel_sp = hf['mel_spectr'][()]  # [()] reads the whole dataset into memory

# pandas opens the file on its own, so this runs after the h5py handle is closed.
metadata_total = pd.read_hdf(fileLoc, 'info')

In [6]:
# Values of the 'ebird_code' metadata column, one per row, as a plain Python list.
original_label = [*metadata_total['ebird_code']]

In [None]:
# Encode the string labels in two ways:
#   labels_one_hot   - LabelBinarizer output, used as the training target
#   labels_multi_lbl - LabelEncoder integer ids, used for stratified splitting
lb_bin = LabelBinarizer()
labels_one_hot = lb_bin.fit_transform(original_label)

lb_enc = LabelEncoder()
labels_multi_lbl = lb_enc.fit_transform(original_label)

number_of_sample_classes = len(lb_enc.classes_)
print("Number of Species: ", number_of_sample_classes)

# Map each label to its position in lb_bin.classes_.
species_id_class_dict_tp = {
    species_id: class_label
    for class_label, species_id in enumerate(lb_bin.classes_)
}

In [27]:
# Convert every spectrogram with mel_sp_to_image, cast it to float32, and move
# axis 2 to the front before stacking everything into a single array.
# NOTE(review): presumably this turns (H, W, C) images into the channel-first
# layout the network expects - confirm against mel_sp_to_image in utils.
mel_sp_normalized = np.array([
    np.rollaxis(mel_sp_to_image(spectrogram).astype('float32'), 2, 0)
    for spectrogram in mel_sp
])

In [None]:
# Training hyper-parameters shared by every cross-validation fold.
num_epoch = 50          # training epochs per fold
batch_size = 2 * 16     # mini-batch size of the training loader
num_classes = 100       # number of target classes
shuffleBatches = True   # reshuffle the training set every epoch

In [None]:
# Stratified 5-fold cross-validation of VGG16_pool on the spectrogram images.
# In every fold the held-out part is split 50/50 (stratified, seeded) into a
# validation set, used to select the checkpoint, and a test set, which is scored
# once with the selected checkpoint. One log record is stored per fold.
#
# `random_state` is intentionally NOT passed to StratifiedKFold: the folds are
# not shuffled, so the seed never had any effect, and scikit-learn >= 0.24 raises
# a ValueError when `random_state` is combined with `shuffle=False`. To shuffle,
# pass `shuffle=True, random_state=42` - note that this changes fold membership.
skf = StratifiedKFold(n_splits=5)
exp_no = 0
log_file_name = f'100_species_spectr_vgg16_7sec_{exp_no}.p'
store_ = log_results(file_name=log_file_name, results_dir = resultsDir)

exp_ind = 0
for train_ind, test_ind in skf.split(mel_sp_normalized, labels_multi_lbl):

    # Per-fold checkpoint file. saveModel.check() is called once per epoch with
    # the validation accuracy; presumably it writes the model state to PATH_curr
    # whenever that accuracy improves (see SaveBestModel in utils).
    PATH_curr = os.path.join(tempDir, f'currentModel_vgg16_{exp_no}_{exp_ind}.pt')
    saveModel = SaveBestModel(PATH=PATH_curr, monitor=-np.inf, verbose=True)

    X_train, X_test_p_valid = mel_sp_normalized[train_ind,:], mel_sp_normalized[test_ind,:]

    # One-hot rows are the training targets; the integer ids are only needed to
    # stratify the validation/test split below.
    y_train, y_test_p_valid = labels_one_hot[train_ind], labels_one_hot[test_ind]
    y_train_mlbl, y_test_p_valid_mlbl = labels_multi_lbl[train_ind], labels_multi_lbl[test_ind]
    X_valid, X_test, y_valid, y_test = train_test_split(X_test_p_valid, y_test_p_valid,
                                                           test_size=0.5,
                                                           stratify=y_test_p_valid_mlbl,
                                                           random_state=42)

    print('X_train shape: ', X_train.shape)
    print('X_valid shape: ', X_valid.shape)
    print('X_test shape: ', X_test.shape)

    # Inputs and targets are both converted to float tensors.
    X_train, X_valid  = torch.from_numpy(X_train).float(), torch.from_numpy(X_valid).float()
    y_train, y_valid = torch.from_numpy(y_train), torch.from_numpy(y_valid)

    y_train, y_valid = y_train.float(), y_valid.float()
    train_use = data_utils.TensorDataset(X_train, y_train)
    train_loader = data_utils.DataLoader(train_use, batch_size=batch_size, shuffle=shuffleBatches)

    val_use = data_utils.TensorDataset(X_valid, y_valid)
    val_loader = data_utils.DataLoader(val_use, batch_size=32, shuffle=False)

    # Size the output layer from the configured `num_classes` instead of a
    # repeated literal, so the setting and the model cannot drift apart.
    model = VGG16_pool(num_classes=num_classes)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001, weight_decay=1e-7)

    val_acc_epochs = []
    val_loss_epochs = []
    for epoch in range(1, num_epoch+1):
        train_loss = train(model, train_loader, optimizer, epoch,
                            device,
                            verbose=1, loss_fn = 'bceLogit')
        val_loss, val_acc = test(model, val_loader,
                                    device,
                                    loss_fn = 'bceLogit')
        val_acc_epochs.append(val_acc)
        val_loss_epochs.append(val_loss)
        print('val loss = %f, val acc = %f'%(val_loss, val_acc))
        saveModel.check(model, val_acc, comp='max')

    # Load the best validated model into a fresh network. map_location keeps the
    # load working when the checkpoint was written on a GPU and this host has
    # only a CPU (or the reverse).
    model = VGG16_pool(num_classes=num_classes)
    model.to(device)
    model.load_state_dict(torch.load(PATH_curr, map_location=device))

    X_test, y_test  = torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()

    test_use = data_utils.TensorDataset(X_test, y_test)
    test_loader = data_utils.DataLoader(test_use, batch_size=32, shuffle=False)
    test_loss, test_acc = test(model, test_loader,
                                device,
                                loss_fn = 'bceLogit')
    print('test loss = %f, test acc = %f'%(test_loss, test_acc))

    # One record per fold: per-epoch validation curves plus the final test score.
    log_ = dict(
            exp_ind = exp_ind,
            epochs = num_epoch,
            validation_accuracy = val_acc_epochs,
            validation_loss = val_loss_epochs,
            test_loss = test_loss,
            test_accuracy = test_acc,
            X_train_shape = X_train.shape,
            X_valid_shape = X_valid.shape,
            batch_size =batch_size,
    )
    store_.update(log_)
    exp_ind += 1