This notebook (and the accompanying script) contains the full code for my participation in the Rainforest Connection Species Audio Detection competition:

https://www.kaggle.com/c/rfcx-species-audio-detection

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import StratifiedKFold, KFold

import torch
from torch.utils.data import Dataset, DataLoader
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

%run kaggle_functions.py

# Save all training data as mel-spectrogram images
* 1) a 10-second random (or center) crop around each particular label
* 2) each training file split into 6 consecutive 10-second chunks - this is for evaluating validation files with the same strategy as is used for the test predictions


In [None]:
# Render the labelled training recordings to mel-spectrogram PNG files:
#   * one crop per labelled call, written to ``path_to_save_train`` as
#     "<species_id>_<recording_id>_<t_min * 10, rounded>.png";
#   * six consecutive chunks of ``file_length`` seconds per recording, written
#     to ``path_to_save_valid`` as "<recording_id>_<start>.png".
files = pd.read_csv("E:\\kaggle_data\\train\\_tp.csv")

path_train = "E:\\kaggle_data\\train"
path_to_save_train = "E:\\kaggle_data\\spectograms\\train_full"
path_to_save_valid = "E:\\kaggle_data\\spectograms\\valid_full"

file_length = 10
fmax = 20000

# A recording may appear in several label rows. Grouping the rows by recording
# means the audio is decoded once and the chunk images are written once per
# recording, instead of once per label row. ``sort=False`` keeps the
# recordings in their order of first appearance in the CSV.
for recording_id, labelled_rows in files.groupby('recording_id', sort=False):

    file_to_load = f"{recording_id}.flac"
    data, sr = load_audio(os.path.join(path_train, file_to_load))

    # one spectrogram per labelled crop of this recording
    for label in labelled_rows.itertuples(index=False):
        spectogram = prepare_mel_spectogram(data,
                                            sampling_rate = sr,
                                            tmin = label.t_min,
                                            tmax = label.t_max,
                                            fmin = 90,
                                            fmax = fmax,
                                            crop = 'center'
                                           )

        # t_min (scaled by 10 and rounded) keeps the names of several crops of
        # the same species in the same recording apart
        filename = f"{label.species_id}_{recording_id}_{int(round(label.t_min*10,0))}.png"
        fullpath = os.path.join(path_to_save_train, filename)
        cv2.imwrite(fullpath, spectogram)

    # spectrograms of the consecutive chunks covering seconds 0-60
    for start in np.arange(0, 60, file_length):

        spectogram = prepare_mel_spectogram(data,
                                            sampling_rate = sr,
                                            tmin = start,
                                            tmax = start + file_length,
                                            fmin = 90,
                                            fmax = fmax,
                                            crop = 'center'
                                           )
        filename = f"{file_to_load.split('.')[0]}_{start}.png"
        fullpath = os.path.join(path_to_save_valid, filename)
        cv2.imwrite(fullpath, spectogram)

# Saving the test set as 6 x 10 s mel-spectrogram chunks

In [None]:
# Cut every test recording into consecutive chunks of ``file_length`` seconds
# and store each chunk as a mel-spectrogram PNG named
# "<file name up to the first dot>_<chunk start>.png".
file_length = 10  # length of one chunk, in seconds
path_test = "E:\\kaggle_data\\test"
path_to_save = "E:\\kaggle_data\\spectograms\\test"
test_files = os.listdir(path_test)
fmax = 20000

counter = 0  # not used by the loop; available for optional progress output
for file in test_files:
    data, sr = load_audio(os.path.join(path_test, file))
    recording_name = file.split('.')[0]

    for start in np.arange(0, 60, file_length):
        # settings shared by every chunk, apart from the time window
        chunk_settings = dict(sampling_rate=sr,
                              tmin=start,
                              tmax=start + file_length,
                              fmin=90,
                              fmax=fmax,
                              crop='center')
        spectogram = prepare_mel_spectogram(data, **chunk_settings)

        fullpath = os.path.join(path_to_save, f"{recording_name}_{start}.png")
        cv2.imwrite(fullpath, spectogram)

# Visualise some mel-spectrograms

# Plot 3 mel-spectrograms with label 19

In [None]:

# Preview saved training spectrograms for a single target label.
image_number = 3     # the first n images with the target are plotted
desired_index = 19   # target whose images should be plotted
path_train = "E:\\kaggle_data\\spectograms\\train_full"

print_images(path_train, desired_index, image_number)

# Plot 3 mel-spectrograms with label 10

In [None]:
# Preview saved training spectrograms for a single target label.
image_number = 3     # the first n images with the target are plotted
desired_index = 10   # target whose images should be plotted
path_train = "E:\\kaggle_data\\spectograms\\train_full"

print_images(path_train, desired_index, image_number)

# Resnet training and evaluation phase definition

In [None]:
def setlr(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer`` to ``lr``.

    The optimizer is modified in place and also returned, so the call can be
    used in an assignment.
    """
    for group in optimizer.param_groups:
        group.update({'lr': lr})
    return optimizer

def lr_decay(optimizer, epoch):
    """Divide the learning rate by ten at every eighth epoch.

    On epochs divisible by 8 (epoch 0 included) the rate of ``optimizer`` is
    set to the global ``learning_rate`` divided by ``10 ** (epoch // 8)`` and
    the new value is printed. On every other epoch the optimizer is returned
    untouched.
    """
    if epoch % 8 != 0:
        return optimizer

    completed_steps = epoch // 8
    new_lr = learning_rate / (10 ** completed_steps)
    optimizer = setlr(optimizer, new_lr)
    print(f'Changed learning rate to {new_lr}')
    return optimizer



def train_model(model, criterion, optimizer, curr_fold, num_epochs, change_lr, evaluate_fake = False):
    """Train ``model`` for ``num_epochs`` epochs, save it and report its metrics.

    Besides its arguments the function reads these notebook-level globals:
    ``train_loader``/``train_data``, ``valid_loader``/``valid_data``,
    ``evaluate_path``, ``list_train``, ``list_valid`` and ``device``.

    Args:
        model: network to train; updated in place and stored with
            ``torch.save`` once all epochs have run.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer that updates the parameters of ``model``.
        curr_fold: fold identifier, used only in the saved model's file name.
        num_epochs: number of passes over ``train_loader``.
        change_lr: callable ``(optimizer, epoch) -> optimizer`` run at the
            start of every epoch; pass a falsy value to leave the rate alone.
        evaluate_fake: when true, loss and accuracy are additionally computed
            on ``valid_loader`` after each epoch and the loss/LWLRAP curves
            are plotted at the end.

    Returns:
        The validation LWLRAP of the last epoch, or ``None`` if no epoch ran.
    """
    # per-epoch metric histories
    train_loss = []
    valid_loss = []
    train_acc = []
    valid_acc = []
    train_LWLRAP = []
    valid_LWLRAP = []

    files = pd.read_csv("E:\\kaggle_data\\train\\_tp.csv")

    # columns of the frame returned by score_and_extend that hold the true
    # labels ("s<i>_true") and the predictions ("s<i>") of the 24 classes
    labs = [f's{i}_true' for i in range(24)]
    preds = [f's{i}' for i in range(24)]

    for epoch in range(num_epochs):
        print('-----------------------------------')
        print(f'Epoch {epoch}/{num_epochs}')

        # TRAIN PHASE
        # train mode matters because layers such as dropout behave
        # differently in the train and eval phases
        model.train()

        actual_loss = 0
        num_corrects = 0

        if change_lr:
            optimizer = change_lr(optimizer, epoch)

        for inputs, labels in train_loader:
            inputs = inputs.to(device, dtype=torch.float)
            labels = labels.to(device, dtype=torch.float)

            # without this, loss.backward() would add to the gradients of
            # the previous batch
            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            loss.backward()   # gradient of the loss w.r.t. the parameters
            optimizer.step()  # update the model parameters

            # loss.item() is multiplied by the batch size so that dividing by
            # the dataset size below yields a per-sample value
            actual_loss += loss.item() * inputs.size(0)
            num_corrects += torch.sum(outputs.argmax(dim=1) == labels.argmax(dim=1)).item()

        train_loss.append(actual_loss / len(train_data))
        train_acc.append(num_corrects / len(train_data))

        print(f'train Loss: {round(train_loss[epoch],6)} Acc: {round(train_acc[epoch],6)} Num_corrects: {round(num_corrects,6)}/ {len(train_data)} ')

        # VALIDATION PHASE
        # no gradients are needed for scoring
        with torch.no_grad():

            model.eval()

            # LWLRAP is computed every epoch on the chunked images found in
            # evaluate_path, for the training and the validation recordings
            evaluated_data = score_and_extend(evaluate_path, model, list_train, files)
            actual_LWLRAP = LWLRAP(torch.Tensor(evaluated_data[preds].to_numpy()),
                                   torch.Tensor(evaluated_data[labs].to_numpy()))
            train_LWLRAP.append(actual_LWLRAP)
            print(f"TRAIN LWLRAP:{actual_LWLRAP}")

            evaluated_data = score_and_extend(evaluate_path, model, list_valid, files)
            actual_LWLRAP = LWLRAP(torch.Tensor(evaluated_data[preds].to_numpy()),
                                   torch.Tensor(evaluated_data[labs].to_numpy()))
            valid_LWLRAP.append(actual_LWLRAP)
            print(f"VALID LWLRAP:{actual_LWLRAP}")

            if evaluate_fake:

                valid_actual_loss = 0
                valid_num_corrects = 0

                for inputs, labels in valid_loader:
                    inputs = inputs.to(device, dtype=torch.float)
                    labels = labels.to(device, dtype=torch.float)

                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    valid_actual_loss += loss.item() * inputs.size(0)
                    valid_num_corrects += torch.sum(outputs.argmax(dim=1) == labels.argmax(dim=1)).item()

                valid_loss.append(valid_actual_loss / len(valid_data))
                valid_acc.append(valid_num_corrects / len(valid_data))

                print(f'valid Loss: {round(valid_loss[epoch],6)} Acc: {round(valid_acc[epoch],6)} Num_corrects: {round(valid_num_corrects,6)}/ {len(valid_data)} ')

    torch.save(model, f'E:\\kaggle_data\\saved_models\\model_{curr_fold}')

    if evaluate_fake:
        fig, axs = plt.subplots(2, figsize=(14,8))
        axs[0].plot(train_loss)
        axs[0].plot(valid_loss)
        axs[0].set_title('blue = Train BCE, orange = Valid Bce')

        axs[1].plot(train_LWLRAP)
        axs[1].plot(valid_LWLRAP)
        axs[1].set_title('Blue = Train LWLRAP, orange = Valid LWLRAP')
        plt.show()

    # Hand the final validation score back so a caller can collect one
    # value per fold; previously the function implicitly returned None.
    return valid_LWLRAP[-1] if valid_LWLRAP else None
    

# CV model training

In [None]:
# Stratified cross-validation: one model is built and trained per fold, and
# the value returned by train_model is collected in ``valid_lwlrap``.
# ``evaluate_path``, ``list_train``, ``list_valid``, the datasets, the loaders
# and ``learning_rate`` stay module-level names because train_model and
# lr_decay read them as globals.
files = pd.read_csv("E:\\kaggle_data\\train\\_tp.csv")
path_train = "E:\\kaggle_data\\spectograms\\train_full"
evaluate_path = "E:\\kaggle_data\\spectograms\\valid_full"
num_classes=24

n_splits = 4
cross_val = StratifiedKFold(n_splits= n_splits, shuffle=True, random_state = 10)

valid_acc = []
valid_lwlrap = []

i = 0
for train_indexes, valid_indexes in cross_val.split(files['recording_id'], files['species_id']):
    print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    print(f'Starting fold {i+1}')
    print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')

    # split() yields row positions, hence positional (.iloc) selection
    list_train = files['recording_id'].iloc[train_indexes].unique().tolist()
    list_valid = files['recording_id'].iloc[valid_indexes].unique().tolist()

    # The split is made per label row, so a recording with several labels can
    # land on both sides; such recordings are dropped from the validation
    # list. A set keeps the membership test O(1).
    train_recordings = set(list_train)
    list_valid = [x for x in list_valid if x not in train_recordings]

    train_data = torch_dataset(list_train, path_train, num_classes, phase = 'train')
    valid_data = torch_dataset(list_valid, path_train, num_classes, phase = 'valid')
    train_loader = DataLoader(train_data, batch_size=16, shuffle = True)
    valid_loader = DataLoader(valid_data, batch_size=16, shuffle = True)
    print('Data loaded')

    model_ft = return_nn(num_classes, device)

    learning_rate = 0.0001
    optimizer = torch.optim.Adam(model_ft.parameters(), lr=learning_rate)

    #loss_function = nn.BCEWithLogitsLoss()

    loss_function = FocalLoss()
    # use the selected ``device`` rather than an unconditional .cuda(), which
    # raises on machines without a GPU although ``device`` falls back to CPU
    loss_function = loss_function.to(device)

    lwlrap = train_model(model_ft, loss_function, optimizer, i, 12, lr_decay ,evaluate_fake = True)
    #valid_acc.append(acc)
    valid_lwlrap.append(lwlrap)
    i = i+1
    

# SCORING OF TEST SET

In [None]:
# Score the test spectrograms and write the result as the submission CSV.
test_path = "E:\\kaggle_data\\spectograms\\test"

# each test file name, cut at its first dot
test_files = [name.split('.')[0] for name in os.listdir("E:\\kaggle_data\\test")]

result = score_images_test_set(test_path, test_files)
result.to_csv('E:\\kaggle_data\\submission.csv', header = True, index = False)