# 0. Functions import

In [3]:
import sys,os
sys.path.append(os.getcwd() + os.sep + 'src/')

import numpy as np
#import glob2
#import joblib
import pathlib
from scipy.io.wavfile import write as write_waveform
from collections import OrderedDict
import matplotlib.pyplot as plt
#import seaborn as sns
import pickle
import pandas as pd

import torch
import torch.nn.functional as F
import torch.nn as nn
from torch import optim

from sklearn.metrics import accuracy_score

from spectrogram_stream import SpectrogramStream
from autoencoders import ConvolutionalAutoencoder
from encoders import ConvolutionalEncoder
from bottlenecks import ConvolutionalBottleneck
from reconstructors import ConvolutionalDecoder
from visualization import spectrogram_to_waveform, compute_reconstruction_plot

In [4]:
from models import spectrogram_model

# 1. Data loading

In [5]:
data_path = 'data'
models_path = 'models'
experiment_name = 'dataset2filtered_b64_baseline_larger_l1'
results_path = os.path.join('results', experiment_name)
frame_step = 46
n_iter = 300
sampling_rate = 16000
n_batch = 10 # batch size
n_epochs = 10 # epochs for training
test_train_split = 0.8
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [8]:
# load data stream
stream = SpectrogramStream(root_path=data_path, batch_size=n_batch,test_train_split=test_train_split, val_train_split=0.0, subsets_to_load=['train','test'], sr=sampling_rate)

#load labels
label_df = pd.read_csv("data/labels.tsv", sep='\t').drop(columns="Unnamed: 0").set_index("sound_id")

#for w in range(len(stream.train_loader)) :#get training data (step by step)
    #data = next(iter(stream.train_loader))['sound'].unsqueeze(1).float()
    #spectrograms = data['sound'].unsqueeze(1).float().to(device)

Loading Spectrogram dataset...
Number of samples detected: 39718
Spectrogram dataset initialized.
Building Spectrogram dataloaders...
Building the data splitter...
Building the train data loader...
Train loader size: 3178
Building the test data loader...
Test loader size: 795
Spectrogram dataloaders built successfully


# 2. Model training

In [62]:
# instantiate model
model = spectrogram_model(2)
model.to(device)

# optimization and loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [63]:
# model training
for epoch in range(n_epochs):
    running_loss = 0
    for w in range(len(stream.train_loader)) :#get training data (step by step)
        data = next(iter(stream.train_loader))
        sound = data['sound'].unsqueeze(1).float().to(device)
        sound_id = data['sound_id']

        # target values for the chosen variable 
        var = "gender"
        target_mod = pd.DataFrame([],index=sound_id).merge(label_df[[var]], left_index=True, right_index=True)[var].values
        #target = torch.Tensor(np.zeros((10,3)))
        #for i in range(len(target_mod)):
        #    target[i][target_mod[i]] = 1
        target_mod = torch.Tensor(target_mod).long().to(device)
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # compute output
        output = model(sound)

        # loss computation and propagation
        loss = criterion(output, target_mod)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        
        if w%500 == 0:
            print("Epoch N°", epoch, " batch n°", w, " running loss = ", running_loss/500)
            running_loss = 0
        

Epoch N° 0  batch n° 0  running loss =  0.0013955731391906737
Epoch N° 0  batch n° 500  running loss =  0.658361890733242
Epoch N° 0  batch n° 1000  running loss =  0.6515746988654136
Epoch N° 0  batch n° 1500  running loss =  0.658188049018383
Epoch N° 0  batch n° 2000  running loss =  0.6544179874658584
Epoch N° 0  batch n° 2500  running loss =  0.649261391222477
Epoch N° 0  batch n° 3000  running loss =  0.6392378455400467
Epoch N° 1  batch n° 0  running loss =  0.0018332420587539673
Epoch N° 1  batch n° 500  running loss =  0.615932634472847
Epoch N° 1  batch n° 1000  running loss =  0.5868706212639808
Epoch N° 1  batch n° 1500  running loss =  0.5745286088585854
Epoch N° 1  batch n° 2000  running loss =  0.5594303228855133
Epoch N° 1  batch n° 2500  running loss =  0.5520093460977078
Epoch N° 1  batch n° 3000  running loss =  0.5256716779470444
Epoch N° 2  batch n° 0  running loss =  0.0013226598501205444
Epoch N° 2  batch n° 500  running loss =  0.5139135766923427
Epoch N° 2  bat

In [64]:
save = True
if save :
    torch.save(model.state_dict(), "spectrogram_" + var + "_classifier.statedict")
else :
    x = torch.load("spectrogram_" + var +"_classifier.statedict")

# 3. Model Evaluation

## 1. Load Model

In [15]:
!ls

data				sound_id.pickle
latent_space_exploration.ipynb	spectrogram_age_classifier.statedict
models				spectrogram_classifier.ipynb
models.py			spectrogram_gender_classifier.statedict
projection.pickle		src
__pycache__			tsne_df.pickle
results


In [83]:
var = "gender" # "age", "accent"

In [11]:
# load the corresponding model
label_df[var + "_prediction"] = -1
model = spectrogram_model(2) # for gender : 2, for age : 3, for accent 15
model.load_state_dict(torch.load("models/spectrogram_" + var +"_classifier.statedict"))
device = "cpu"

# load data
stream = SpectrogramStream(root_path=data_path, batch_size=n_batch,test_train_split=test_train_split, val_train_split=0.0, subsets_to_load=['train','test'], sr=sampling_rate)

Loading Spectrogram dataset...
Number of samples detected: 39718
Spectrogram dataset initialized.
Building Spectrogram dataloaders...
Building the data splitter...
Building the train data loader...
Train loader size: 3178
Building the test data loader...
Test loader size: 795
Spectrogram dataloaders built successfully


## 2. Predict on test

In [85]:
# Make prediction to test the model

for w in range(len(stream.test_loader)) :#get test data (step by step)
    data = next(iter(stream.test_loader))
    sound = data['sound'].unsqueeze(1).float().to(device)
    sound_id = data['sound_id']

    # target values for the chosen variable 
    target_mod = pd.DataFrame([],index=sound_id).merge(label_df[[var]], left_index=True, right_index=True)[var].values
    #target = torch.Tensor(np.zeros((10,3)))
    #for i in range(len(target_mod)):
    #    target[i][target_mod[i]] = 1
    #target_mod = torch.Tensor(target_mod).long().to(device)

    # compute output
    output = model(sound)
    
    pred = torch.argmax(output, dim = 1)
    
    for i in range(len(sound_id)):
        label_df[var + "_prediction"].loc[label_df.index == sound_id[i]] = pred[i].item()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 235, in _feed
    close()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/usr/lib/python3.8/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor


## 3. Analyze predictions

### 1. Gender prediction

In [86]:
label_df.gender_prediction.value_counts() # -1 values for prediction, correspond to the element of the training set (on which we do not predict)

-1    34707
 0     2998
 1     2013
Name: gender_prediction, dtype: int64

In [87]:
# confusion matrix
pd.crosstab(label_df.gender, label_df.gender_prediction).iloc[:,1:]

gender_prediction,0,1
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
0,2779,400
1,219,1613


In [88]:
# accuracy
y_true = label_df.gender.loc[label_df.gender_prediction != -1]
y_pred = label_df.gender_prediction.loc[label_df.gender_prediction != -1]

accuracy_score(y_true, y_pred)

0.8764717621233287

### 2. Age prediction

In [75]:
label_df.age_prediction.value_counts() # -1 values for prediction, correspond to the element of the training set (on which we do not predict)

-1    34694
 1     4629
 0      395
Name: age_prediction, dtype: int64

In [76]:
# confusion matrix
pd.crosstab(label_df.age, label_df.age_prediction).iloc[:,1:]

age_prediction,0,1
age,Unnamed: 1_level_1,Unnamed: 2_level_1
0,213,1999
1,179,2558
2,3,72


In [78]:
# accuracy
y_true = label_df.age.loc[label_df.age_prediction != -1]
y_pred = label_df.age_prediction.loc[label_df.age_prediction != -1]

accuracy_score(y_true, y_pred) # accuracy is not very good (probably the age feature is irrelevant or not well defined)

0.5515525477707006

# 4. Predict on rigged data

In [2]:
var = "gender" # "age", "accent"

In [13]:
# load the corresponding model
label_df[var + "_prediction"] = -1
model = model = spectrogram_model(2) # for gender : 2, for age : 3, for accent 15
model.load_state_dict(torch.load("models/spectrogram_" + var +"_classifier.statedict"))
device = "cpu"