In [1]:
%load_ext autoreload
%autoreload 2
%pylab inline
import pandas as pd
import os

Populating the interactive namespace from numpy and matplotlib


In [2]:
import itertools
import pickle
from dnn.parser import get_sv_parser
from dnn.data.dataloader import init_default_loaders, init_embed_loaders
from dnn.train.model import init_seed, init_speechnet
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [3]:
from tqdm import tqdm_notebook
import torch
from torch.autograd import Variable

def embeds(opt, val_dataloader, model):
    """Embed every batch of ``val_dataloader`` window by window.

    Each batch ``x`` is cut along dimension 2 (treated as the time axis) into
    consecutive, non-overlapping windows of ``opt.splice_frames`` frames;
    trailing frames that do not fill a whole window are dropped. Every window
    is passed through ``model.embed`` and the outputs of all windows are
    concatenated along dimension 0 ("snippet scale"). The batch labels are
    repeated once per window so they stay aligned with the embeddings.

    Args:
        opt: options object; ``opt.splice_frames`` and ``opt.cuda`` are read.
        val_dataloader: iterable yielding ``(x, y)`` batches of tensors.
        model: network exposing ``eval()`` and ``embed(x)``.

    Returns:
        ``(embeddings, labels)`` as numpy arrays: the per-batch embeddings
        stacked with ``np.vstack`` and the per-batch labels joined with
        ``np.hstack``.
    """
    batch_iter = iter(val_dataloader)
    model.eval()
    window = opt.splice_frames
    all_embeddings, all_labels = [], []

    for x, y in tqdm_notebook(batch_iter):
        n_frames = x.size(2)
        window_outputs = []
        for start in range(0, n_frames - window + 1, window):
            chunk = Variable(x.narrow(2, start, window))
            if opt.cuda:
                chunk = chunk.cuda()
            # Move results back to the CPU so they can be converted to numpy.
            window_outputs.append(model.embed(chunk).cpu().data)

        # Snippet-scale LDA: one embedding (and one label) per window.
        batch_embeddings = torch.cat(window_outputs, dim=0)
        batch_labels = torch.cat([y] * len(window_outputs), dim=0)

        # Utterance-scale alternative (disabled): average the windows instead.
        #   batch_embeddings = torch.stack(window_outputs, dim=0).mean(0)

        all_embeddings.append(batch_embeddings.numpy())
        all_labels.append(batch_labels.numpy())

    return np.vstack(all_embeddings), np.hstack(all_labels)

In [4]:
import resource

# Raise this process's soft limit on open file descriptors to 4096.
# NOTE(review): presumably needed because the data loaders keep many files
# open at once — confirm.
NOFILE_SOFT_TARGET = 4096
soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)

# Never request more than the hard limit: setrlimit raises ValueError when the
# new soft limit exceeds it.
if hard_limit == resource.RLIM_INFINITY:
    wanted_soft = NOFILE_SOFT_TARGET
else:
    wanted_soft = min(NOFILE_SOFT_TARGET, hard_limit)

# Only ever raise the soft limit; leave it alone if it is already high enough
# (or unlimited).
if soft_limit != resource.RLIM_INFINITY and soft_limit < wanted_soft:
    resource.setrlimit(resource.RLIMIT_NOFILE, (wanted_soft, hard_limit))

In [5]:
def secToSample(sec, sample_rate=16000):
    """Convert a duration in seconds to a number of audio samples.

    Args:
        sec: duration in seconds (int or float).
        sample_rate: samples per second; defaults to 16000.

    Returns:
        ``int(sample_rate * sec)``, i.e. the sample count truncated towards
        zero.
    """
    return int(sample_rate * sec)

In [6]:
# Start from the parser defaults (no command-line arguments), then override
# the feature-extraction settings used in this notebook.
options = get_sv_parser().parse_args(args=[])

_option_overrides = {
    "n_dct_filters": 40,
    "n_mels": 40,
    "timeshift_ms": 100,
    "window_size": 0.025,
    "window_stride": 0.010,
    "cache_size": 32768,
    "input_format": "mfcc",
    "input_clip": True,
    # 3 seconds of audio, expressed in samples.
    "input_length": secToSample(3),
    # 0.1 s of audio divided into 160-sample steps, plus one.
    # 160 equals window_stride (0.010 s) at the 16000 Hz used by secToSample.
    "splice_frames": secToSample(0.1) // 160 + 1,
}
for _option_name, _option_value in _option_overrides.items():
    setattr(options, _option_name, _option_value)

### Load Dataset

In [7]:
# RedDots trial list: derive a speaker id and an integer label for every file.
options.data_folder = "/home/muncok/DL/dataset/SV_sets/reddots_r2015q4_v1/wav/"
val_df = pd.read_pickle("trials/reddots/m_part1/m_part1_files.pkl")

# The speaker id is the first path component of `file`.
val_df['spk'] = val_df.file.apply(lambda x: x.split('/')[0])

# Labels are assigned in order of first appearance. A dict lookup replaces the
# per-row list.index() scan (same labels, linear instead of quadratic time).
reddots_spks = val_df.spk.unique().tolist()
spk_to_label = {spk: idx for idx, spk in enumerate(reddots_spks)}
val_df['label'] = val_df.spk.map(spk_to_label)

# NOTE(review): this overwrites the pickle that was just read, so the file on
# disk gains the `spk` and `label` columns — confirm this is intended.
val_df.to_pickle("trials/reddots/m_part1/m_part1_files.pkl")

In [8]:
# Speech-commands trial list. Rebinds `options.data_folder` and `val_df`.
options.data_folder = "/home/muncok/DL/dataset/SV_sets/speech_commands/"
val_df = pd.read_pickle("trials/commands/final/equal_num_102spk_iden.pkl")

# Prefix every file name with its `sent` column to get the relative path.
val_df['file'] = [
    os.path.join(sent, fname)
    for sent, fname in zip(val_df['sent'], val_df['file'])
]

In [9]:
# (Disabled) Keep only the speakers that have more than 10 rows in val_df.
# valid_spks = val_df.spk.value_counts()[val_df.spk.value_counts() > 10].index
# val_df_valid = val_df[val_df.spk.isin(valid_spks)]
# len(val_df)

### Load Model

In [10]:
# options.val_manifest = "manifests/commands/words/si/si_uni_manifest.csv"
# SimpleCNN checkpoint, fed with 'fbank' features.
# Note: the res15 cell further down rebinds `model` and these options when run.
options.model = "SimpleCNN"
options.input = "models/commands/equal_num_102spk_dot1.pt"
options.input_format = 'fbank'
model = init_speechnet(options)

models/commands/equal_num_102spk_dot1.pt is loaded


In [11]:
# res15 checkpoint, fed with 'mfcc' features.
# This rebinds `model`, so it is the network used by the cells that follow.
options.model = "res15"
options.input = "models/voxc/si_train/full_train/si_voxc_res15_0.1s_full_1.pt"
options.input_format = 'mfcc'
model = init_speechnet(options)

models/voxc/si_train/full_train/si_voxc_res15_0.1s_full_1.pt is loaded


### Embeddings

In [12]:
# Build a loader over the selected trial list and embed it with the loaded model.
val_dataloader = init_embed_loaders(options, val_df)
_embed_result = embeds(options, val_dataloader, model)
# embedings: sample x emb_size; labels: one entry per embedding row.
embedings, labels = _embed_result




### LDA Estimator

In [13]:
# Number of embedding rows held out from LDA fitting and used for scoring.
n_test = 100 # for test samples

In [14]:
# Randomly split the embeddings into a training part and `n_test` held-out
# rows, then fit an LDA classifier on the training part.
n_samples = embedings.shape[0]
if not 0 < n_test < n_samples:
    raise ValueError(
        "n_test must be in (0, {}), got {}".format(n_samples, n_test))
n_train = n_samples - n_test

# A dedicated, seeded generator makes the split reproducible across kernel
# restarts without touching numpy's global random state.
SPLIT_SEED = 0
random_idx = np.random.RandomState(SPLIT_SEED).permutation(n_samples)

# Slice both parts at the same boundary so they can never overlap
# (a negative slice such as [-n_test:] selects everything when n_test == 0).
train_idx, test_idx = random_idx[:n_train], random_idx[n_train:]
train_X, train_y = embedings[train_idx], labels[train_idx]
test_X, test_y = embedings[test_idx], labels[test_idx]

clf = LDA()
clf.fit(train_X, train_y)

LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [15]:
# Evaluate the fitted LDA on the held-out rows; for scikit-learn classifiers
# `score` is the mean accuracy.
score = clf.score(test_X, test_y)
print(score) # test_score

0.1


In [16]:
# Save the fitted LDA next to the other LDA models, named after the checkpoint
# file (base name without its extension, whatever the extension length).
model_name = os.path.splitext(os.path.basename(options.input))[0]
lda_out = "models/lda/{}_splice_lda.pkl".format(model_name)

# Create the output directory if it is missing, and close the file handle
# deterministically once the pickle has been written.
os.makedirs(os.path.dirname(lda_out), exist_ok=True)
with open(lda_out, "wb") as lda_file:
    pickle.dump(clf, lda_file)