In [1]:
import matplotlib.pyplot as plt
import kaldi_io
import numpy as np
from glob import glob 
import collections
from sklearn import (manifold, datasets, decomposition, ensemble,
                     discriminant_analysis, random_projection)
import re
import os

In [2]:
import torch.optim as optim
import torch.utils.data as utils
import torch.nn.functional as F
import torch.nn as nn
import torch

In [3]:
from sklearn import metrics
from sklearn.cluster import KMeans
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.mixture import GaussianMixture
from sklearn.mixture import BayesianGaussianMixture

In [4]:
emo_map={'Angry':0, 'Fear':1,'Happy':2, 'Neutral':3,'Sad':4, 'Surprise':5}

In [5]:
csv_dir="/home/gyzhang/projects/unsupervised_acoustic_clustring/mix_csv/"
feas_list=[]
emo_id_list=[]
for csv_path in glob(csv_dir+'/*.csv'):
    csv_path_base = os.path.basename(csv_path)
    emotion_id = re.split('\-',csv_path_base)[1]
    emo_id_list.append(emo_map[emotion_id])
    with open(csv_path,'rb') as file_id:
        csv_lines=file_id.readlines()
    fea=csv_lines[-1]
    num_fea=re.split("\,",fea.decode('utf-8'))
    feas_list.append(num_fea[1:-1])

### normal training

In [6]:
X=np.array(feas_list,dtype=np.float32)
y=np.array(emo_id_list,np.int32)

In [9]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.1,random_state=1234,shuffle=True)

In [10]:
mean = np.mean(X_train,axis=0)
std = np.std(X_train,axis=0)
X_train = (X_train - mean)/(std+1e-18)
X_test = (X_test - mean)/(std+1e-18)

In [13]:
y_train=np.expand_dims(y_train,axis=1)
y_test = np.expand_dims(y_test,axis=1)

In [14]:
train_dataloader = utils.DataLoader(utils.TensorDataset(torch.from_numpy(X_train),torch.from_numpy(y_train)),batch_size=32,shuffle=True)

In [16]:
TX_test = torch.from_numpy(X_test)
Ty_test = torch.from_numpy(y_test).type(torch.LongTensor)
Ty_test=Ty_test.view(-1)

In [17]:
class EmoClass(torch.nn.Module):
    def __init__(self,):
        super(EmoClass, self).__init__()
        self.first_linear = nn.Linear(384,100)
        self.bn1 = nn.BatchNorm1d(num_features=100)
        self.hidden_layers = nn.ModuleList([nn.Linear(100,100) for _ in range(2)])
        self.bns = nn.ModuleList([nn.BatchNorm1d(num_features=100) for _ in range(2)]) 
        self.last_linear = nn.Linear(100, 6)
    def forward(self, X):
        # output of first layer 100 dims
        h_o1 = self.bn1(self.first_linear(X))
        x = F.relu(h_o1)
        for bn, hl in zip(self.bns, self.hidden_layers):
            h_o = bn(hl(x))
            x = F.relu(h_o)
        x = self.last_linear(x)
        return h_o1, h_o, x

In [18]:
net = EmoClass()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),lr=0.001,momentum=0.9)

In [19]:
for epoch in range(15):
    running_loss = 0.0
    for i, data in enumerate(train_dataloader,0):
        X_batch, labels = data
        labels = torch.squeeze(labels).type(torch.LongTensor)
        optimizer.zero_grad()
        out_data = net(X_batch)
        _, _, outputs = out_data
        loss = criterion(outputs,labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i%10 == 9:  
            T_output_datas = net(TX_test)
            _,_,T_outputs = T_output_datas
            test_loss = criterion(T_outputs,Ty_test)
            print('[%d, %5d] train loss:%.3f test loss:%.3f ' % (epoch+1,i+1,running_loss/10,test_loss))
            running_loss = 0.0

[1,    10] train loss:1.820 test loss:1.777 
[1,    20] train loss:1.735 test loss:1.735 
[1,    30] train loss:1.727 test loss:1.690 
[1,    40] train loss:1.665 test loss:1.650 
[1,    50] train loss:1.638 test loss:1.612 
[1,    60] train loss:1.608 test loss:1.573 
[1,    70] train loss:1.543 test loss:1.537 
[1,    80] train loss:1.549 test loss:1.501 
[1,    90] train loss:1.495 test loss:1.466 
[1,   100] train loss:1.447 test loss:1.432 
[2,    10] train loss:1.392 test loss:1.391 
[2,    20] train loss:1.353 test loss:1.360 
[2,    30] train loss:1.334 test loss:1.331 
[2,    40] train loss:1.300 test loss:1.301 
[2,    50] train loss:1.245 test loss:1.269 
[2,    60] train loss:1.244 test loss:1.238 
[2,    70] train loss:1.227 test loss:1.209 
[2,    80] train loss:1.190 test loss:1.181 
[2,    90] train loss:1.158 test loss:1.155 
[2,   100] train loss:1.182 test loss:1.128 
[3,    10] train loss:1.064 test loss:1.098 
[3,    20] train loss:1.056 test loss:1.070 
[3,    30]

#### Evaluation on test set

In [20]:
_,_,outputs = net(TX_test)
_, predicted = torch.max(outputs, 1)

In [21]:
correct = 0 
correct += (predicted == Ty_test).sum().item()

In [22]:
correct/len(Ty_test)

0.8083333333333333

#### Using intermediate features for clustering

In [23]:
csv_dir="/home/gyzhang/projects/unsupervised_acoustic_clustring/wz_csv/"
feas_list=[]
emo_id_list=[]
for csv_path in glob(csv_dir+'/*.csv'):
    csv_path_base = os.path.basename(csv_path)
    emotion_id = re.split('\-',csv_path_base)[1]
    emo_id_list.append(emo_map[emotion_id])
    with open(csv_path,'rb') as file_id:
        csv_lines=file_id.readlines()
    fea=csv_lines[-1]
    num_fea=re.split("\,",fea.decode('utf-8'))
    feas_list.append(num_fea[1:-1])

In [24]:
X=np.array(feas_list,dtype=np.float32)
y=np.array(emo_id_list,np.int32)
X_norm=scale(X)

TX_test = torch.from_numpy(X_norm)
Ty_test = torch.from_numpy(y).type(torch.LongTensor)

approx,bottle,outputs = net(TX_test)

approx_numpy = approx.data.numpy()
bottle_numpy = bottle.data.numpy()
outputs_numpy = outputs.data.numpy()



##### evaluate kmeans

In [28]:
score_list=[]
for i in range(10):
    est=KMeans(init='k-means++', n_clusters=6, n_init=30)
    est.fit(outputs_numpy)
    # STEP 3：evaluations
    score=metrics.adjusted_rand_score(y, est.labels_)
    score_list.append(score)
print(score_list)
print(np.mean(np.array(score_list)))

[0.14661572073942009, 0.14661572073942009, 0.14657261616934184, 0.14657261616934184, 0.14661572073942009, 0.14657261616934184, 0.14661572073942009, 0.14661572073942009, 0.14661572073942009, 0.14661572073942009]
0.1466027893683966


In [32]:
for c_data in [X_norm, approx_numpy,bottle_numpy,outputs_numpy]:
    score_list=[]
    for i in range(1):
        estimator = GaussianMixture(n_components=6,
              covariance_type='full', max_iter=30, random_state=117,n_init=10,reg_covar=1e-6)
    
        estimator.fit(c_data)
        y_train_pred = estimator.predict(c_data)
        # STEP 3：evaluations
        score=metrics.adjusted_rand_score(y, y_train_pred)
        score_list.append(score)
    print(score_list)
    print(np.mean(np.array(score_list)))
    print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")


[0.23152439184088183]
0.23152439184088183
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
[0.3179182586963861]
0.3179182586963861
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
[0.17872446834740202]
0.17872446834740202
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
[0.14499576315057214]
0.14499576315057214
<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
