In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

In [4]:
import torch
from torch.utils.data import DataLoader
import os
import torch.nn as nn
import time
import copy
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, random_split

In [2]:
feats_val = pd.read_csv('valence_features_200.csv', index_col=0)
feats_ar = pd.read_csv('arousal_features_200.csv', index_col=0)
feats_dom = pd.read_csv('dominance_features_200.csv', index_col=0)
feats_lik = pd.read_csv('liking_features_200.csv', index_col=0)

targets = pd.read_csv('za_klasifikaciju.csv', index_col=0)

In [None]:
targets = targets[['Valence', 'Arousal', 'Dominance', 'Liking']]
targets[targets < 4.5] = 0
targets[targets >= 4.5] = 1

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

features_std = scaler.fit_transform(features)

In [19]:
class EmotionsDataset(Dataset):

    def __init__(self, features, classes):
        #self.x = features.to_numpy()
        self.x = features
        self.y = classes.to_numpy()

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return [torch.from_numpy(self.x[idx,:]), torch.tensor(self.y[idx])]

In [20]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [38]:
torch.manual_seed(999)

def crossval_train(x, y, num_epochs, learning_rate, batch_size, num_classes, n_folds, verbose=True):

    skf = StratifiedKFold(n_splits=n_folds, random_state=999, shuffle=True)
    i_fold = 0
    best_acc = np.zeros(n_folds)
    best_f1 = np.zeros(n_folds)

    start = time.time()

    for train_index, test_index in skf.split(x, y):

        #x_tr, x_val = x.iloc[train_index, :], x.iloc[test_index, :]
        x_tr, x_val = x[train_index, :], x[test_index, :]
        y_tr, y_val = y.iloc[train_index], y.iloc[test_index]

        train_set = EmotionsDataset(x_tr, y_tr)
        val_set = EmotionsDataset(x_val, y_val)

        #labels_weights = [1, 0.5]
        #weights_train = [labels_weights[int(yy)] for yy in y_tr]
        #weights_val = [labels_weights[int(yy)] for yy in y_val]
        #sampler_train = torch.utils.data.sampler.WeightedRandomSampler(weights_train, len(weights_train))
        #sampler_val = torch.utils.data.sampler.WeightedRandomSampler(weights_val, len(weights_val))

        train_loader = DataLoader(train_set, batch_size)
        val_loader = DataLoader(val_set, batch_size)

        data_loader = {'train':train_loader, 'val':val_loader}
    
        model = GospodinKlasifikator(num_classes).to(device)

        criterion = nn.BCELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)


        running_loss = 0.0

        for epoch in range(num_epochs):

            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0   
                running_tp = 0 
                running_acc = 0
                running_f1 = 0
        
                for i, (inputs, labels) in enumerate(data_loader[phase]):
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    with torch.set_grad_enabled(phase=='train'):
                        #forward
                        output = model(inputs.float())
                        preds = output[:, 1]
                        loss = criterion(preds.float(), labels.float()[:])

                        #backward
                        optimizer.zero_grad()
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                
                
                    preds[preds<0.5] = 0
                    preds[preds>=0.5] = 1
                    running_loss += loss.item()*labels.shape[0]
                    running_acc += accuracy_score(labels, preds.detach())
                    running_f1 += f1_score(labels, preds.detach())

                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss/len(data_loader[phase])
                epoch_acc = running_acc/len(data_loader[phase])
                epoch_f1 = running_f1/len(data_loader[phase])


            if phase == 'val' and epoch_acc>best_acc[i_fold]:
                best_acc[i_fold] = epoch_acc
                best_model = copy.deepcopy(model.state_dict())
            if phase == 'val' and epoch_f1>best_f1[i_fold]:
                best_f1[i_fold] = epoch_f1

        if verbose:
            print('-'*40)
            print('Best validation accuracy: {:.3f}, fold {}'.format(best_acc[i_fold], i_fold))
        i_fold += 1

    t = time.time()-start
    print('-'*40)
    print('Time to completion: {:.0f}min {:.0f}sec'.format(t//60, t%60))
    print('Crossvalidation accuracy: mean = {:.4f}, std = {:.4f}'.format(np.mean(best_acc), np.std(best_acc)))
    print('Crossvalidation f1 score: mean = {:.4f}, std = {:.4f}'.format(np.mean(best_f1), np.std(best_f1)))

    return best_model

In [37]:
class GospodinKlasifikator(nn.Module):

  def __init__(self, num_classes):
    super().__init__()

    self.layer1 = nn.Linear(data.shape[1], 60)
    self.layer2 = nn.Linear(60, 30)
    self.relu = nn.LeakyReLU()
    self.bn12 = nn.BatchNorm1d(60)
    self.lastlayer = nn.Linear(30, num_classes)
    self.dropout = nn.Dropout(0.5)

  def forward(self, input):

    out1 = self.layer1(input)
    
    out = self.relu(out1)
    out = self.bn12(out)
    out = self.dropout(out)
    out = self.layer2(out)
    
    
    out = self.relu(out)
   # out = self.dropout(out)
    #out = out.reshape(out.size(0), -1)
    out = self.lastlayer(out)
    out = torch.sigmoid(out)

    return out

In [26]:
num_epochs = 50
num_classes = 2
batch_size = 64
learning_rate = 0.02
n_folds = 7

# Modelling

In [39]:
for c in ['Valence', 'Arousal', 'Dominance', 'Liking']:
    
    if c == 'Valence':
        data = normalize(feats_val, axis=0)
    elif c == 'Arousal':
        data = normalize(feats_ar, axis=0)
    elif c == 'Dominance':
        data = normalize(feats_dom, axis=0)
    elif c == 'Liking':
        data = normalize(feats_lik, axis=0)

    x_train, x_test, y_train, y_test = train_test_split(data, targets[c], test_size=0.2)
    best_model = crossval_train(x_train, y_train, num_epochs, learning_rate, batch_size, num_classes, n_folds, False)
    

----------------------------------------
Time to completion: 0min 38sec
Crossvalidation accuracy: mean = 0.6431, std = 0.0248
Crossvalidation f1 score: mean = 0.7791, std = 0.0198
----------------------------------------
Time to completion: 0min 40sec
Crossvalidation accuracy: mean = 0.6457, std = 0.0129
Crossvalidation f1 score: mean = 0.7832, std = 0.0100
----------------------------------------
Time to completion: 0min 39sec
Crossvalidation accuracy: mean = 0.6597, std = 0.0210
Crossvalidation f1 score: mean = 0.7928, std = 0.0162
----------------------------------------
Time to completion: 0min 38sec
Crossvalidation accuracy: mean = 0.7116, std = 0.0087
Crossvalidation f1 score: mean = 0.8249, std = 0.0076
