In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [45]:
features = pd.read_csv("valence_features_100.csv", index_col=0)
targets = pd.read_csv("valence_targets.csv", index_col=0)

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

In [20]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

# Standardizacija

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

features_std = scaler.fit_transform(features)

In [23]:
from sklearn.preprocessing import normalize

In [24]:
features_norm = normalize(features, axis=0)

In [25]:
from sklearn.model_selection import train_test_split

In [26]:
#x_train_std, x_test_std, y_train, y_test = train_test_split(features_std, diabetes, test_size=0.1)
x_train, x_test, y_train, y_test = train_test_split(features_norm, targets, test_size=0.1, stratify=targets)

# Torch

In [11]:
import torch
from torch.utils.data import DataLoader
import os
import torch.nn as nn
import time
import copy
from torch.optim import lr_scheduler

In [12]:
from torch.utils.data import Dataset, random_split

In [56]:
class EmotionsDataset(Dataset):

    def __init__(self, features, classes):
        #self.x = features.to_numpy()
        self.x = features
        self.y = classes.to_numpy()

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return [torch.from_numpy(self.x[idx,:]), torch.from_numpy(self.y[idx])]

In [57]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [58]:
from sklearn.model_selection import StratifiedKFold
torch.manual_seed(999)

def crossval_train(x, y, num_epochs, learning_rate, batch_size, num_classes, n_folds):

    skf = StratifiedKFold(n_splits=n_folds, random_state=999, shuffle=True)
    i_fold = 0
    best_acc = np.zeros(n_folds)

    start = time.time()

    for train_index, test_index in skf.split(x, y):

        #x_tr, x_val = x.iloc[train_index, :], x.iloc[test_index, :]
        x_tr, x_val = x[train_index, :], x[test_index, :]
        y_tr, y_val = y.iloc[train_index], y.iloc[test_index]

        train_set = EmotionsDataset(x_tr, y_tr)
        val_set = EmotionsDataset(x_val, y_val)

        #labels_weights = [1, 0.5]
        #weights_train = [labels_weights[int(yy)] for yy in y_tr]
        #weights_val = [labels_weights[int(yy)] for yy in y_val]
        #sampler_train = torch.utils.data.sampler.WeightedRandomSampler(weights_train, len(weights_train))
        #sampler_val = torch.utils.data.sampler.WeightedRandomSampler(weights_val, len(weights_val))

        train_loader = DataLoader(train_set, batch_size)
        val_loader = DataLoader(val_set, batch_size)

        data_loader = {'train':train_loader, 'val':val_loader}
    
        model = GospodinKlasifikator(num_classes).to(device)

        criterion = nn.BCELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)


        running_loss = 0.0

        for epoch in range(num_epochs):

            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0   
                running_tp = 0 
                running_acc = 0
        
                for i, (inputs, labels) in enumerate(data_loader[phase]):
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    with torch.set_grad_enabled(phase=='train'):
                        #forward
                        output = model(inputs.float())
                        preds = output[:, 1]
                        loss = criterion(preds.float(), labels.float()[:, 0])

                        #backward
                        optimizer.zero_grad()
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                
                
                    preds[preds<0.5] = 0
                    preds[preds>=0.5] = 1
                    running_loss += loss.item()*labels.shape[0]
                    running_acc += accuracy_score(labels, preds.detach())

                if phase == 'train':
                    scheduler.step()

                epoch_loss = running_loss/len(data_loader[phase])
                epoch_acc = running_acc/len(data_loader[phase])


            if phase == 'val' and epoch_acc>best_acc[i_fold]:
                best_acc[i_fold] = epoch_acc
                best_model = copy.deepcopy(model.state_dict())

        
        print('-'*40)
        print('Best validation accuracy: {:.3f}, fold {}'.format(best_acc[i_fold], i_fold))
        i_fold += 1

    t = time.time()-start
    print('-'*40)
    print('Time to completion: {:.0f}min {:.0f}sec'.format(t//60, t%60))
    print('Crossvalidation accuracy: mean = {:.4f}, std = {:.4f}'.format(np.mean(best_acc), np.std(best_acc)))

    return best_model

In [92]:
class GospodinKlasifikator(nn.Module):

  def __init__(self, num_classes):
    super().__init__()

    self.layer1 = nn.Linear(98, 60)
    self.layer2 = nn.Linear(60, 30)
    self.relu = nn.LeakyReLU()
    self.bn12 = nn.BatchNorm1d(60)
    self.lastlayer = nn.Linear(30, num_classes)
    self.dropout = nn.Dropout(0.5)

  def forward(self, input):

    out1 = self.layer1(input)
    
    out = self.relu(out1)
    out = self.bn12(out)
    out = self.dropout(out)
    out = self.layer2(out)
    
    
    out = self.relu(out)
   # out = self.dropout(out)
    #out = out.reshape(out.size(0), -1)
    out = self.lastlayer(out)
    out = torch.sigmoid(out)

    return out

In [93]:
num_epochs = 50
num_classes = 2
batch_size = 64
learning_rate = 0.02
n_folds = 7

In [94]:
best_model = crossval_train(x_train, y_train, num_epochs, learning_rate, batch_size, num_classes, n_folds)

----------------------------------------
Best validation accuracy: 0.659, fold 0
----------------------------------------
Best validation accuracy: 0.627, fold 1
----------------------------------------
Best validation accuracy: 0.639, fold 2
----------------------------------------
Best validation accuracy: 0.625, fold 3
----------------------------------------
Best validation accuracy: 0.639, fold 4
----------------------------------------
Best validation accuracy: 0.611, fold 5
----------------------------------------
Best validation accuracy: 0.653, fold 6
----------------------------------------
Time to completion: 0min 42sec
Crossvalidation accuracy: mean = 0.6361, std = 0.0156


In [95]:
model = GospodinKlasifikator(num_classes).to(device)
model.eval()
model.load_state_dict(best_model)

#predicted = model(torch.from_numpy(x_test.to_numpy()).float())
predicted = model(torch.from_numpy(x_test).float())

In [96]:
y_pred = np.array(predicted[:, 1].detach())
y_pred[y_pred<0.5] = 0
y_pred[y_pred>=0.5] = 1

In [97]:
print("Test set accuracy: {:.3f}".format(accuracy_score(y_test, y_pred)))
print("Test set F1 score: {:.3f}".format(f1_score(y_test, y_pred)))

Test set accuracy: 0.641
Test set F1 score: 0.772


In [98]:
y_pred

array([1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

In [99]:
from sklearn.metrics import confusion_matrix

In [100]:
print(confusion_matrix(y_test, y_pred))

[[ 4 43]
 [ 3 78]]
