In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
import catboost

In [2]:
# Load one pre-computed feature table per affect dimension; the first CSV
# column becomes the index of each frame.
feats_val, feats_ar, feats_dom, feats_lik = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'valence_features_200.csv',
        'arousal_features_200.csv',
        'dominance_features_200.csv',
        'liking_features_200.csv',
    )
)

# Rating table the classification labels are derived from.
targets = pd.read_csv('za_klasifikaciju.csv', index_col=0)

In [None]:
# Turn the four rating columns into binary class labels:
# rating >= RATING_THRESHOLD -> 1.0, rating < RATING_THRESHOLD -> 0.0.
TARGET_COLUMNS = ['Valence', 'Arousal', 'Dominance', 'Liking']
RATING_THRESHOLD = 4.5

ratings = targets[TARGET_COLUMNS]

# One vectorised comparison instead of two sequential in-place masked writes:
# nothing is mutated, and the result does not depend on the order in which the
# two thresholds are applied. `.where(ratings.notna())` keeps missing ratings
# as NaN rather than silently labelling them 0.
#
# NOTE: this rebinds `targets`, so re-running the cell without re-running the
# loading cell would threshold the 0/1 labels again and yield all zeros.
targets = ratings.ge(RATING_THRESHOLD).astype(float).where(ratings.notna())

In [10]:
# Eyeball a random handful of label rows (unseeded, so rows differ per run).
targets.sample(n=15)

Unnamed: 0_level_0,Valence,Arousal,Dominance,Liking
Participant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6,0.0,1.0,0.0,1.0
12,1.0,0.0,0.0,1.0
29,1.0,1.0,1.0,1.0
27,1.0,0.0,1.0,1.0
2,1.0,0.0,0.0,1.0
7,1.0,1.0,0.0,1.0
8,0.0,1.0,0.0,1.0
22,0.0,1.0,1.0,0.0
24,0.0,1.0,0.0,1.0
20,0.0,1.0,0.0,0.0


# Modeling

In [83]:
# 10-fold cross-validated CatBoost classifier for each affect dimension,
# scored with F1. Prints the best mean test F1 over boosting iterations and
# the fold standard deviation at that iteration.
cat_params = {
    'loss_function': 'Logloss',
    'eval_metric': 'F1',
    'learning_rate': 0.001,
    'depth': 5,
    'subsample': 0.8
}

# Every target has its own feature table. An explicit mapping replaces the
# if/elif chain, so a target can never silently reuse the previous
# iteration's `data`.
features_by_target = {
    'Valence': feats_val,
    'Arousal': feats_ar,
    'Dominance': feats_dom,
    'Liking': feats_lik,
}

# NOTE(review): the scores recorded in this notebook look like a constant
# all-positive predictor -- e.g. Liking F1 0.8203 equals 2p/(1+p) for
# p = 0.6953, the Liking accuracy printed by the accuracy run, and the fold
# std is ~0. The 0.001 learning rate combined with a 15-round patience may be
# stopping training before any split matters; compare against a
# majority-class baseline before trusting these numbers.
for c, data in features_by_target.items():
    # presumably features and labels are paired row-by-row, so `data` and
    # `targets` must share the same row order -- TODO confirm
    cat_crossval = catboost.Pool(data=data, label=targets[c])

    cat_cv = catboost.cv(
        pool=cat_crossval,
        params=cat_params,
        num_boost_round=5000,
        nfold=10,
        verbose_eval=0,
        early_stopping_rounds=15,
    )

    # idxmax() yields an index *label*, which is what .loc expects
    # (np.argmax yields a position and only coincides on a default index).
    ind_max = cat_cv['test-F1-mean'].idxmax()
    print(f'{c}: test F1 mean = {cat_cv.loc[ind_max, "test-F1-mean"]}, std = {cat_cv.loc[ind_max, "test-F1-std"]}')
    

Stopped by overfitting detector  (15 iterations wait)
Valence: test F1 mean = 0.7739468566013188, std = 0.0015963747852691674
Stopped by overfitting detector  (15 iterations wait)
Arousal: test F1 mean = 0.779790896307484, std = 0.0015909335084335135
Stopped by overfitting detector  (15 iterations wait)
Dominance: test F1 mean = 0.8015023805325339, std = 0.0014764262294698824
Stopped by overfitting detector  (15 iterations wait)
Liking: test F1 mean = 0.8202764976958525, std = 0.0


In [84]:
# 10-fold cross-validated CatBoost classifier for each affect dimension,
# scored with accuracy. Prints the best mean test accuracy over boosting
# iterations and the fold standard deviation at that iteration.
cat_params = {
    'loss_function': 'Logloss',
    'eval_metric': 'Accuracy',
    'learning_rate': 0.001,
    'depth': 5,
    'subsample': 0.8
}

# Every target has its own feature table. An explicit mapping replaces the
# if/elif chain, so a target can never silently reuse the previous
# iteration's `data`.
features_by_target = {
    'Valence': feats_val,
    'Arousal': feats_ar,
    'Dominance': feats_dom,
    'Liking': feats_lik,
}

# NOTE(review): the accuracies recorded in this notebook have a fold std of
# ~0, and the F1 values from the F1 run equal 2a/(1+a) for the accuracy `a`
# printed here (e.g. Valence: a = 0.6313 -> 0.7740). That pattern matches a
# model that always predicts the positive class; check these numbers against
# a majority-class baseline, and consider a larger learning rate or patience.
for c, data in features_by_target.items():
    # presumably features and labels are paired row-by-row, so `data` and
    # `targets` must share the same row order -- TODO confirm
    cat_crossval = catboost.Pool(data=data, label=targets[c])

    cat_cv = catboost.cv(
        pool=cat_crossval,
        params=cat_params,
        num_boost_round=5000,
        nfold=10,
        verbose_eval=0,
        early_stopping_rounds=15,
    )

    # idxmax() yields an index *label*, which is what .loc expects
    # (np.argmax yields a position and only coincides on a default index).
    ind_max = cat_cv['test-Accuracy-mean'].idxmax()
    print(f'{c}: test Accuracy mean = {cat_cv.loc[ind_max, "test-Accuracy-mean"]}, std = {cat_cv.loc[ind_max, "test-Accuracy-std"]}')

Stopped by overfitting detector  (15 iterations wait)
Valence: test Accuracy mean = 0.6312531473173412, std = 0.0042505079530956325
Stopped by overfitting detector  (15 iterations wait)
Arousal: test Accuracy mean = 0.6390658380638466, std = 0.002135124947667429
Stopped by overfitting detector  (15 iterations wait)
Dominance: test Accuracy mean = 0.6687582020997374, std = 0.0020565310114929653
Stopped by overfitting detector  (15 iterations wait)
Liking: test Accuracy mean = 0.6953125, std = 0.0
