In [41]:
import torch
import numpy as np
import pandas as pd
import os
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
import lightgbm as lgb

In [93]:

# Location and naming scheme of the input tables:
#   <path>/<feature_set>_<partition>.csv
path = './data_csv/arousal'
partition = ['train', 'test']
feature_set = ['vggface']

# Read the two partitions in the order listed in `partition`:
# `valid` holds the 'train' file, `test` holds the 'test' file.
valid, test = (
    pd.read_csv(os.path.join(path, '{}_{}.csv'.format(feature_set[0], split)))
    for split in partition
)

In [94]:
# Feature matrices: every column from 'segment_id' through '511'
# (label-based slice, both endpoints inclusive).
# NOTE(review): this keeps 'segment_id' among the model inputs; if it is a row
# identifier it may let the classifier separate the partitions trivially —
# confirm whether it should be dropped.
X_valid = valid.loc[:, 'segment_id':'511']
X_test = test.loc[:, 'segment_id':'511']

# Partition-membership labels: 0 for every row of `valid`, 1 for every row of
# `test`. Built as constant Series on each frame's own index rather than by
# mapping a lambda over an existing column, so the result does not depend on
# which of 'id' / 'class_id' the CSV happens to contain.
y_valid = pd.Series(0, index=valid.index, name='class_id')
y_test = pd.Series(1, index=test.index, name='class_id')

In [95]:
# Stack both partitions into one dataset with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append / Series.append, which were removed in
# pandas 2.0; ignore_index=True is equivalent to .reset_index(drop=True).
X_all = pd.concat([X_valid, X_test], ignore_index=True)
y_all = pd.concat([y_valid, y_test], ignore_index=True)

In [96]:
# Display the (rows, columns) shape of both feature matrices side by side.
X_valid.shape, X_test.shape

((4313, 513), (1260, 513))

In [97]:

# Single seed shared by NumPy, LightGBM and the CV splitter. Defined here so
# the notebook runs on a fresh kernel (it was previously read without ever
# being assigned in the notebook).
random_state = 42

np.random.seed(random_state)

# LightGBM settings for the binary "which partition does this row come from"
# classifier, evaluated with ROC AUC.
lgb_params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'auc',
    'verbose': 1,
    'learning_rate': 0.05,
    'num_leaves': 31,
    'feature_fraction': 0.7,      # fraction of columns sampled per tree
    'min_data_in_leaf': 200,
    'bagging_fraction': 0.8,      # fraction of rows sampled when bagging runs
    'bagging_freq': 20,           # re-sample rows every 20 iterations
    'min_hessian': 0.01,          # alias of min_sum_hessian_in_leaf
    'feature_fraction_seed': 2,
    'bagging_seed': 3,
    "seed": random_state
}

In [98]:
# 5-fold stratified CV of a classifier that tries to tell the two partitions
# apart (labels 0 / 1 in y_all). Each fold prints its ROC AUC; an AUC near 0.5
# would mean the partitions are hard to distinguish, an AUC near 1.0 that they
# are easy to separate.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=random_state)
fold_aucs = []
for fold, (trn_idx, val_idx) in enumerate(skf.split(X_all, y_all)):
    # Fold-local names: X_valid / y_valid are the partition-level variables
    # from an earlier cell and must not be overwritten here, otherwise
    # re-running that cell chain would silently use one fold's subset.
    X_train, y_train = X_all.iloc[trn_idx], y_all.iloc[trn_idx]
    X_val, y_val = X_all.iloc[val_idx], y_all.iloc[val_idx]
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_val, label=y_val)

    # NOTE: the early_stopping_rounds / verbose_eval / evals_result keyword
    # arguments were removed from lgb.train in LightGBM 4.0; on that version
    # pass callbacks=[lgb.early_stopping(100), lgb.log_evaluation(50),
    # lgb.record_evaluation(evals_result)] instead.
    evals_result = {}
    lgb_clf = lgb.train(lgb_params,
                        train_data,
                        7500,                      # upper bound on boosting rounds
                        valid_sets=[valid_data],
                        early_stopping_rounds=100,
                        verbose_eval=50,
                        evals_result=evals_result)

    # Score the held-out fold with the best iteration found by early stopping.
    pred = lgb_clf.predict(X_val, num_iteration=lgb_clf.best_iteration)
    fold_auc = roc_auc_score(y_val, pred)
    fold_aucs.append(fold_auc)
    print("Fold_{}_ROC = {}".format(fold, fold_auc))

print("Mean_ROC = {}".format(np.mean(fold_aucs)))
    

Training until validation scores don't improve for 100 rounds
[50]	valid_0's auc: 0.951827
[100]	valid_0's auc: 0.959911
[150]	valid_0's auc: 0.964021
[200]	valid_0's auc: 0.965539
[250]	valid_0's auc: 0.965794
[300]	valid_0's auc: 0.967438
[350]	valid_0's auc: 0.968417
[400]	valid_0's auc: 0.968884
[450]	valid_0's auc: 0.969293
[500]	valid_0's auc: 0.969293
[550]	valid_0's auc: 0.969923
[600]	valid_0's auc: 0.970119
[650]	valid_0's auc: 0.969863
[700]	valid_0's auc: 0.970422
[750]	valid_0's auc: 0.970169
Early stopping, best iteration is:
[681]	valid_0's auc: 0.970477
Fold_0_ROC = 0.9704772020820689
Training until validation scores don't improve for 100 rounds
[50]	valid_0's auc: 0.947971
[100]	valid_0's auc: 0.95766
[150]	valid_0's auc: 0.96381
[200]	valid_0's auc: 0.966679
[250]	valid_0's auc: 0.969381
[300]	valid_0's auc: 0.971815
[350]	valid_0's auc: 0.971995
[400]	valid_0's auc: 0.972533
[450]	valid_0's auc: 0.97282
[500]	valid_0's auc: 0.972264
Early stopping, best iteration is: