In [1]:
import optuna
import pandas as pd
import numpy as np
import os
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score, f1_score, mean_absolute_error, recall_score

In [2]:
def dataset(partition, feature_set):
    """Read the training and development CSV splits for one feature set.

    Files are looked up as ``./data_csv/<partition>/<feature_set>_train.csv``
    and ``./data_csv/<partition>/<feature_set>_devel.csv``.

    Args:
        partition: Sub-directory name under ``./data_csv/``.
        feature_set: File-name prefix shared by both splits.

    Returns:
        A ``(train, valid)`` tuple of DataFrames, in that order.
    """
    split_dir = os.path.join('./data_csv/', partition)
    train_path = os.path.join(split_dir, feature_set + '_train.csv')
    devel_path = os.path.join(split_dir, feature_set + '_devel.csv')
    return pd.read_csv(train_path), pd.read_csv(devel_path)

In [3]:
def f1(y_true, y_pred):
    """Return the micro-averaged F1 score as a percentage, rounded to 3 decimals."""
    micro_f1 = f1_score(y_true, y_pred, average='micro')
    return round(micro_f1 * 100, 3)

def uar(y_true, y_pred):
    """Return the macro-averaged recall as a percentage, rounded to 3 decimals."""
    macro_recall = recall_score(y_true, y_pred, average='macro')
    return round(macro_recall * 100, 3)

def combine(y_true, y_pred):
    """Blend the two percentage metrics into one score.

    The result is ``0.66 * f1 + 0.34 * uar``, rounded to 3 decimals. Both
    inputs are already rounded percentages when they are combined.
    """
    weighted_f1 = 0.66 * f1(y_true, y_pred)
    weighted_uar = 0.34 * uar(y_true, y_pred)
    return round(weighted_f1 + weighted_uar, 3)

In [4]:
def fast_method(partition, feature_set, last_columns):
    """Compare three untuned classifiers with Optuna and record the best one.

    Each trial picks one of GaussianNB, AdaBoostClassifier or
    QuadraticDiscriminantAnalysis (all with default settings), fits it on the
    training split and scores the validation predictions with ``combine``.
    The best trial is printed and appended as one row to
    ``./optimize/optimize_<feature_set>.csv``.

    Args:
        partition: Label stored in the result row; not used to load data here.
        feature_set: Label stored in the result row and used in the output
            file name.
        last_columns: Label of the last feature column; it is converted to a
            string and used as the end of the column slice.

    Returns:
        None.

    Raises:
        ValueError: If the sampled classifier name has no matching branch.
    """
    # NOTE: `train` and `valid` are not parameters; they are read from module
    # scope, so the caller must assign them before calling this function.
    feature_end = '{}'.format(last_columns)
    # .loc label slices include both end points, so 'segment_id' itself ends
    # up in X. NOTE(review): confirm the id column is meant to be a feature.
    X_train, y_train = train.loc[:, 'segment_id':feature_end], train['class_id']
    X_valid, y_valid = valid.loc[:, 'segment_id':feature_end], valid['class_id']

    def objective(trial):
        """Fit the sampled classifier and return its combined validation score."""
        classifier_name = trial.suggest_categorical(
            'classifier',
            ['GaussianNB', 'AdaBoostClassifier', 'QuadraticDiscriminantAnalysis'])

        if classifier_name == 'AdaBoostClassifier':
            classifier_obj = AdaBoostClassifier()
        elif classifier_name == 'QuadraticDiscriminantAnalysis':
            classifier_obj = QuadraticDiscriminantAnalysis()
        elif classifier_name == 'GaussianNB':
            classifier_obj = GaussianNB()
        else:
            # Guards against the choice list and the branches drifting apart.
            raise ValueError('Unsupported classifier: {}'.format(classifier_name))

        classifier_obj.fit(X_train, y_train)
        pred = classifier_obj.predict(X_valid)

        return combine(y_valid, pred)

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=10)
    trial = study.best_trial
    # The printed value is the blended score returned by combine().
    print('Accuracy: {}'.format(trial.value))
    print('Parameter: {}'.format(trial.params))

    # The 'patition' spelling is kept so new rows line up with existing files.
    df = pd.DataFrame([[partition, feature_set, trial.value, trial.params]],
                      columns=['patition', 'feature_set', 'value', 'params'])
    out_path = './optimize/optimize_{}.csv'.format(feature_set)
    os.makedirs('./optimize', exist_ok=True)
    if os.path.exists(out_path):
        previous = pd.read_csv(out_path)
        # DataFrame.append was removed in pandas 2.0; concat gives the same rows.
        df = pd.concat([previous, df], ignore_index=True)
    df.to_csv(out_path, index=False)

In [5]:
def slow_method(partition, feature_set, last_columns):
    """Tune a classifier with a 1000-trial Optuna study and record the best trial.

    Only 'SVC' is currently offered to the sampler, so every trial tunes the
    SVC regularisation strength ``C`` on a log scale. The remaining branches
    (RandomForest, KNeighborsClassifier, DecisionTreeClassifier) stay available
    for when their names are added to the choice list. Each trial is scored
    with ``combine`` on the validation split. The best trial is printed and
    appended as one row to ``./optimize/optimize_<feature_set>.csv``.

    Args:
        partition: Label stored in the result row; not used to load data here.
        feature_set: Label stored in the result row and used in the output
            file name.
        last_columns: Label of the last feature column; it is converted to a
            string and used as the end of the column slice.

    Returns:
        None.

    Raises:
        ValueError: If the sampled classifier name has no matching branch.
    """
    # NOTE: `train` and `valid` are not parameters; they are read from module
    # scope, so the caller must assign them before calling this function.
    feature_end = '{}'.format(last_columns)
    # .loc label slices include both end points, so 'segment_id' itself ends
    # up in X. NOTE(review): confirm the id column is meant to be a feature.
    X_train, y_train = train.loc[:, 'segment_id':feature_end], train['class_id']
    X_valid, y_valid = valid.loc[:, 'segment_id':feature_end], valid['class_id']

    def objective(trial):
        """Build the sampled classifier, fit it and return its combined score."""
        # NOTE(review): suggest_loguniform / suggest_discrete_uniform are
        # deprecated in newer Optuna releases; they are kept here so parameter
        # names and behaviour match the Optuna version this notebook ran on.
        classifier_name = trial.suggest_categorical('classifier', ['SVC'])

        if classifier_name == 'SVC':
            svc_c = trial.suggest_loguniform('svc_c', 1e-10, 1000)
            classifier_obj = SVC(C=svc_c, gamma='auto')

        elif classifier_name == 'RandomForest':
            rf_max_depth = int(trial.suggest_loguniform('rf_max_depth', 2, 32))
            classifier_obj = RandomForestClassifier(max_depth=rf_max_depth, n_estimators=10)

        elif classifier_name == 'KNeighborsClassifier':
            n_neighbors = trial.suggest_discrete_uniform('n_neighbors', 1, 1000, 1)
            classifier_obj = KNeighborsClassifier(n_neighbors=int(n_neighbors))

        elif classifier_name == 'DecisionTreeClassifier':
            max_depth = trial.suggest_discrete_uniform('max_depth', 1, 100, 1)
            # The sampler returns a float; the tree depth must be an integer.
            classifier_obj = DecisionTreeClassifier(max_depth=int(max_depth))

        else:
            # Without this, an unmatched name would fail later on an unbound
            # classifier_obj with a far less helpful message.
            raise ValueError('Unsupported classifier: {}'.format(classifier_name))

        classifier_obj.fit(X_train, y_train)
        pred = classifier_obj.predict(X_valid)

        return combine(y_valid, pred)

    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=1000)

    trial = study.best_trial
    # The printed value is the blended score returned by combine().
    print('Accuracy: {}'.format(trial.value))
    print('Parameter: {}'.format(trial.params))

    # The 'patition' spelling is kept so new rows line up with existing files.
    df = pd.DataFrame([[partition, feature_set, trial.value, trial.params]],
                      columns=['patition', 'feature_set', 'value', 'params'])
    out_path = './optimize/optimize_{}.csv'.format(feature_set)
    os.makedirs('./optimize', exist_ok=True)
    if os.path.exists(out_path):
        previous = pd.read_csv(out_path)
        # DataFrame.append was removed in pandas 2.0; concat gives the same rows.
        df = pd.concat([previous, df], ignore_index=True)
    df.to_csv(out_path, index=False)

In [6]:
# Feature-set name -> column count (presumably the number of feature columns;
# the loop passes count - 1 as the label of the last feature column).
maps = {'vggface': 512}

for partition in ('arousal', 'valence'):
    for feature_set in maps:
        num_columns = maps[feature_set]
        # These two names must stay module-level: the tuning functions read
        # `train` and `valid` from the notebook namespace, not from arguments.
        train, valid = dataset(partition, feature_set)
        # Quick baseline sweep, currently disabled:
        # fast_method(partition, feature_set, num_columns - 1)
        slow_method(partition, feature_set, num_columns - 1)

[I 2020-07-17 18:14:44,941] Finished trial#0 with value: 42.672 with parameters: {'classifier': 'LinearSVC', 'svc_c': 0.0002504532000645177}. Best is trial#0 with value: 42.672.
[I 2020-07-17 18:14:56,366] Finished trial#1 with value: 41.244 with parameters: {'classifier': 'LinearSVC', 'svc_c': 0.22941345301283592}. Best is trial#0 with value: 42.672.
[I 2020-07-17 18:15:07,595] Finished trial#2 with value: 42.131 with parameters: {'classifier': 'LinearSVC', 'svc_c': 0.007750716712714445}. Best is trial#0 with value: 42.672.
[I 2020-07-17 18:15:07,784] Finished trial#3 with value: 47.64 with parameters: {'classifier': 'LinearSVC', 'svc_c': 5.836646920550962e-07}. Best is trial#3 with value: 47.64.
[I 2020-07-17 18:15:16,839] Finished trial#4 with value: 42.201 with parameters: {'classifier': 'LinearSVC', 'svc_c': 0.0016561647652487902}. Best is trial#3 with value: 47.64.
[I 2020-07-17 18:15:27,515] Finished trial#5 with value: 42.204 with parameters: {'classifier': 'LinearSVC', 'svc_c'

Accuracy: 48.896
Parameter: {'classifier': 'LinearSVC', 'svc_c': 3.7400130924831853e-07}


[I 2020-07-17 18:25:17,410] Finished trial#0 with value: 32.083 with parameters: {'classifier': 'LinearSVC', 'svc_c': 0.5037481401217664}. Best is trial#0 with value: 32.083.
[I 2020-07-17 18:25:18,083] Finished trial#1 with value: 36.104 with parameters: {'classifier': 'LinearSVC', 'svc_c': 0.00011267793620395065}. Best is trial#1 with value: 36.104.
[I 2020-07-17 18:25:18,287] Finished trial#2 with value: 36.607 with parameters: {'classifier': 'LinearSVC', 'svc_c': 3.799687287102443e-06}. Best is trial#2 with value: 36.607.
[I 2020-07-17 18:25:30,383] Finished trial#3 with value: 32.902 with parameters: {'classifier': 'LinearSVC', 'svc_c': 0.09353739936445049}. Best is trial#2 with value: 36.607.
[I 2020-07-17 18:25:32,118] Finished trial#4 with value: 36.424 with parameters: {'classifier': 'LinearSVC', 'svc_c': 0.00035618348583816347}. Best is trial#2 with value: 36.607.
[I 2020-07-17 18:25:32,553] Finished trial#5 with value: 35.905 with parameters: {'classifier': 'LinearSVC', 'svc

Accuracy: 38.117
Parameter: {'classifier': 'LinearSVC', 'svc_c': 2.2314910551084317e-05}
