In [2]:
import os
import sys
import pickle

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import StratifiedKFold, KFold

import boto3

In [3]:
# Put the directory four levels above the working directory on sys.path
# (presumably the project root that holds `global_config` and `src` — confirm).
module_path = os.path.abspath('../../../../')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [53]:
from global_config import ROOT_DIR, emotion_id_to_emotion_abr, conf_cmap, AU_INTENSITY_COLS, GAZE_COLS, POSE_COLS
from src.analysis.supervised_learning.evaluation.confusion_matrix import ConfusionMatrixCreator


from src.analysis.data_exploration import plot_time_series_means_subplots

from src.preprocessing.dataset_creation.scaling import Scaler
from src.preprocessing.dataset_creation.helpers import slice_by, get_cols, get_fixed_col
from src.preprocessing.dataset_creation.aggregation import get_aggregate_measures
from src.preprocessing.dataset_creation.interpolation import Interpolator

Using kernel: ml.c5.18xlarge (72 vCPU + 144 GiB)

In [107]:
def param_search(x_, y_, n_splits=5, random_state=None):
    """Grid-search SVC hyper-parameters with shuffled K-fold cross-validation.

    Prints the best parameter combination and returns the fitted search object.

    Parameters
    ----------
    x_ : array-like
        Feature matrix handed to ``GridSearchCV.fit``.
    y_ : array-like
        Target labels handed to ``GridSearchCV.fit``.
    n_splits : int, default 5
        Number of cross-validation folds.
    random_state : int or None, default None
        Seed for the fold shuffling. ``None`` keeps the original behaviour
        (different folds on every call); pass an int for reproducible runs.

    Returns
    -------
    GridSearchCV
        The fitted search; ``best_params_`` holds the selected parameters.
    """
    parameters = {
        'class_weight': ['balanced'],
        # Regularization parameter: lower C -> more regularization (less
        # overfitting), larger C -> less regularization (possibly more overfitting).
        'C': [0.1, 1, 5, 10, 25, 50, 75, 100],
        # Kernel coefficient; SVC ignores it for the 'linear' kernel, so those
        # candidates only differ in C.
        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
        'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],
    }

    # Pass the splitter itself rather than the generator from `.split()`:
    # a generator is exhausted after one fit and shows up as an opaque
    # `<generator object ...>` in the estimator repr.
    # NOTE(review): plain KFold does not preserve class proportions per fold;
    # StratifiedKFold may be the better choice for this classification task.
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    clf = GridSearchCV(estimator=SVC(),
                       param_grid=parameters,
                       scoring='accuracy',
                       verbose=5,
                       cv=kfold,
                       n_jobs=-1,
                       )

    clf.fit(x_, y_)

    print(clf.best_params_)
    return clf

In [17]:
def evaluate_scores(x_, y_, svc, scoring_method, n_splits=5, random_state=None):
    """Cross-validate ``svc`` and print train/validation score statistics.

    Parameters
    ----------
    x_ : array-like
        Feature matrix passed to ``cross_validate``.
    y_ : array-like
        Target labels passed to ``cross_validate``.
    svc : estimator
        Unfitted estimator to evaluate.
    scoring_method : str
        Name of a single scikit-learn scorer (e.g. ``"accuracy"``); also used
        to build the ``train_<name>`` / ``test_<name>`` result keys.
    n_splits : int, default 5
        Number of cross-validation folds.
    random_state : int or None, default None
        Seed for the fold shuffling. ``None`` keeps the original
        non-deterministic behaviour; pass an int for reproducible folds.

    Returns
    -------
    None
        The mean and standard deviation of the scores are printed only.
    """
    # Hand the splitter to cross_validate directly instead of a one-shot
    # generator produced by `.split()`.
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    scores = cross_validate(estimator=svc,
                            X=x_,
                            y=y_,
                            scoring=[scoring_method],
                            verbose=1,
                            cv=kfold,
                            n_jobs=-1,
                            return_train_score=True,
                            )

    train_scores = scores['train_{}'.format(scoring_method)]
    validation_scores = scores['test_{}'.format(scoring_method)]

    print('printing {} measures'.format(scoring_method))
    print('avg (train):', np.mean(train_scores))
    print('std (train):', np.std(train_scores))
    print('avg (validation):', np.mean(validation_scores))
    print('std (validation):', np.std(validation_scores))

In [7]:
# NOTE(review): these imports sit mid-notebook; consider moving them to the import cell at the top.
from s3fs.core import S3FileSystem
s3 = S3FileSystem()  # not used in this cell: pandas reads the s3:// URL directly below
from sagemaker import get_execution_role

# this datafile has standardscaled data on a per actor basis
# NOTE(review): the "Without prior normalization" section further down treats
# this data as unscaled — confirm which description is correct.
role = get_execution_role()  # assigned but not referenced in this cell
bucket='files-and-examples-01'
file = 'datasets/su_dataset/query_13_videos.csv'

# Build the s3:// URL of the CSV and load it into a DataFrame.
path = 's3://{}/{}'.format(bucket, file)

df = pd.read_csv(path)

In [8]:
slices = slice_by(df, "filename")

In [9]:
len(slices)

4544

In [10]:
# Configure the interpolator with the AU-intensity, gaze and pose columns and
# replace `slices` with its output. The len() checks around this cell show the
# slice count dropping from 4544 to 4540, so some slices appear to be removed
# — TODO confirm the exact removal/interpolation criteria in Interpolator.
interpolator = Interpolator([*AU_INTENSITY_COLS, *GAZE_COLS, *POSE_COLS])
slices = interpolator.remove_interpolate(slices)

In [11]:
len(slices)

4540

In [12]:
# Features: the AU-intensity columns of every slice.
au = get_cols(slices, AU_INTENSITY_COLS)
# Target: the "emotion_1_id" column, used as `y` for all classifiers below.
y = get_fixed_col(slices, "emotion_1_id")

# Action units

## Without prior normalization

In [13]:
# Aggregate the AU slices into one feature row per slice; only the `means`
# measure is enabled, the variance/deltas/peaks measures are switched off.
au_agg = get_aggregate_measures(au,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# Rescale every aggregated feature to MinMaxScaler's default [0, 1] range.
# NOTE(review): the scaler is fitted on all rows before cross-validation, so
# validation folds influence the min/max; fitting inside the CV loop avoids this.
scaler = MinMaxScaler()
au_agg = scaler.fit_transform(au_agg)

In [14]:
au_agg.shape

(4540, 85)

In [15]:
clf = param_search(au_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    4.5s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:   10.6s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   19.6s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   30.8s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:   56.1s finished


{'C': 10, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}


In [19]:
# Build a fresh SVC from the best grid-search parameters and print its
# cross-validated train/validation accuracy.
# NOTE(review): the scores are computed on the same data the grid search was
# tuned on, so the validation figure is optimistically biased; a held-out
# test split would give an unbiased estimate.
svc = SVC(**clf.best_params_)
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 0.9241189427312776
std (train): 0.004738883254174033
avg (validation): 0.17709251101321585
std (validation): 0.012982521610829406


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    5.8s finished


## Normalized by actor

### Standard

In [21]:
video_ids = get_fixed_col(slices, "video_id")

In [22]:
# Re-extract the AU columns first (as the later sections do) so that this cell
# always starts from the columns stored in `slices`; without this, re-running
# the cell would rescale the already-scaled `au`.
au = get_cols(slices, AU_INTENSITY_COLS)

# Standard-scale the AU slices with the project Scaler via scale_by_video_id
# (presumably one scaling per video id / actor — confirm in Scaler).
scaler = Scaler(au, "standard")
scaler.scale_by_video_id(video_ids)
au = scaler.slices

In [23]:
# Aggregate the scaled AU slices into one feature row per slice (only the
# `means` measure is enabled).
au_agg = get_aggregate_measures(au,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# Rescale the aggregated features to [0, 1]; `scaler` is rebound here from the
# project Scaler to a sklearn MinMaxScaler.
# NOTE(review): fitted on all rows before cross-validation (mild leakage).
scaler = MinMaxScaler()
au_agg = scaler.fit_transform(au_agg)

In [24]:
au_agg.shape

(4540, 85)

In [25]:
clf = param_search(au_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.4s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   18.8s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   30.2s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:   53.7s finished


{'C': 5, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}


In [28]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 0.9373898678414097
std (train): 0.0021083909382243063
avg (validation): 0.22114537444933918
std (validation): 0.01093363316827388


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    5.9s finished


### Min Max

In [29]:
au = get_cols(slices, AU_INTENSITY_COLS)

In [30]:
# Same as the "Standard" variant, but with the project Scaler in "min_max"
# mode; `au` is replaced by the scaled slices.
scaler = Scaler(au, "min_max")
scaler.scale_by_video_id(video_ids)
au = scaler.slices

In [31]:
# Aggregate the min-max scaled AU slices into one feature row per slice (only
# the `means` measure is enabled).
au_agg = get_aggregate_measures(au,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# Rescale the aggregated features to [0, 1].
# NOTE(review): fitted on all rows before cross-validation (mild leakage).
scaler = MinMaxScaler()
au_agg = scaler.fit_transform(au_agg)

In [32]:
au_agg.shape

(4540, 85)

In [33]:
clf = param_search(au_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    9.3s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   29.9s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:   57.7s finished


{'C': 10, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}


In [36]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 0.9629955947136564
std (train): 0.0020939596191444317
avg (validation): 0.1841409691629956
std (validation): 0.013850405217852408


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    5.9s finished


## Normalized by intensity

In [38]:
# Start again from the AU columns stored in `slices`, discarding the scaling
# applied to `au` in the previous section.
au = get_cols(slices, AU_INTENSITY_COLS)
# Per-slice "intensity_level" column, used as the grouping key for scaling.
intensity_levels = get_fixed_col(slices, "intensity_level")

In [39]:
# Standard-scale the AU slices with the project Scaler via scale_by_intensity
# (presumably one scaling per intensity level — confirm in Scaler).
scaler = Scaler(au, "standard")
scaler.scale_by_intensity(intensity_levels)
au = scaler.slices

In [40]:
# Aggregate the intensity-scaled AU slices into one feature row per slice
# (only the `means` measure is enabled).
au_agg = get_aggregate_measures(au,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# Rescale the aggregated features to [0, 1].
# NOTE(review): fitted on all rows before cross-validation (mild leakage).
scaler = MinMaxScaler()
au_agg = scaler.fit_transform(au_agg)

In [41]:
au_agg.shape

(4540, 85)

In [42]:
clf = param_search(au_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    9.6s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   19.2s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   31.1s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:  1.0min finished


{'C': 10, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'linear'}


In [44]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 0.3080947136563877
std (train): 0.0036485173509862123
avg (validation): 0.11475770925110133
std (validation): 0.002905485893892713


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    4.6s finished


## Normalized by video id and intensity

In [54]:
# Start again from the AU columns stored in `slices`.
au = get_cols(slices, AU_INTENSITY_COLS)

# Standard-scale using both grouping keys (video id and intensity level);
# relies on `video_ids` and `intensity_levels` defined in earlier cells.
scaler = Scaler(au, "standard")
scaler.scale_by_video_id_and_intensity(video_ids, intensity_levels)
au = scaler.slices

In [46]:
# Aggregate the scaled AU slices into one feature row per slice (only the
# `means` measure is enabled).
au_agg = get_aggregate_measures(au,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# Rescale the aggregated features to [0, 1].
# NOTE(review): fitted on all rows before cross-validation (mild leakage).
scaler = MinMaxScaler()
au_agg = scaler.fit_transform(au_agg)

In [47]:
au_agg.shape

(4540, 85)

In [51]:
clf = param_search(au_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.8s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    9.9s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   19.5s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   31.4s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:   59.3s finished


{'C': 10, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'linear'}


In [52]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 0.28436123348017617
std (train): 0.0005879448376669203
avg (validation): 0.10991189427312775
std (validation): 0.003646023206442037


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    4.5s finished


## Conclusion

Standard-scaling per actor in the first step gives the best validation accuracy (0.22, versus 0.11–0.18 for the other variants). Use this method.

# Gaze

In [56]:
gaze = get_cols(slices, GAZE_COLS)

In [57]:
# Apply the scaling chosen in the AU conclusion (standard, by video id) to the
# gaze slices; `gaze` is replaced by the scaled slices.
scaler = Scaler(gaze, "standard")
scaler.scale_by_video_id(video_ids)
gaze = scaler.slices

In [58]:
# Aggregate the scaled gaze slices into one feature row per slice (only the
# `means` measure is enabled).
gaze_agg = get_aggregate_measures(gaze,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# Rescale the aggregated features to [0, 1].
# NOTE(review): fitted on all rows before cross-validation (mild leakage).
scaler = MinMaxScaler()
gaze_agg = scaler.fit_transform(gaze_agg)

In [59]:
gaze_agg.shape

(4540, 40)

In [61]:
clf = param_search(gaze_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    8.5s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   23.8s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:  3.0min finished


{'C': 100, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}


In [62]:
svc = SVC(**clf.best_params_)
evaluate_scores(gaze_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 0.5267621145374449
std (train): 0.005158019395306828
avg (validation): 0.12136563876651982
std (validation): 0.006731588894244989


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    4.1s finished


# Pose 

In [63]:
# Extract the pose columns of every slice.
pose = get_cols(slices, POSE_COLS)

# Standard-scale them by video id, as for AU and gaze; `pose` is replaced by
# the scaled slices.
scaler = Scaler(pose, "standard")
scaler.scale_by_video_id(video_ids)
pose = scaler.slices

In [64]:
# Aggregate the scaled pose slices into one feature row per slice (only the
# `means` measure is enabled).
pose_agg = get_aggregate_measures(pose,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# Rescale the aggregated features to [0, 1].
# NOTE(review): fitted on all rows before cross-validation (mild leakage).
scaler = MinMaxScaler()
pose_agg = scaler.fit_transform(pose_agg)

In [65]:
pose_agg.shape

(4540, 30)

In [93]:
pose_agg.dtype

dtype('float64')

In [67]:
clf = param_search(pose_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    3.6s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   12.2s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   19.3s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:  1.2min finished


{'C': 75, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}


In [68]:
svc = SVC(**clf.best_params_)
evaluate_scores(pose_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 0.6438876651982379
std (train): 0.003558478947445703
avg (validation): 0.18656387665198237
std (validation): 0.01126582542074399


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    3.2s finished


# Early fusion

## Per actor normalization

In [79]:
au_gaze_pose = get_cols(slices, [*AU_INTENSITY_COLS, *GAZE_COLS, *POSE_COLS])

In [82]:
len(au_gaze_pose)

4540

In [83]:
# Early fusion: standard-scale the combined AU + gaze + pose slices by video
# id; `au_gaze_pose` is replaced by the scaled slices.
scaler = Scaler(au_gaze_pose, "standard")
scaler.scale_by_video_id(video_ids)
au_gaze_pose = scaler.slices

In [101]:
# Aggregate the scaled fused slices into one feature row per slice (only the
# `means` measure is enabled).
au_gaze_pose_agg = get_aggregate_measures(au_gaze_pose,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# Rescale the aggregated features to [0, 1].
# NOTE(review): fitted on all rows before cross-validation (mild leakage).
scaler = MinMaxScaler()
au_gaze_pose_agg = scaler.fit_transform(au_gaze_pose_agg)

In [102]:
au_gaze_pose_agg.shape

(4540, 155)

In [103]:
y.dtype

dtype('int64')

In [108]:
clf = param_search(au_gaze_pose_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:   15.5s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   29.2s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   46.1s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:  1.2min finished


{'C': 10, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}


In [89]:
y.shape

(4540,)

In [110]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_gaze_pose_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 1.0
std (train): 0.0
avg (validation): 0.38920704845814974
std (validation): 0.005422261508850735


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    9.6s finished


## Without normalization

In [111]:
# Baseline for the fused features: re-extract AU + gaze + pose from `slices`
# and skip the project Scaler step used in the previous section.
au_gaze_pose = get_cols(slices, [*AU_INTENSITY_COLS, *GAZE_COLS, *POSE_COLS])
# Aggregate into one feature row per slice (only the `means` measure is enabled).
au_gaze_pose_agg = get_aggregate_measures(au_gaze_pose,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)
# The aggregated features are still min-max scaled to [0, 1] before the SVM.
# NOTE(review): fitted on all rows before cross-validation (mild leakage).
scaler = MinMaxScaler()
au_gaze_pose_agg = scaler.fit_transform(au_gaze_pose_agg)

In [112]:
clf = param_search(au_gaze_pose_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:   18.1s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:   32.4s
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:   49.8s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:  1.3min finished


{'C': 10, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}


In [114]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_gaze_pose_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.


printing accuracy measures
avg (train): 0.9981277533039649
std (train): 0.0005615660257260841
avg (validation): 0.34162995594713663
std (validation): 0.020935541105206082


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    9.5s finished


# Yesterday's runs

In [48]:
def evaluate_scores(X, y, svc, scoring_method, n_splits=100, random_state=None):
    """Cross-validate ``svc`` and print train/validation score statistics.

    NOTE(review): this redefines ``evaluate_scores`` from earlier in the
    notebook; the only differences are the parameter names and the default
    fold count (100 here). Consider keeping a single definition.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to ``cross_validate``.
    y : array-like
        Target labels passed to ``cross_validate``.
    svc : estimator
        Unfitted estimator to evaluate.
    scoring_method : str
        Name of a single scikit-learn scorer (e.g. ``"accuracy"``); also used
        to build the ``train_<name>`` / ``test_<name>`` result keys.
    n_splits : int, default 100
        Number of cross-validation folds.
    random_state : int or None, default None
        Seed for the fold shuffling. ``None`` keeps the original
        non-deterministic behaviour; pass an int for reproducible folds.

    Returns
    -------
    None
        The mean and standard deviation of the scores are printed only.
    """
    # Hand the splitter to cross_validate directly instead of a one-shot
    # generator produced by `.split()`.
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    scores = cross_validate(estimator=svc,
                            X=X,
                            y=y,
                            scoring=[scoring_method],
                            verbose=1,
                            cv=kfold,
                            n_jobs=-1,
                            return_train_score=True,
                            )

    train_scores = scores['train_{}'.format(scoring_method)]
    validation_scores = scores['test_{}'.format(scoring_method)]

    print('printing {} measures'.format(scoring_method))
    print('avg (train):', np.mean(train_scores))
    print('std (train):', np.std(train_scores))
    print('avg (validation):', np.mean(validation_scores))
    print('std (validation):', np.std(validation_scores))

In [None]:
# NOTE(review): these imports sit mid-notebook; consider moving them to the import cell at the top.
from s3fs.core import S3FileSystem
s3 = S3FileSystem()
from sagemaker import get_execution_role

# this datafile has standardscaled data on a per actor basis
role = get_execution_role()  # assigned but not referenced in this cell
bucket='files-and-examples-01'
file = 'datasets/su_dataset/all_data.pickle'

# Open the pickle on S3 as a file-like object. Unlike the CSV cell earlier in
# the notebook, `path` is an open handle here, not a URL string.
path = s3.open('s3://{}/{}'.format(bucket, file))

SyntaxError: invalid syntax (<ipython-input-5-2bb7f4f2f84d>, line 10)

In [31]:
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# load this file because the bucket is trusted.
# `path` is the open S3 handle from the previous cell; the `with` block closes
# it once the pickle has been read instead of leaking the handle.
with path:
    file = pickle.load(path)

# The pickle is a dict holding the features under 'x' and the labels under 'y'.
x = file['x']
y = file['y']

# 'x' is itself keyed by modality.
au = x['au']
gaze = x['gaze']
pose = x['pose']

In [32]:
len(au)

10963

In [33]:
au[0].shape

(38, 17)

In [34]:
au[1].shape

(69, 17)

In [35]:
# Aggregate the AU slices loaded from the pickle into one feature row per
# slice; only the `means` measure is enabled.
au_agg = get_aggregate_measures(au,
                               means=True,
                               variance=False,
                               deltas=False,
                               peaks=False)

In [36]:
# Rescale every aggregated feature to MinMaxScaler's default [0, 1] range.
# NOTE(review): fitted on all rows before cross-validation (mild leakage), and
# `au_agg` is overwritten, so re-running this cell rescales scaled data.
scaler = MinMaxScaler()
au_agg = scaler.fit_transform(au_agg)

In [37]:
# 5-fold shuffled KFold (despite the `skf` name this is not StratifiedKFold).
# `splits` is a one-shot generator: it is consumed by the grid search below
# and must be recreated before fitting again.
# NOTE(review): no random_state is set, so the folds differ between runs.
skf = KFold(n_splits=5, shuffle=True)
splits = skf.split(au_agg, y)

In [38]:
# Regularization parameter: lower C -> more regularization (less overfitting),
# larger C -> less regularization (and possibly more overfitting).
c_values = [0.1, 1, 5, 10, 25, 50, 75, 100]

# Kernel coefficient candidates; SVC ignores gamma for the 'linear' kernel.
gamma = [1, 0.1, 0.01, 0.001, 0.0001]
kernel = ['rbf', 'linear', 'poly', 'sigmoid']

# Full grid: 8 C values x 5 gammas x 4 kernels = 160 candidates.
parameters = {'class_weight': ['balanced'],
              'C': c_values,
              'gamma': gamma,
              'kernel': kernel,
              }

# Inline copy of the search built by param_search(); uses the `splits`
# generator created in the previous cell as the cross-validation folds.
svc = SVC()
clf = GridSearchCV(estimator=svc,
                   param_grid=parameters,
                   scoring='accuracy',
                   verbose=5,
                   cv=splits,
                   n_jobs=-1,
                   )

In [39]:
clf.fit(au_agg, y)

Fitting 5 folds for each of 160 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   19.5s
[Parallel(n_jobs=-1)]: Done 144 tasks      | elapsed:   55.4s
[Parallel(n_jobs=-1)]: Done 306 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 504 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:  5.1min finished


GridSearchCV(cv=<generator object _BaseKFold.split at 0x7fe6a49447d0>,
             error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid={'C': [0.1, 1, 5, 10, 25, 50, 75, 100],
                         'class_weight': ['balanced'],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf', 'linear', 'poly', 'sigmoid']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=5)

In [40]:
clf.best_params_

{'C': 10, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}

In [49]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 72 concurrent workers.
[Parallel(n_jobs=-1)]: Done  58 out of 100 | elapsed:  1.4min remaining:   59.9s


printing accuracy measures
avg (train): 0.9549715918192548
std (train): 0.0005959840092633306
avg (validation): 0.23423603002502083
std (validation): 0.04235634749554646


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:  2.1min finished


# Grid Search without shuffle/min max normalization

In [16]:
clf.best_params_

{'C': 1, 'class_weight': 'balanced', 'gamma': 0.01, 'kernel': 'rbf'}

In [17]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 48 concurrent workers.


printing accuracy measures
avg (train): 0.19410736662373554
std (train): 0.004637956432709442
avg (validation): 0.12341420611700801
std (validation): 0.0124849801140342


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   26.1s finished


In [21]:
# Best parameters from the min-max-scaled grid search, kept for reference:
#**{'C': 10, 'class_weight': 'balanced', 'gamma': 1, 'kernel': 'rbf'}

# Manual trial with hand-picked parameters (C=2, gamma=0.01) instead of
# clf.best_params_.
svc = SVC(**{'C': 2, 'class_weight': 'balanced', 'gamma': 0.01, 'kernel': 'rbf'})
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 48 concurrent workers.


printing accuracy measures
avg (train): 0.23225845460713476
std (train): 0.004177855646772091
avg (validation): 0.12095182581605039
std (validation): 0.009047825781993136


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   25.1s finished


In [23]:
# Min-max scaling is left disabled for this experiment (see section heading):
#scaler = MinMaxScaler()
#au_agg = scaler.fit_transform(au_agg)
# Shuffle features and labels together, keeping rows aligned.
# NOTE(review): no random_state is set and the inputs are overwritten, so
# every re-run of this cell produces a different ordering.
au_agg, y = shuffle(au_agg, y)

In [24]:
svc = SVC(**clf.best_params_)
evaluate_scores(au_agg, y, svc, scoring_method="accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 48 concurrent workers.


printing accuracy measures
avg (train): 0.1940390997751349
std (train): 0.002745758375877358
avg (validation): 0.13746380320928236
std (validation): 0.011494319533980748


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   25.9s finished
