In [109]:
import os
import sys
import boto3
import pickle

import numpy as np
from sklearn.svm import SVC

from sagemaker.sklearn.estimator import SKLearn
from sklearn.model_selection import cross_validate


In [None]:
# Switch the working directory to this notebook's folder so that the relative
# paths used in later cells ('../../../', 'scripts', './model') resolve from it.
# NOTE(review): hard-coded absolute path; this only works on a host where the
# repository is checked out under /root. Consider a configurable base path.
os.chdir('/root/emotional-recognition/notebooks/functionals/supervised_learning/')

In [13]:
pwd

'/root/emotional-recognition/notebooks/functionals/supervised_learning'

In [10]:
# Make the directory three levels above the working directory importable,
# adding it to sys.path only once so re-running the cell does not duplicate it.
module_path = os.path.abspath('../../../')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [11]:
from label_transforms import *


In [84]:
from s3fs.core import S3FileSystem
from sagemaker import get_execution_role

s3 = S3FileSystem()
role = get_execution_role()

bucket = 'files-and-examples-01'
train_file = 'datasets/su_dataset/video_data_functionals_A74.npz'

# Read the feature matrix and labels out of the .npz archive stored on S3.
# Both the S3 file object and the NpzFile hold open handles, so they are
# wrapped in context managers and released as soon as the arrays are read.
# The arrays must be pulled out inside the `with`, because NpzFile members
# are read on access.
with s3.open('s3://{}/{}'.format(bucket, train_file)) as train_path:
    with np.load(train_path) as f:
        x = f['x']
        y = f['y']

In [85]:
# Full S3 URI of the dataset file; used as the "train" input channel of the
# training job in a later cell.
train_uri = f"s3://{bucket}/{train_file}"

In [86]:
train_uri

's3://files-and-examples-01/datasets/su_dataset/video_data_functionals_A74.npz'

In [34]:
x.shape

(341, 85)

In [35]:
y.shape

(341,)

In [95]:
# Input channels handed to the estimator: one channel, "train", pointing at
# the dataset URI.
inputs = dict(train=train_uri)

# Instance type the training job runs on.
train_instance_type = "ml.m5.large"

In [96]:
inputs

{'train': 's3://files-and-examples-01/datasets/su_dataset/video_data_functionals_A74.npz'}

In [97]:
# Keyword arguments for the SKLearn estimator: the entry-point script lives in
# ./scripts and the job runs on a single instance of `train_instance_type`
# under the notebook's execution role.
estimator_parameters = dict(
    entry_point="svm_param_search.py",
    source_dir="scripts",
    framework_version="0.23-1",
    py_version="py3",
    instance_type=train_instance_type,
    instance_count=1,
    role=role,
    base_job_name="svm-param-search",
)

estimator = SKLearn(**estimator_parameters)

In [126]:
# Run the training job on the channels defined in `inputs`.
# Note that logs="None" is the *string* "None", not Python's None.
# NOTE(review): presumably this suppresses the container logs (the captured
# output shows only job-status lines); confirm against the SageMaker SDK docs.
estimator.fit(inputs, logs="None")


2022-11-02 16:51:27 Starting - Starting the training job..
2022-11-02 16:51:42 Starting - Preparing the instances for training...............
2022-11-02 16:53:00 Downloading - Downloading input data.......
2022-11-02 16:53:41 Training - Downloading the training image........
2022-11-02 16:54:26 Training - Training image download completed. Training in progress.....
2022-11-02 16:54:52 Uploading - Uploading generated training model..
2022-11-02 16:55:08 Completed - Training job completed


In [99]:
# Copy the model archive produced by the training job (estimator.model_data)
# to ./model/model.tar.gz, then unpack it into ./model.
# NOTE(review): ./model/model.tar.gz is overwritten on every run, so an older
# archive from a previous job is silently replaced.
!aws s3 cp {estimator.model_data} ./model/model.tar.gz
!tar -xvzf ./model/model.tar.gz -C ./model

download: s3://sagemaker-eu-west-1-061749825266/svm-param-search-2022-11-02-15-51-51-300/output/model.tar.gz to model/model.tar.gz


In [103]:
# Load the pickled search result extracted from the model archive.
# The file is opened in a `with` block so the handle is closed right after
# unpickling (the original left it open until garbage collection).
# NOTE: pickle.load can execute arbitrary code; only load archives produced
# by training jobs you trust.
with open("model/model.pickle", 'rb') as model_file:
    loaded_model = pickle.load(model_file)



In [104]:
def evaluate_scores(X, y, clf, scoring_method, cv=None):
    """Cross-validate ``clf`` and print mean/std of its train and validation scores.

    Parameters
    ----------
    X, y
        Features and targets, forwarded unchanged to ``cross_validate``.
    clf
        Estimator forwarded to ``cross_validate`` as ``estimator``.
    scoring_method : str
        Name of a single scorer (e.g. ``"accuracy"``). It is passed as the
        only entry of ``scoring`` and is also used to build the
        ``train_<name>`` / ``test_<name>`` keys read from the result.
    cv : optional
        Forwarded to ``cross_validate``'s ``cv`` argument. The default
        ``None`` defers to ``cross_validate``'s own default splitter, which
        is the behaviour this function had before the parameter existed.

    Returns
    -------
    None
        The statistics are printed, not returned.
    """
    scores = cross_validate(
        estimator=clf,
        X=X,
        y=y,
        scoring=[scoring_method],
        cv=cv,
        verbose=1,
        n_jobs=-1,  # use all available cores
        return_train_score=True,  # needed for the 'train_<name>' entries below
    )

    print('printing {} measures'.format(scoring_method))
    # cross_validate labels held-out folds "test"; they are reported here as
    # "validation". Order matches the original output: train first.
    for split, label in (('train', 'train'), ('test', 'validation')):
        split_scores = scores['{}_{}'.format(split, scoring_method)]
        print('avg ({}):'.format(label), np.mean(split_scores))
        print('std ({}):'.format(label), np.std(split_scores))

In [107]:
# Take the estimator stored on the unpickled object.
# NOTE(review): presumably `loaded_model` is a fitted scikit-learn search
# object (GridSearchCV or similar) written by svm_param_search.py; confirm.
svc = loaded_model.best_estimator_

In [110]:
evaluate_scores(x, y, svc, "accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


printing accuracy measures
avg (train): 0.5102644904115492
std (train): 0.030418571777857506
avg (validation): 0.1758312020460358
std (validation): 0.0666594983968966


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.5s finished


In [111]:
# Build a fresh, unfitted SVC from the stored `best_params_` instead of
# reusing `best_estimator_`. The cross-validation output recorded for this
# variant is identical to the output recorded for the `best_estimator_` run.
svc = SVC(**loaded_model.best_params_)

In [112]:
evaluate_scores(x, y, svc, "accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


printing accuracy measures
avg (train): 0.5102644904115492
std (train): 0.030418571777857506
avg (validation): 0.1758312020460358
std (validation): 0.0666594983968966


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.5s finished


# Repeating the parameter search and cross-validation on the A220 dataset

In [113]:
from s3fs.core import S3FileSystem
from sagemaker import get_execution_role

s3 = S3FileSystem()
role = get_execution_role()

bucket = 'files-and-examples-01'
train_file = 'datasets/su_dataset/video_data_functionals_A220.npz'

# Read the feature matrix and labels out of the .npz archive stored on S3.
# Both the S3 file object and the NpzFile hold open handles, so they are
# wrapped in context managers and released as soon as the arrays are read.
# NOTE: this rebinds `x` and `y`, replacing whatever dataset was loaded before.
with s3.open('s3://{}/{}'.format(bucket, train_file)) as train_path:
    with np.load(train_path) as f:
        x = f['x']
        y = f['y']

In [116]:
# Full S3 URI of the A220 dataset file; used as the "train" input channel of
# the training job in a later cell.
train_uri = f"s3://{bucket}/{train_file}"

In [117]:
train_uri

's3://files-and-examples-01/datasets/su_dataset/video_data_functionals_A220.npz'

In [118]:
# Input channels handed to the estimator: one channel, "train", pointing at
# the dataset URI.
inputs = dict(train=train_uri)

# Instance type the training job runs on.
train_instance_type = "ml.m5.large"

In [119]:
# Keyword arguments for the SKLearn estimator: the entry-point script lives in
# ./scripts and the job runs on a single instance of `train_instance_type`
# under the notebook's execution role.
estimator_parameters = dict(
    entry_point="svm_param_search.py",
    source_dir="scripts",
    framework_version="0.23-1",
    py_version="py3",
    instance_type=train_instance_type,
    instance_count=1,
    role=role,
    base_job_name="svm-param-search",
)

estimator = SKLearn(**estimator_parameters)

In [125]:
# Run the training job on the channels defined in `inputs`.
# Note that logs="None" is the *string* "None", not Python's None.
# NOTE(review): presumably this suppresses the container logs (the captured
# output shows only job-status lines); confirm against the SageMaker SDK docs.
estimator.fit(inputs, logs="None")


2022-11-02 16:31:24 Starting - Starting the training job..........
2022-11-02 16:32:18 Starting - Preparing the instances for training..............
2022-11-02 16:33:33 Downloading - Downloading input data.......
2022-11-02 16:34:13 Training - Downloading the training image.........
2022-11-02 16:35:03 Training - Training image download completed. Training in progress......
2022-11-02 16:35:34 Uploading - Uploading generated training model..
2022-11-02 16:35:50 Completed - Training job completed


In [121]:
# Copy the model archive produced by the training job (estimator.model_data)
# to ./model/model.tar.gz, then unpack it into ./model.
# NOTE(review): this reuses the same local paths as the earlier download, so
# the previously extracted model.pickle is overwritten.
!aws s3 cp {estimator.model_data} ./model/model.tar.gz
!tar -xvzf ./model/model.tar.gz -C ./model

download: s3://sagemaker-eu-west-1-061749825266/svm-param-search-2022-11-02-16-16-09-460/output/model.tar.gz to model/model.tar.gz
model.pickle


In [122]:
# Load the pickled search result extracted from the model archive.
# The file is opened in a `with` block so the handle is closed right after
# unpickling (the original left it open until garbage collection).
# NOTE: pickle.load can execute arbitrary code; only load archives produced
# by training jobs you trust.
with open("model/model.pickle", 'rb') as model_file:
    loaded_model = pickle.load(model_file)



In [123]:
# Build an unfitted SVC from the stored `best_params_` and report its
# cross-validated accuracy on the currently loaded `x` / `y` arrays.
svc = SVC(**loaded_model.best_params_)
evaluate_scores(x, y, svc, "accuracy")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


printing accuracy measures
avg (train): 0.7187324903460286
std (train): 0.024943040369078975
avg (validation): 0.40321931589537224
std (validation): 0.08675192111740986


[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    1.2s finished
