# Refinement MLP model

In [2]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import accuracy_score, f1_score
from utils import print_f1_scores

import sagemaker
from sagemaker import get_execution_role
from sagemaker.sklearn.estimator import SKLearn
import warnings
warnings.filterwarnings('ignore')

In [3]:
prefix = 'wildfire'
data_dir = 'wildfire_data'
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
output_path = 's3://{}/{}'.format(bucket, prefix)
role = get_execution_role()

In [4]:
test_ref_location = sagemaker_session.upload_data(os.path.join(data_dir, 'test_ref.csv'), key_prefix=prefix)
val_ref_location = sagemaker_session.upload_data(os.path.join(data_dir, 'validation_ref.csv'), key_prefix=prefix)
train_ref_location = sagemaker_session.upload_data(os.path.join(data_dir, 'train_ref.csv'), key_prefix=prefix)

In [5]:
sklearn_mlp = SKLearn(entry_point='train_mlp.py', 
                 source_dir='source_sklearn',
                 role=role,
                 train_instance_count=1,
                 train_instance_type='ml.c4.xlarge',
                 sagemaker_session=sagemaker_session,
                 py_version='py3',
                 framework_version='0.23-1')

train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [None]:
%%time
output_path='s3://{}/{}/'.format(bucket, prefix)

# Train your estimator on S3 training data
sklearn_mlp.fit({'train': output_path})

2021-02-10 21:32:57 Starting - Starting the training job...
2021-02-10 21:33:21 Starting - Launching requested ML instancesProfilerReport-1612992776: InProgress
......
2021-02-10 21:34:22 Starting - Preparing the instances for training......
2021-02-10 21:35:23 Downloading - Downloading input data...
2021-02-10 21:35:54 Training - Training image download completed. Training in progress..[34m2021-02-10 21:35:55,121 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-02-10 21:35:55,124 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-02-10 21:35:55,134 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-02-10 21:35:55,493 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-02-10 21:35:55,507 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-02-10 21:35:55,519 sage

In [None]:
mlp_predictor = sklearn_mlp.deploy(initial_instance_count=1, instance_type='ml.t2.medium')

---------

In [8]:
val = pd.read_csv(os.path.join(data_dir, 'validation_ref.csv'), header=None)
test_x = pd.read_csv(os.path.join(data_dir, 'test_ref.csv'), header=None)
test_y = pd.read_csv(os.path.join(data_dir, 'test_ref_y.csv'), header=None)
val_x = val.iloc[:,1:]
val_y = val.iloc[:,0]

In [9]:
prediction_batches_test = [mlp_predictor.predict(batch) for batch in np.array_split(test_x, 100)]
prediction_batches_val = [mlp_predictor.predict(batch) for batch in np.array_split(val_x, 100)]
test_y_preds = np.concatenate([batch for batch in prediction_batches_test])
val_y_preds = np.concatenate([batch for batch in prediction_batches_val])

In [10]:
accuracy_mlp_test = accuracy_score(test_y, test_y_preds)
accuracy_mlp_val = accuracy_score(val_y, val_y_preds)
f1_score_mlp_test = f1_score(test_y, test_y_preds, average=None)
f1_score_mlp_val = f1_score(val_y, val_y_preds, average=None)

In [11]:
cause_for_code_refinement = {0: 'Lightning', 1: 'Debris Burning', 2: 'Arson', 3: 'Miscellaneous', 4:'Missing/Undefined', 5: 'Other'}
df_f1_score_mlp_test = print_f1_scores(f1_score_mlp_test, cause_for_code_refinement)
df_f1_score_mlp_val = print_f1_scores(f1_score_mlp_val, cause_for_code_refinement)

In [12]:
print(f"The accuracy for val set is: {accuracy_mlp_val}")
print(f"The accuracy for test set is: {accuracy_mlp_test}")

The accuracy for val set is: 0.41180842459614836
The accuracy for test set is: 0.3902012741726335


In [13]:
df_f1_score_mlp_test

Unnamed: 0,Causes Description,F1 scores
0,Lightning,0.612258
1,Debris Burning,0.495596
2,Arson,0.298165
3,Miscellaneous,0.376625
4,Missing/Undefined,0.098685
5,Other,0.257304


In [14]:
df_f1_score_mlp_val

Unnamed: 0,Causes Description,F1 scores
0,Lightning,0.615521
1,Debris Burning,0.506771
2,Arson,0.321058
3,Miscellaneous,0.353165
4,Missing/Undefined,0.144904
5,Other,0.290898


In [15]:
mlp_predictor.delete_endpoint()