# MLP Model

In [1]:
import pandas as pd
import numpy as np
import sagemaker
import os
from utils import standardize_data
from sagemaker import LinearLearner
from sagemaker import get_execution_role

# specify an output path
prefix = 'wildfire'
data_dir = 'wildfire_data'
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
output_path = 's3://{}/{}'.format(bucket, prefix)
role = get_execution_role()

In [2]:
from sagemaker.sklearn.estimator import SKLearn

sklearn_mlp = SKLearn(entry_point='train_mlp.py', 
                 source_dir='source_sklearn',
                 role=role,
                 train_instance_count=1,
                 train_instance_type='ml.c4.xlarge',
                 sagemaker_session=sagemaker_session,
                 py_version='py3',
                 framework_version='0.23-1')

train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [3]:
%%time
output_path='s3://{}/{}/'.format(bucket, prefix)

# Train your estimator on S3 training data
sklearn_mlp.fit({'train': output_path})

2021-02-05 06:44:57 Starting - Starting the training job...
2021-02-05 06:45:00 Starting - Launching requested ML instancesProfilerReport-1612507497: InProgress
......
2021-02-05 06:46:08 Starting - Preparing the instances for training......
2021-02-05 06:47:17 Downloading - Downloading input data...
2021-02-05 06:47:57 Training - Training image download completed. Training in progress..[34m2021-02-05 06:47:58,351 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-02-05 06:47:58,353 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-02-05 06:47:58,364 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-02-05 06:47:58,690 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-02-05 06:48:01,716 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-02-05 06:48:01,729 sage

In [4]:
mlp_predictor = sklearn_mlp.deploy(initial_instance_count=1, instance_type='ml.t2.medium')

-----------------!

In [5]:
test_x = pd.read_csv(os.path.join(data_dir, 'test.csv'), header=None)
test_y = pd.read_csv(os.path.join(data_dir, 'test_y.csv'), header=None)

In [8]:
prediction_batches = [mlp_predictor.predict(batch) for batch in np.array_split(test_x, 100)]

In [9]:
prediction_batches[0]

array([0, 0, 0, ..., 6, 4, 4])

In [10]:
test_y_preds = np.concatenate([batch for batch in prediction_batches])

In [15]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from utils import print_f1_scores
accuracy_mlp = accuracy_score(test_y, test_y_preds)
f1_score_mlp = f1_score(test_y, test_y_preds, average=None)
df_f1_score_mlp = print_f1_scores(f1_score_mlp)

In [16]:
accuracy_mlp

0.3790318045049352

In [17]:
df_f1_score_mlp

Unnamed: 0,Causes Description,F1 scores
0,Lightning,0.5849
1,Equipment Use,0.004372
2,Smoking,0.0
3,Campfire,0.098736
4,Debris Burning,0.520976
5,Railroad,0.0
6,Arson,0.22961
7,Children,0.031177
8,Miscellaneous,0.385196
9,Fireworks,0.100149
