# Spam classification local pipeline

This pipeline is adapted to run entirely on a local machine. It consists of three stages: dataset preprocessing, model training, and model serving.

In [None]:
import os
import boto3
import sagemaker
import numpy as np
from sagemaker.tensorflow import TensorFlow

Download the dataset:

In [None]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip
!unzip smsspamcollection.zip -d data && rm data/readme smsspamcollection.zip

## Data processing

Preprocess the dataset and save the vocabulary, training data, and test data locally.

In [None]:
run processing/data_processing.py --data_path=data --output_path=/tmp/sagemaker_output

## Model training

Get the IAM execution role for local training:

In [None]:
# Find the ARN of a SageMaker execution role in the current AWS account.
client = boto3.client("iam")

# IAM returns roles in pages; walk every page so that accounts with many
# roles are searched completely instead of only the first page of results.
roles = [
    iam_role
    for page in client.get_paginator("list_roles").paginate()
    for iam_role in page["Roles"]
]
sagemaker_execution_roles = [
    iam_role
    for iam_role in roles
    if "AmazonSageMaker-ExecutionRole" in iam_role["RoleName"]
]

# Fail with an actionable message rather than a bare IndexError when the
# account has no matching role.
if not sagemaker_execution_roles:
    raise RuntimeError(
        "No IAM role whose name contains 'AmazonSageMaker-ExecutionRole' was "
        "found; create a SageMaker execution role before running this cell."
    )

# Same selection as before: the last matching role in listing order.
role = sagemaker_execution_roles[-1]["Arn"]

Train model locally:

In [None]:
# Hyperparameters passed to the estimator below.
hyperparameters = dict(epochs=5, batch_size=32, learning_rate=0.01)

# Input channels point at the local directories under the preprocessing
# output path; the "file://" scheme keeps the data on this machine.
inputs = dict(
    train="file:///tmp/sagemaker_output/train",
    test="file:///tmp/sagemaker_output/test",
)

# TensorFlow estimator configured for a single "local" instance, using
# training/train.py as the entry point.
estimator = TensorFlow(
    base_job_name="spam-classification",
    role=role,
    entry_point="train.py",
    source_dir="training",
    model_dir="/opt/ml/model",
    hyperparameters=hyperparameters,
    train_instance_count=1,
    train_instance_type="local",
    framework_version="2.1",
    py_version="py3",
    script_mode=True,
)

# Start the training job on the two channels defined above.
estimator.fit(inputs)

## Model deployment

Deploy model locally:

In [None]:
# Deploy the trained estimator to a single endpoint of instance type "local".
predictor = estimator.deploy(instance_type="local", initial_instance_count=1)

Test model:

In [None]:
def get_results(score):
    """Threshold a prediction score into a binary label.

    Returns 1 when ``score`` is strictly greater than 0.5, otherwise 0.
    """
    if score > 0.5:
        return 1
    return 0


# Load the saved test features and send the first sample to the endpoint.
test_samples = np.load("/tmp/sagemaker_output/test/X_test.npy")
first_sample = test_samples[0]
response = predictor.predict(first_sample)

# Take the first value of the first prediction and threshold it into a label.
result = response["predictions"][0][0]
print(f"\n\nPrediction result: {get_results(result)}")

Delete endpoint (stop serving container):

In [None]:
# Tear down the endpoint created by estimator.deploy() once testing is done.
predictor.delete_endpoint()