# LSTM Model

In [1]:
import os

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

from modules.utils.s3 import get_from_s3, put_to_s3
from constants import BUCKET, DATA_PROCESSED_PREFIX

In [2]:
# Load the processed train/test splits from S3 as plain NumPy arrays
def _csv_to_array(uri):
    """Read the header-less CSV at `uri` and return its contents as a NumPy array."""
    frame = pd.read_csv(uri, header=None)
    return frame.to_numpy()


train_X = _csv_to_array(f"s3://{BUCKET}/{DATA_PROCESSED_PREFIX}train_X.csv")
train_Y = _csv_to_array(f"s3://{BUCKET}/{DATA_PROCESSED_PREFIX}train_Y.csv")
test_X = _csv_to_array(f"s3://{BUCKET}/{DATA_PROCESSED_PREFIX}test_X.csv")
test_Y = _csv_to_array(f"s3://{BUCKET}/{DATA_PROCESSED_PREFIX}test_Y.csv")

In [3]:
# Features are cast to int64 so they can be used as indices into an embedding layer
train_data = torch.from_numpy(train_X).to(torch.int64)

# Only the first label column is kept, as float32
first_label_column = train_Y[:, 0]
train_labels = torch.from_numpy(first_label_column).to(torch.float32)

In [4]:
# Carve out the first 100 examples as a small subset for a quick local run
train_sample_X = train_data[:100]
train_sample_y = train_labels[:100]

# Pair features with labels and serve them in mini-batches of 10 (in stored order)
train_sample_ds = torch.utils.data.TensorDataset(train_sample_X, train_sample_y)
train_sample_dl = torch.utils.data.DataLoader(dataset=train_sample_ds, batch_size=10)

In [5]:
class LSTMClassifier(nn.Module):
    """Binary sequence classifier: embedding -> LSTM -> linear -> sigmoid."""

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        """Create a RNN model by setting up the layers.

        Parameters
        ----------
        vocab_size : int
            size of word dictionary
        embedding_dim : int
            size of embedding layer
        hidden_dim : int
            size of hidden layer
        """
        super(LSTMClassifier, self).__init__()

        # Token id 0 is reserved as padding (padding_idx=0).
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        # Default batch_first=False: the LSTM expects (seq_length, batch_size, embedding_dim).
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.linear = nn.Linear(in_features=hidden_dim, out_features=1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Conduct forward pass.

        Parameters
        ----------
        x : torch.LongTensor
            token ids of shape (batch_size, seq_length)

        Returns
        -------
        torch.Tensor
            sigmoid outputs of shape (batch_size,), one per input sequence
        """
        x = self.embedding(x)  # out: batch_size, seq_length, embedding_dim
        x = torch.transpose(x, 0, 1)  # out: seq_length, batch_size, embedding_dim
        # h: num_layers, batch_size, hidden_dim. Sequences are not packed, so the
        # final state is taken after every time step, padded positions included.
        _, (h, _) = self.lstm(x)
        x = self.linear(h[-1])  # last layer's final state -> batch_size, 1
        # Squeeze only the trailing feature axis. A bare squeeze() would also drop
        # the batch axis when batch_size == 1, yielding a 0-d tensor that no longer
        # matches a (1,)-shaped target.
        return self.activation(x.squeeze(-1))

In [6]:
# Sample model, optimizer and loss for a quick local run.
# 10000 is the size of the word dictionary.
# NOTE(review): the SageMaker estimator further down passes vocab_size=10002 — confirm which value matches the data.
model = LSTMClassifier(vocab_size=10000, embedding_dim=100, hidden_dim=5000)
optimizer = optim.Adam(params=model.parameters())
loss_fn = nn.BCELoss()

In [7]:
# Run settings for the local sample training
epochs = 5
train_loader = train_sample_dl
device = "cuda" if torch.cuda.is_available() else "cpu"

print("Using {} device".format(device))

model.to(device)
for epoch in range(1, epochs + 1):
    model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        predictions = model(inputs)
        loss = loss_fn(predictions, targets)

        # Backward pass; cap the gradient norm at 5 in case it explodes
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 5)

        # Update parameters
        optimizer.step()

        total_loss += loss.item()

    # Report the loss averaged over the number of batches
    mean_loss = total_loss / len(train_loader)
    print("Epoch: {}, BCELoss: {}".format(epoch, mean_loss))

Using cuda device
[2021-09-27 03:00:52.390 pytorch-1-6-gpu-py3-ml-g4dn-xlarge-c38c5dd95dfe4839ad1d26077718:38 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2021-09-27 03:00:52.479 pytorch-1-6-gpu-py3-ml-g4dn-xlarge-c38c5dd95dfe4839ad1d26077718:38 INFO profiler_config_parser.py:102] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.
Epoch: 1, BCELoss: 1.145200675725937
Epoch: 2, BCELoss: 0.3667999912053347
Epoch: 3, BCELoss: 0.26083638407289983
Epoch: 4, BCELoss: 0.2583302170038223
Epoch: 5, BCELoss: 0.25588244646787645


## Set up data for SageMaker

In [8]:
import numpy as np
import pandas as pd
import sagemaker

from constants import BUCKET


# S3 key prefix under which this notebook uploads its SageMaker inputs
prefix = "sagemaker/lstm_model"

# Execution role of the current environment, and a session whose default bucket is BUCKET
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session(default_bucket=BUCKET)

In [9]:
# Create concatenated train and test dataset: first label column, then the features.
# NOTE(review): this rebinds `train_data`, which an earlier cell bound to a tensor;
# re-running that earlier sampling cell after this one will fail.
train_data = pd.concat([pd.DataFrame(train_Y[:, 0]), pd.DataFrame(train_X)], axis=1)
test_data = pd.concat([pd.DataFrame(test_Y[:, 0]), pd.DataFrame(test_X)], axis=1)

# Save locally first. Create the target directory so to_csv does not fail on a
# fresh checkout where "data/" does not exist yet.
os.makedirs("data", exist_ok=True)
train_data.to_csv("data/train.csv", header=False, index=False)
test_data.to_csv("data/test.csv", header=False, index=False)

# Upload to S3 via SageMaker
train_location = sagemaker_session.upload_data("data/train.csv", key_prefix=prefix)
test_location = sagemaker_session.upload_data("data/test.csv", key_prefix=prefix)

print("Train: {}".format(train_location))
print("Test: {}".format(test_location))

Train: s3://aiforallcapstone-crimson/sagemaker/lstm_model/train.csv
Test: s3://aiforallcapstone-crimson/sagemaker/lstm_model/test.csv


## Train using a SageMaker training job

In [24]:
from sagemaker.pytorch import PyTorch

# Hyperparameters forwarded to train.py. embedding_dim and hidden_dim are not
# passed here (presumably train.py supplies defaults — verify in the script).
training_hyperparameters = {
    "epochs": 10,
    "vocab_size": 10002,
}

# NOTE(review): framework_version 0.4.0 differs from the PyTorch 1.6 kernel named
# in this notebook's logged output — confirm the training script targets 0.4.0.
estimator = PyTorch(
    entry_point="train.py",
    source_dir="train",
    role=role,
    framework_version="0.4.0",
    py_version="py3",
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    hyperparameters=training_hyperparameters,
)

In [None]:
# Launch the training job, exposing the uploaded training CSV as the "training" channel
training_channels = {'training': train_location}
estimator.fit(training_channels)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating training-job with name: sagemaker-pytorch-2021-09-27-03-47-20-144


2021-09-27 03:47:20 Starting - Starting the training job...
2021-09-27 03:47:44 Starting - Launching requested ML instancesProfilerReport-1632714440: InProgress
......
2021-09-27 03:48:45 Starting - Preparing the instances for training......
2021-09-27 03:49:46 Downloading - Downloading input data......
2021-09-27 03:50:50 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-09-27 03:50:51,178 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-09-27 03:50:51,200 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-09-27 03:50:51,203 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-09-27 03:50:51,408 sagemaker-containers INFO     Module train does not provide a setup.py. [0m
[34mGenerating setup