# How to use PyTorch LSTMs for time series regression

# Data

1. Download the data from the URLs listed in the docstring in `preprocess_data.py`.
2. Run the `preprocess_data.py` script to compile the individual sensors PM2.5 data into
   a single DataFrame.

In [26]:
import pandas as pd

df = pd.read_csv("dataset/allemgdata.csv")
df['pos_elbow'] = df['pos_elbow'] - df['pos_elbow'].min()
df['pos_shfe'] = df['pos_shfe'] - df['pos_shfe'].min()
df['pos_shaa'] = df['pos_shaa'] - df['pos_shaa'].min()
df

Unnamed: 0,emg_elbow,emg_shfe,emg_shaa,pos_elbow,pos_shfe,pos_shaa
0,13.698,11.835,8.3784,0.061359,0.049088,0.595180
1,13.706,11.901,8.3871,0.061359,0.049088,0.589050
2,13.716,11.970,8.3912,0.061359,0.049088,0.582910
3,13.742,12.055,8.3937,0.061359,0.049088,0.576780
4,13.762,12.146,8.4052,0.061359,0.049088,0.570640
...,...,...,...,...,...,...
65939,154.180,127.530,145.7300,0.092039,0.128852,0.067496
65940,152.720,126.040,144.1700,0.092039,0.128852,0.067496
65941,151.510,124.770,142.8900,0.092039,0.128852,0.067496
65942,150.350,123.420,141.7400,0.092039,0.128852,0.067496


In [27]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

plot_template = dict(
    layout=go.Layout({
        "font_size": 18,
        "xaxis_title_font_size": 24,
        "yaxis_title_font_size": 24})
)

fig = px.line(df[['emg_elbow','emg_shfe','emg_shaa']][0:10000], labels=dict(
    created_at="Date", value="Amplitude", variable="Sensor"
))
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()
# fig.write_image("emgdata.png", width=1200, height=600)

In [28]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = "plotly_white"

plot_template = dict(
    layout=go.Layout({
        "font_size": 18,
        "xaxis_title_font_size": 24,
        "yaxis_title_font_size": 24})
)

fig = px.line(df[['pos_elbow','pos_shfe','pos_shaa']][0:10000], labels=dict(
    created_at="Date", value="Amplitude", variable="Sensor"
))
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()

## Create the target variable

Here we are shifting the data n times ahead

In [29]:
target_sensor = ['pos_elbow','pos_shfe','pos_shaa']

# If you only want EMG data as features:
# features = list(df.columns.difference([target_sensor]))

# If you want both position and EMG as features:
features = list(df.columns[0:6])
print("our features are: ", features)
    
forecast_lead = 15
target = f"pos_elbow_lead{forecast_lead} pos_shfe_lead{forecast_lead} pos_shaa_lead{forecast_lead}"
target = target.split()
print("our target to predict is: ", target)

df[target] = df[target_sensor].shift(-forecast_lead)
df = df.iloc[:-forecast_lead]


our features are:  ['emg_elbow', 'emg_shfe', 'emg_shaa', 'pos_elbow', 'pos_shfe', 'pos_shaa']
our target to predict is:  ['pos_elbow_lead15', 'pos_shfe_lead15', 'pos_shaa_lead15']


## Create a hold-out test set and preprocess the data

In [30]:
test_start = 55000

df_train = df.loc[:test_start].copy()
df_test = df.loc[test_start:].copy()

print("Test set fraction:", len(df_test) / len(df))

Test set fraction: 0.16576923660301232


## Standardize the features and target, based on the training set

In [31]:
target_mean = df_train[target].mean()
target_stdev = df_train[target].std()

for c in df_train.columns:
    mean = df_train[c].mean()
    stdev = df_train[c].std()

    df_train[c] = (df_train[c] - mean) / stdev
    df_test[c] = (df_test[c] - mean) / stdev

## Create datasets that PyTorch `DataLoader` can work with

In [32]:
import torch
from torch.utils.data import Dataset

class SequenceDataset(Dataset):
    def __init__(self, dataframe, target, features, sequence_length=5):
        self.features = features
        self.target = target
        self.sequence_length = sequence_length
        self.y = torch.tensor(dataframe[self.target].values).float()
        self.X = torch.tensor(dataframe[self.features].values).float()

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, i): 
        if i >= self.sequence_length - 1:
            i_start = i - self.sequence_length + 1
            x = self.X[i_start:(i + 1), :]
        else:
            padding = self.X[0].repeat(self.sequence_length - i - 1, 1)
            x = self.X[0:(i + 1), :]
            x = torch.cat((padding, x), 0)

        return x, self.y[i]

In [33]:
i = 100
sequence_length = 50

train_dataset = SequenceDataset(
    df_train,
    target=target,
    features=features,
    sequence_length=sequence_length
)

X, y = train_dataset[i]
print(X)
print(y)

tensor([[-0.7729, -0.6598, -0.8442, -0.6368, -0.5575, -0.0947],
        [-0.7579, -0.6537, -0.8443, -0.6368, -0.5575, -0.1084],
        [-0.7376, -0.6433, -0.8443, -0.6368, -0.5575, -0.1221],
        [-0.7209, -0.6350, -0.8446, -0.6368, -0.5575, -0.1221],
        [-0.6965, -0.6173, -0.8436, -0.6368, -0.5575, -0.1358],
        [-0.6717, -0.6024, -0.8427, -0.6368, -0.5575, -0.1494],
        [-0.6471, -0.5862, -0.8422, -0.6368, -0.5575, -0.1631],
        [-0.6144, -0.5601, -0.8411, -0.6368, -0.5575, -0.1768],
        [-0.5737, -0.5269, -0.8392, -0.6368, -0.5575, -0.1768],
        [-0.5356, -0.4936, -0.8381, -0.6368, -0.5575, -0.1905],
        [-0.5022, -0.4639, -0.8375, -0.6368, -0.5575, -0.2042],
        [-0.4746, -0.4391, -0.8376, -0.6368, -0.5575, -0.2178],
        [-0.4475, -0.4144, -0.8370, -0.6368, -0.5575, -0.2178],
        [-0.4185, -0.3881, -0.8363, -0.6368, -0.5575, -0.2315],
        [-0.3869, -0.3584, -0.8353, -0.6368, -0.5575, -0.2452],
        [-0.3437, -0.3157, -0.8206, -0.6

In [34]:
X, y = train_dataset[i + 1]
print(X)

tensor([[-0.7579, -0.6537, -0.8443, -0.6368, -0.5575, -0.1084],
        [-0.7376, -0.6433, -0.8443, -0.6368, -0.5575, -0.1221],
        [-0.7209, -0.6350, -0.8446, -0.6368, -0.5575, -0.1221],
        [-0.6965, -0.6173, -0.8436, -0.6368, -0.5575, -0.1358],
        [-0.6717, -0.6024, -0.8427, -0.6368, -0.5575, -0.1494],
        [-0.6471, -0.5862, -0.8422, -0.6368, -0.5575, -0.1631],
        [-0.6144, -0.5601, -0.8411, -0.6368, -0.5575, -0.1768],
        [-0.5737, -0.5269, -0.8392, -0.6368, -0.5575, -0.1768],
        [-0.5356, -0.4936, -0.8381, -0.6368, -0.5575, -0.1905],
        [-0.5022, -0.4639, -0.8375, -0.6368, -0.5575, -0.2042],
        [-0.4746, -0.4391, -0.8376, -0.6368, -0.5575, -0.2178],
        [-0.4475, -0.4144, -0.8370, -0.6368, -0.5575, -0.2178],
        [-0.4185, -0.3881, -0.8363, -0.6368, -0.5575, -0.2315],
        [-0.3869, -0.3584, -0.8353, -0.6368, -0.5575, -0.2452],
        [-0.3437, -0.3157, -0.8206, -0.6368, -0.5575, -0.2589],
        [-0.3060, -0.2778, -0.8059, -0.6

In [35]:
print(df_train[features].iloc[(i - sequence_length + 1): (i + 1)])

     emg_elbow  emg_shfe  emg_shaa  pos_elbow  pos_shfe  pos_shaa
51   -0.772946 -0.659814 -0.844197  -0.636816 -0.557476 -0.094735
52   -0.757853 -0.653743 -0.844289  -0.636816 -0.557476 -0.108424
53   -0.737578 -0.643337 -0.844327  -0.636816 -0.557476 -0.122090
54   -0.720889 -0.635016 -0.844646  -0.636816 -0.557476 -0.122090
55   -0.696476 -0.617300 -0.843562  -0.636816 -0.557476 -0.135779
56   -0.671748 -0.602371 -0.842725  -0.636816 -0.557476 -0.149445
57   -0.647138 -0.586183 -0.842232  -0.636816 -0.557476 -0.163134
58   -0.614371 -0.560146 -0.841094  -0.636816 -0.557476 -0.176800
59   -0.573683 -0.526923 -0.839238  -0.636816 -0.557476 -0.176800
60   -0.535596 -0.493597 -0.838060  -0.636816 -0.557476 -0.190489
61   -0.502178 -0.463926 -0.837550  -0.636816 -0.557476 -0.204178
62   -0.474633 -0.439127 -0.837644  -0.636816 -0.557476 -0.217844
63   -0.447461 -0.414370 -0.836961  -0.636816 -0.557476 -0.217844
64   -0.418497 -0.388147 -0.836348  -0.636816 -0.557476 -0.231533
65   -0.38

In [36]:
from torch.utils.data import DataLoader
torch.manual_seed(99)

train_loader = DataLoader(train_dataset, batch_size=3, shuffle=True)

X, y = next(iter(train_loader))
print(X.shape)
print(X)

torch.Size([3, 50, 6])
tensor([[[-5.0768e-02, -4.3283e-02, -1.8811e-01, -4.6790e-01, -5.0615e-01,
          -5.1880e-01],
         [ 4.0362e-02,  4.5958e-02, -1.0350e-01, -4.6790e-01, -5.0615e-01,
          -5.3248e-01],
         [ 1.3124e-01,  1.3627e-01, -1.7339e-02, -4.6790e-01, -5.0615e-01,
          -5.3248e-01],
         [ 2.1752e-01,  2.2316e-01,  6.7213e-02, -4.6790e-01, -5.3181e-01,
          -5.4616e-01],
         [ 2.9799e-01,  3.0782e-01,  1.4834e-01, -4.6790e-01, -5.3181e-01,
          -5.4616e-01],
         [ 3.8084e-01,  3.9359e-01,  2.3165e-01, -4.6790e-01, -5.3181e-01,
          -5.5984e-01],
         [ 4.6476e-01,  4.7560e-01,  3.1592e-01, -4.6790e-01, -5.3181e-01,
          -5.5984e-01],
         [ 5.3965e-01,  5.4890e-01,  3.9557e-01, -4.6790e-01, -5.3181e-01,
          -5.7352e-01],
         [ 6.0783e-01,  6.1246e-01,  4.6827e-01, -4.6790e-01, -5.3181e-01,
          -5.7352e-01],
         [ 6.6325e-01,  6.6383e-01,  5.2853e-01, -4.6790e-01, -5.3181e-01,
          -

## Create the datasets and data loaders for real

Using just 4 time periods to forecast 15 time periods ahead seems challenging, so let's
use sequences of length 30 (60 minutes) instead.

The PyTorch `DataLoader` is a very convenient way to iterate through these datasets. For
the training set we'll shuffle (the rows *within* each training sequence are not
shuffled, only the order in which we draw those blocks). For the test set, shuffling
isn't necessary.

In [37]:
torch.manual_seed(101)

batch_size = 4
sequence_length = 30

train_dataset = SequenceDataset(
    df_train,
    target=target,
    features=features,
    sequence_length=sequence_length
)
test_dataset = SequenceDataset(
    df_test,
    target=target,
    features=features,
    sequence_length=sequence_length
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

X, y = next(iter(train_loader))

print("Features shape:", X.shape)
print("Target shape:", y.shape)

Features shape: torch.Size([4, 30, 6])
Target shape: torch.Size([4, 3])


# The model and learning algorithm

Most importantly, we have to keep track of which dimension represents the batch in our input tensors. As we just saw, our data loaders use the first dimension for this, but the PyTorch LSTM layer’s default is to use the second dimension instead. So we set batch_first=True to make the dimensions line up, but confusingly, this doesn’t apply to the hidden and cell state tensors. In the forward method, we initialize h0 and c0 with batch size as the second dimension.

In [38]:
from torch import nn

class ShallowRegressionLSTM(nn.Module):
    def __init__(self, num_sensors, hidden_units):
        super().__init__()
        self.num_sensors = num_sensors  # this is the number of features
        self.hidden_units = hidden_units
        self.num_layers = 1

        self.lstm = nn.LSTM(
            input_size=num_sensors,
            hidden_size=hidden_units,
            batch_first=True,
            num_layers=self.num_layers
        )

        self.linear = nn.Linear(
            in_features=self.hidden_units, 
            out_features=3
            )

    def forward(self, x):
        batch_size = x.shape[0]
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_units).requires_grad_()
        
        _, (hn, _) = self.lstm(x, (h0, c0))
        out = self.linear(hn[0])  # First dim of Hn is num_layers, which is set to 1 above.

        return out


In [39]:
learning_rate = 5e-5
num_hidden_units = 16

model = ShallowRegressionLSTM(num_sensors=len(features), hidden_units=num_hidden_units)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train

In [40]:
def train_model(data_loader, model, loss_function, optimizer):
    num_batches = len(data_loader)
    total_loss = 0
    model.train()
    
    for X, y in data_loader:
        output = model(X)
        loss = loss_function(output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / num_batches
    print(f"Train loss: {avg_loss}")

def test_model(data_loader, model, loss_function):
    
    num_batches = len(data_loader)
    total_loss = 0

    model.eval()
    with torch.no_grad():
        for X, y in data_loader:
            output = model(X)
            total_loss += loss_function(output, y).item()

    avg_loss = total_loss / num_batches
    print(f"Test loss: {avg_loss}")

In [41]:
print("Untrained test\n--------")
test_model(test_loader, model, loss_function)
print()

epoch_number = 50

for ix_epoch in range(epoch_number):
    print(f"Epoch {ix_epoch}\n---------")
    train_model(train_loader, model, loss_function, optimizer=optimizer)
    test_model(test_loader, model, loss_function)
    print()

Untrained test
--------
Test loss: 1.241955145368161

Epoch 0
---------
Train loss: 0.268157596215387
Test loss: 0.0832852192057538

Epoch 1
---------
Train loss: 0.031107015237798533
Test loss: 0.046760862448355464

Epoch 2
---------
Train loss: 0.018116253379630597
Test loss: 0.03969702581656268

Epoch 3
---------
Train loss: 0.014623786779663856
Test loss: 0.040241501670430876

Epoch 4
---------
Train loss: 0.01282083013830291
Test loss: 0.037978070674417594

Epoch 5
---------
Train loss: 0.011567201605879813
Test loss: 0.03668145830423064

Epoch 6
---------
Train loss: 0.010371571335035553
Test loss: 0.03621154252418897

Epoch 7
---------
Train loss: 0.00911830728234103
Test loss: 0.03435907375507949

Epoch 8
---------
Train loss: 0.008003477370045585
Test loss: 0.033065350938017

Epoch 9
---------
Train loss: 0.0071127956415854045
Test loss: 0.03218200058595638

Epoch 10
---------
Train loss: 0.0064607725129786605
Test loss: 0.028798215402302294

Epoch 11
---------
Train loss: 0.0

KeyboardInterrupt: 

# Evaluation

In [None]:
def predict(data_loader, model):
    """Just like `test_loop` function but keep track of the outputs instead of the loss
    function.
    """
    output = torch.tensor([])
    model.eval()
    with torch.no_grad():
        for X, _ in data_loader:
            y_star = model(X)
            output = torch.cat((output, y_star), 0)
    
    return output

In [None]:
train_eval_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

ystar_col = ['predpos_elbow', 'predpos_shfe', 'predpos_shaa']
df_train[ystar_col] = predict(train_eval_loader, model).numpy()
df_test[ystar_col] = predict(test_loader, model).numpy()

In [22]:
targets = target
targets.extend(ystar_col)
df_out = pd.concat((df_train, df_test))[[targets]]

for c in df_out.columns:
    df_out[c] = df_out[c] * target_stdev + target_mean

print(df_out)

KeyError: "None of [Index([None], dtype='object')] are in the [columns]"

In [None]:
fig = px.line(df_out, labels={'value': "PM2.5 (ug/m3)", 'created_at': 'Date'})
fig.add_vline(x=test_start, line_width=4, line_dash="dash")
fig.add_annotation(xref="paper", x=0.75, yref="paper", y=0.8, text="Test set start", showarrow=False)
fig.update_layout(
  template=plot_template, legend=dict(orientation='h', y=1.02, title_text="")
)
fig.show()
fig.write_image("pm25_forecast.png", width=1200, height=600)