## Temperature prediction using an LSTM in PyTorch
In this notebook we'll explore how to use an LSTM to predict temperature from weather data using a PyTorch model.

In [2]:
import warnings

warnings.filterwarnings("ignore")

import sys

# Stop right away when the interpreter's major version is below 3.
if sys.version_info[0] < 3:
    raise Exception(
        "pytorch_toolkit does not support Python 2. Please use a Python 3+ interpreter!"
    )

import os
import sys
import pathlib

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# torch imports
import torch
import torch.nn as nn
import torchsummary as ts
import torch_training_toolkit as t3

# Seed through the toolkit helper.
# NOTE(review): assumed to seed the Python/NumPy/torch RNGs - confirm in torch_training_toolkit.
SEED = 41
t3.seed_all(SEED)

# Display defaults for matplotlib figures, NumPy arrays and pandas frames
plt.rcParams["figure.figsize"] = (5, 4)
np.set_printoptions(suppress=True, precision=3, linewidth=110)
pd.set_option("display.float_format", "{:,.3f}".format)

# Report the installed torch build and whether CUDA can be used
_gpu_status = "is available :)" if torch.cuda.is_available() else "is not available :("
print(f"Using Pytorch {torch.__version__}. GPU {_gpu_status}")

# Device preference: CUDA first, then DirectML (only when this torch build exposes it), else CPU
if torch.cuda.is_available():
    DEVICE = "cuda"
elif hasattr(torch, "dml") and torch.dml.is_available():
    DEVICE = "dml"
else:
    DEVICE = "cpu"
print(f"Will train models on {DEVICE}")

# Both folders are resolved against the directory the notebook is run from
_working_dir = pathlib.Path.cwd()
MODEL_SAVE_DIR = _working_dir / "model_states"
print(f"Model state will be saved to {MODEL_SAVE_DIR}")
DATA_DIR = _working_dir / "csv_files"

Using Pytorch 2.1.2+rocm5.6. GPU is not available :(
Will train models on cpu
Model state will be saved to /home/mjbhobe/code/git-projects/dl-pytorch/model_states


In [3]:
# read the data from URL
from urllib.request import urlretrieve

# Use the raw-content address: the github.com/.../blob/... URL serves an HTML page,
# not the CSV file itself.
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv"
target_file_name = DATA_DIR / "daily-min-temperatures.csv"
if not os.path.exists(target_file_name):
    # download only if necessary; urlretrieve does not create missing folders
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    urlretrieve(url, target_file_name)
    assert os.path.exists(
        target_file_name
    ), f"FATAL ERROR: unable to download to {target_file_name}"
# open data file (first column becomes the index)
df = pd.read_csv(target_file_name, index_col=0)
# A stale cached file (e.g. an HTML page saved from the old URL) would not have this column;
# delete the file and re-run the cell if this fails.
assert "Temp" in df.columns, f"FATAL ERROR: no 'Temp' column found in {target_file_name}"
df.head()

Unnamed: 0_level_0,Temp
Date,Unnamed: 1_level_1
1981-01-01,20.7
1981-01-02,17.9
1981-01-03,18.8
1981-01-04,14.6
1981-01-05,15.8


In [4]:
from sklearn.preprocessing import MinMaxScaler

# Learn the min/max of the temperature column, then overwrite the column with
# its rescaled values. `scaler` is kept so predictions can be mapped back later.
scaler = MinMaxScaler().fit(df[["Temp"]])
df["Temp"] = scaler.transform(df[["Temp"]])
df.head()

Unnamed: 0_level_0,Temp
Date,Unnamed: 1_level_1
1981-01-01,0.787
1981-01-02,0.681
1981-01-03,0.715
1981-01-04,0.555
1981-01-05,0.601


In [5]:
# Slide a window over the scaled series, one row at a time.
# Each window holds sequence_length + 1 consecutive values
# (rows 0-10, 1-11, 2-12, ...).
sequence_length = 10
window_size = sequence_length + 1
sequences = [
    df["Temp"].iloc[start : start + window_size].values
    for start in range(len(df) - sequence_length)
]
print(sequences[:20])

[array([0.787, 0.681, 0.715, 0.555, 0.601, 0.601, 0.601, 0.662, 0.829, 0.76 , 0.616]), array([0.681, 0.715, 0.555, 0.601, 0.601, 0.601, 0.662, 0.829, 0.76 , 0.616, 0.506]), array([0.715, 0.555, 0.601, 0.601, 0.601, 0.662, 0.829, 0.76 , 0.616, 0.506, 0.635]), array([0.555, 0.601, 0.601, 0.601, 0.662, 0.829, 0.76 , 0.616, 0.506, 0.635, 0.817]), array([0.601, 0.601, 0.601, 0.662, 0.829, 0.76 , 0.616, 0.506, 0.635, 0.817, 0.951]), array([0.601, 0.601, 0.662, 0.829, 0.76 , 0.616, 0.506, 0.635, 0.817, 0.951, 0.787]), array([0.601, 0.662, 0.829, 0.76 , 0.616, 0.506, 0.635, 0.817, 0.951, 0.787, 0.783]), array([0.662, 0.829, 0.76 , 0.616, 0.506, 0.635, 0.817, 0.951, 0.787, 0.783, 0.943]), array([0.829, 0.76 , 0.616, 0.506, 0.635, 0.817, 0.951, 0.787, 0.783, 0.943, 0.673]), array([0.76 , 0.616, 0.506, 0.635, 0.817, 0.951, 0.787, 0.783, 0.943, 0.673, 0.589]), array([0.616, 0.506, 0.635, 0.817, 0.951, 0.787, 0.783, 0.943, 0.673, 0.589, 0.692]), array([0.506, 0.635, 0.817, 0.951, 0.787, 0.783, 0.94

In [6]:
# Every window built above has sequence_length + 1 values: everything except
# the last value is the model input, and the last value is the target.
X = [seq[:-1] for seq in sequences]
y = [seq[-1] for seq in sequences]
print(len(X), len(y))

3640 3640


In [7]:
from sklearn.model_selection import train_test_split

# Pin random_state so that re-running this cell always yields the same split.
# NOTE(review): the windows overlap, so a shuffled split puts near-identical windows
# in both sets; consider shuffle=False for a chronological hold-out.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)
# an LSTM expects input sequences in the shape (batch_size, time_steps, num_features)
# float32 matches the default dtype of torch layers (NumPy would default to float64)
X_train = np.array(X_train, dtype=np.float32).reshape(len(X_train), sequence_length, 1)
X_test = np.array(X_test, dtype=np.float32).reshape(len(X_test), sequence_length, 1)
y_train = np.array(y_train, dtype=np.float32)
y_test = np.array(y_test, dtype=np.float32)
print(
    f"X_train.shape: {X_train.shape} - y_train.shape: {y_train.shape} - "
    f"X_test.shape: {X_test.shape} - y_test.shape: {y_test.shape}"
)

X_train.shape: (2912, 10, 1) - y_train.shap: (2912,) - X_test.shape: (728, 10, 1) - y_test.shape: (728,)


### Define the model

In [8]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the LSTM hidden state.
        batch_size: default batch size used by `init_hidden()`.
        output_dim: number of values produced by the linear layer.
        num_layers: number of stacked LSTM layers.

    Inputs to `forward` are batch-first: (batch, time_steps, input_dim).
    """

    def __init__(self, input_dim, hidden_dim, batch_size, output_dim=1, num_layers=2):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.output_dim = output_dim
        self.num_layers = num_layers

        # define the layers of LSTM (batch_first => tensors are (batch, time, features))
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers, batch_first=True)
        # output layer
        self.linear = nn.Linear(self.hidden_dim, self.output_dim)
        self.hidden = self.init_hidden()

    def init_hidden(self, batch_size=None, device=None, dtype=None):
        """Return a fresh zero-filled `(h_0, c_0)` pair.

        Args:
            batch_size: batch dimension of the state; defaults to `self.batch_size`.
            device: target device; defaults to the device of the model parameters.
            dtype: target dtype; defaults to the dtype of the model parameters.

        Returns:
            Tuple of two tensors, each shaped (num_layers, batch_size, hidden_dim).
        """
        reference = self.linear.weight
        if batch_size is None:
            batch_size = self.batch_size
        if device is None:
            device = reference.device
        if dtype is None:
            dtype = reference.dtype
        shape = (self.num_layers, batch_size, self.hidden_dim)
        return (
            torch.zeros(shape, device=device, dtype=dtype),
            torch.zeros(shape, device=device, dtype=dtype),
        )

    def forward(self, input):
        """Run the network on a batch-first tensor.

        Args:
            input: tensor shaped (batch, time_steps, input_dim).

        Returns:
            Flattened output of the linear layer; shape (batch,) when output_dim == 1.
        """
        # cast to the parameter dtype so float64 tensors built from NumPy arrays work
        input = input.to(dtype=self.linear.weight.dtype)
        n_samples = input.size(0)

        # Reuse the stored state only when it fits this batch; otherwise start from zeros
        # (covers a different batch size, or a model/input moved to another device).
        h0, c0 = self.hidden
        wanted_shape = (self.num_layers, n_samples, self.hidden_dim)
        if (
            tuple(h0.shape) != wanted_shape
            or h0.device != input.device
            or h0.dtype != input.dtype
        ):
            h0, c0 = self.init_hidden(n_samples, input.device, input.dtype)

        out, (h_n, c_n) = self.lstm(input, (h0, c0))
        # detach so a later forward/backward does not reach back into this graph
        self.hidden = (h_n.detach(), c_n.detach())

        # take the last time step of every sample: out is (batch, time, hidden)
        out = self.linear(out[:, -1, :])
        return out.view(-1)

In [9]:
# Hyper-parameters: one feature per time step, 64 hidden units, 16 samples per batch
input_dim = 1
hidden_dim = 64
batch_size = 16

# Network, loss (mean squared error) and optimizer (Adam)
model = LSTMModel(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    batch_size=batch_size,
)
loss_fxn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [11]:
# Sanity check: shape of the first training batch once converted to a tensor
bx = torch.tensor(X_train[:batch_size])
bx.shape

torch.Size([16, 10, 1])

In [12]:
# training loop
num_epochs = 100

model = model.to(DEVICE)

# Convert the training arrays once, outside the loops. float32 matches the default
# dtype of the model parameters (NumPy arrays default to float64).
X_train_t = torch.tensor(X_train, dtype=torch.float32).to(DEVICE)
y_train_t = torch.tensor(y_train, dtype=torch.float32).to(DEVICE)
# only full batches are used; trailing samples that do not fill a batch are skipped
num_batches = len(X_train) // batch_size

for epoch in range(num_epochs):
    model.train()
    # plain list: NumPy arrays have no append()
    losses = []
    for i in range(num_batches):
        batch = slice(i * batch_size, (i + 1) * batch_size)
        batch_X = X_train_t[batch]
        batch_y = y_train_t[batch]
        # start every batch from a zero hidden state on the training device
        model.hidden = tuple(state.to(DEVICE) for state in model.init_hidden())
        # zero gradients
        optimizer.zero_grad()
        # forward pass
        y_pred = model(batch_X)
        # compute loss
        loss = loss_fxn(y_pred, batch_y)
        # back propagation
        loss.backward()
        # update the model parameters
        optimizer.step()

        losses.append(loss.item())

    # report progress every 10th epoch (inside the epoch loop)
    if epoch % 10 == 0:
        print(f"Epoch {epoch+1}/{num_epochs} -> loss: {np.mean(losses):.3f}")

KeyboardInterrupt: 