<a href="https://colab.research.google.com/github/jeffheaton/app_deep_learning/blob/main/t81_558_class_10_3_transformer_timeseries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from torch.optim.lr_scheduler import ReduceLROnPlateau
import plotly.express as px
has_mps = torch.backends.mps.is_built()
device = "mps" if has_mps else "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: mps


In [2]:
# Get and process data
df = pd.read_csv("SN_d_tot_V2.0.csv")
selected_columns = ['dec_year', 'sn_value', 'year']
df = df[selected_columns]
df['moving_average'] = df['sn_value'].rolling(window=500).mean()
df_downsampled = df.iloc[::50].copy()  # Selects every 10th row, starting from index 0 
print(f'length after downsampling: {len(df_downsampled)}')

df_downsampled['sn_value'] = df_downsampled['moving_average'].astype(float)
df = df_downsampled.dropna()

length after downsampling: 1232


In [3]:
# process for training
df['sn_value'] = df['sn_value'].astype(float)
df_train = df[df['year'] < 2000]
df_test = df[df['year'] >= 2000]

spots_train = df_train['sn_value'].to_numpy().reshape(-1, 1)
spots_test = df_test['sn_value'].to_numpy().reshape(-1, 1)

scaler = StandardScaler()
spots_train = scaler.fit_transform(spots_train).flatten().tolist()
spots_test = scaler.transform(spots_test).flatten().tolist()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sn_value'] = df['sn_value'].astype(float)


In [19]:
# Sequence Data Preparation
SEQUENCE_SIZE = 10

def to_sequences(seq_size, obs):
    x = []
    y = []
    for i in range(len(obs) - seq_size):
        window = obs[i:(i + seq_size)]
        after_window = obs[i + seq_size]
        x.append(window)
        y.append(after_window)
    return torch.tensor(x, dtype=torch.float32).view(-1, seq_size, 1), torch.tensor(y, dtype=torch.float32).view(-1, 1)

x_train, y_train = to_sequences(SEQUENCE_SIZE, spots_train)
x_test, y_test = to_sequences(SEQUENCE_SIZE, spots_test)

# Setup data loaders for batch
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(x_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [20]:
# Positional Encoding for Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)
    
# Model definition using Transformer
class TransformerModel(nn.Module):
    def __init__(self, input_dim=1, d_model=64, nhead=4, num_layers=2, dropout=0.2):
        super(TransformerModel, self).__init__()

        self.encoder = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model, nhead)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers)
        self.decoder = nn.Linear(d_model, 1)

    def forward(self, x):
        x = self.encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = self.decoder(x[:, -1, :])
        return x

model = TransformerModel().to(device)

In [21]:
# Train the model
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=3, verbose=True)

epochs = 1000
early_stop_count = 0
min_val_loss = float('inf')

for epoch in range(epochs):
    model.train()
    for batch in train_loader:
        x_batch, y_batch = batch
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    val_losses = []
    with torch.no_grad():
        for batch in test_loader:
            x_batch, y_batch = batch
            x_batch, y_batch = x_batch.to(device), y_batch.to(device)
            outputs = model(x_batch)
            loss = criterion(outputs, y_batch)
            val_losses.append(loss.item())

    val_loss = np.mean(val_losses)
    scheduler.step(val_loss)

    if val_loss < min_val_loss:
        min_val_loss = val_loss
        early_stop_count = 0
    else:
        early_stop_count += 1

    if early_stop_count >= 5:
        print("Early stopping!")
        break
    print(f"Epoch {epoch + 1}/{epochs}, Validation Loss: {val_loss:.4f}")

Epoch 1/1000, Validation Loss: 0.0249
Epoch 2/1000, Validation Loss: 0.0177
Epoch 3/1000, Validation Loss: 0.0144
Epoch 4/1000, Validation Loss: 0.0057
Epoch 5/1000, Validation Loss: 0.0161
Epoch 6/1000, Validation Loss: 0.0103
Epoch 7/1000, Validation Loss: 0.0145
Epoch 00008: reducing learning rate of group 0 to 5.0000e-04.
Epoch 8/1000, Validation Loss: 0.0067
Early stopping!


In [22]:
print(f'x train shape : {x_train.shape}')
print(f'y train shape : {y_train.shape}')

x train shape : torch.Size([1084, 10, 1])
y train shape : torch.Size([1084, 1])


In [37]:
train_dataset =TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
train_loader_data = [i for i in train_loader]
len(train_loader_data)

68

In [39]:
model.train()
for batch in train_loader:
    x_batch, y_batch = batch
    print(f'x_batch shape : {x_batch.shape}')
    print(f'y_batch shape : {y_batch.shape}')

x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shape : torch.Size([16, 10, 1])
y_batch shape : torch.Size([16, 1])
x_batch shap

In [29]:
x_train.shape

torch.Size([1084, 10, 1])

In [None]:
model.train()
for batch in train_loader:
    x_batch, y_batch = batch
    x_batch, y_batch = x_batch.to(device), y_batch.to(device)

    optimizer.zero_grad()
    outputs = model(x_batch)
    loss = criterion(outputs, y_batch)
    loss.backward()
    optimizer.step()

# Validation
model.eval()
val_losses = []
with torch.no_grad():
    for batch in test_loader:
        x_batch, y_batch = batch
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        loss = criterion(outputs, y_batch)
        val_losses.append(loss.item())

val_loss = np.mean(val_losses)
scheduler.step(val_loss)

if val_loss < min_val_loss:
    min_val_loss = val_loss
    early_stop_count = 0
else:
    early_stop_count += 1

if early_stop_count >= 5:
    print("Early stopping!")
    break

In [None]:
# Evaluation
model.eval()
predictions = []
with torch.no_grad():
    for batch in test_loader:
        x_batch, y_batch = batch
        x_batch = x_batch.to(device)
        outputs = model(x_batch)
        predictions.extend(outputs.squeeze().tolist())

rmse = np.sqrt(np.mean((scaler.inverse_transform(np.array(predictions).reshape(-1, 1)) - scaler.inverse_transform(y_test.numpy().reshape(-1, 1)))**2))
print(f"Score (RMSE): {rmse:.4f}")

Score (RMSE): 5.4468


In [8]:
px.line(df, x='dec_year', y=['sn_value', 'moving_average'], title='Sunspot Value Over Time')

The data preprocessing is the same as was introduced in the previous section. We will use data before the year 2000 as training, the rest is used for validation.

In [9]:
px.line(spots_train)

In [10]:
px.line(spots_test)

Just like we did for LSTM in the previous section, we again break the data into sequences.