# LTSF-Linear

## Import Libraries

In the following cell, we import the libraries required for training and testing our LTSF-Linear models.

In [1]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

## Model Definitions

In the following cells, we create the modules necessary for our long-term time series forecasting project.

In [2]:
class LinearLTSF(nn.Module):
    """Channel-independent linear forecaster.

    One ``nn.Linear(sequence_length, prediction_length)`` layer is created per
    channel; each layer maps that channel's look-back window directly to its
    forecast, and no weights are shared between channels.

    Args:
        sequence_length: Number of time steps in the look-back window.
        prediction_length: Number of time steps to forecast.
        in_channels: Number of channels, i.e. number of linear layers.
    """

    def __init__(self, sequence_length, prediction_length, in_channels=2):
        super().__init__()
        self.sequence_length = sequence_length
        self.prediction_length = prediction_length
        self.linears = nn.ModuleList([
            nn.Linear(sequence_length, prediction_length)
            for _ in range(in_channels)
        ])

    def forward(self, x):
        """Forecast every channel independently.

        Args:
            x: Tensor indexed as ``x[:, :, channel]``; for the usual 3-D input
                this means shape ``(batch, sequence_length, channels)``.

        Returns:
            The per-channel forecasts stacked along a new last dimension; for
            3-D input the shape is ``(batch, prediction_length, in_channels)``.
            With zero channels an empty 1-D tensor is returned.
        """
        if not self.linears:
            # Nothing to stack: keep the historical empty-tensor result.
            return torch.tensor([], dtype=x.dtype, device=x.device)
        # Stack once instead of concatenating onto a growing tensor, which
        # re-copied the accumulated output for every channel.
        return torch.stack(
            [linear(x[:, :, channel])
             for channel, linear in enumerate(self.linears)],
            dim=-1,
        )

## Dataset

We create the ApplianceEnergyUsageDataset, which supplies the lights and appliances energy-usage series to the Linear model defined above for forecasting.

In [3]:
class ApplianceEnergyUsageDataset(Dataset):
    """Sliding-window view over a time series for forecasting.

    Item ``i`` is the pair ``(x, y)`` where ``x`` is the ``sequence_length``
    consecutive rows starting at row ``i`` and ``y`` is the
    ``prediction_length`` rows immediately following ``x``.

    Args:
        energy_data: Sliceable sequence of observations (anything supporting
            ``len`` and ``data[a:b]``).
        sequence_length: Number of rows in each input window.
        prediction_length: Number of rows in each target window.
    """

    def __init__(self, energy_data, sequence_length, prediction_length):
        super().__init__()
        self.energy_data = energy_data
        self.sequence_length = sequence_length
        self.prediction_length = prediction_length

    def __getitem__(self, index):
        """Return the ``(input_window, target_window)`` pair at ``index``.

        Negative indices count from the end, as for built-in sequences.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
                Slicing alone never fails, so without this check an
                out-of-range index would yield truncated windows and plain
                iteration over the dataset would never stop.
        """
        num_windows = len(self)
        position = index + num_windows if index < 0 else index
        if not 0 <= position < num_windows:
            raise IndexError(
                f"index {index} is out of range for dataset "
                f"with {num_windows} windows"
            )
        sequence_end = position + self.sequence_length
        prediction_end = sequence_end + self.prediction_length
        x = self.energy_data[position:sequence_end]
        y = self.energy_data[sequence_end:prediction_end]
        return x, y

    def __len__(self):
        """Return the number of complete (input, target) windows.

        Clamped at zero so that data shorter than one full window gives an
        empty dataset rather than a negative length.
        """
        window = self.sequence_length + self.prediction_length
        return max(0, len(self.energy_data) - window + 1)

### Dataset Helpers

We create the utility functions needed to build separate training and test datasets for our Linear models. Additionally, we normalize the test data using the mean and variance of the training data (the scaler is fit on the training split only), as suggested in the literature.

In [4]:
# Location of the raw CSV, relative to this notebook.
energy_data_file = '../../datasets/appliances_energy_prediction/energydata_complete.csv'

# Only CSV columns 1 and 2 are loaded (presumably the appliances and lights
# energy readings -- confirm against the file header).
energy_data = pd.read_csv(energy_data_file, usecols=[1, 2])

# Chronological split: the first 80% of the rows are used for training.
training_samples = int(0.8 * len(energy_data))

# Fit the scaler on the training rows only; the same fitted scaler is reused
# later to transform the test rows.
scaler = StandardScaler()
train_data = scaler.fit_transform(
    energy_data.iloc[:training_samples].to_numpy())

def get_train_dataset(sequence_length, prediction_length):
    """Wrap the scaled training rows in a sliding-window dataset.

    Args:
        sequence_length: Number of rows in each input window.
        prediction_length: Number of rows in each target window.

    Returns:
        An ``ApplianceEnergyUsageDataset`` over the module-level ``train_data``.
    """
    return ApplianceEnergyUsageDataset(
        train_data, sequence_length, prediction_length)

def get_test_dataset(sequence_length, prediction_length):
    """Build the sliding-window test dataset, scaled with the training scaler.

    The slice starts ``sequence_length`` rows before the train/test boundary,
    so the first test window uses the tail of the training rows as its
    look-back and its target begins at the first held-out row.

    Args:
        sequence_length: Number of rows in each input window.
        prediction_length: Number of rows in each target window.

    Returns:
        An ``ApplianceEnergyUsageDataset`` over the scaled test rows.
    """
    # Clamp at 0: a negative start would make ``iloc`` count from the END of
    # the frame and silently select the wrong rows whenever
    # ``sequence_length`` exceeds the number of training rows.
    first_row = max(0, training_samples - sequence_length)
    test_data = scaler.transform(energy_data.iloc[first_row:].values)
    dataset = ApplianceEnergyUsageDataset(test_data,
                                          sequence_length,
                                          prediction_length)
    return dataset

## Model Training/Evaluation

### Train/Evaluate Helper Functions

Additional utility functions for training and evaluating our Linear models.

In [5]:
def train(dataloader, model, criterion, optimizer):
    """Run one pass of gradient updates over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Module being optimised; switched to training mode.
        criterion: Callable computing the loss from ``(pred, y)``.
        optimizer: Optimiser that updates the model parameters.
    """
    model.train()
    for inputs, targets in dataloader:
        # Cast to float32 first, then move to the module-level ``device``.
        inputs = inputs.to(torch.float32).to(device)
        targets = targets.to(torch.float32).to(device)

        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        # Gradients are cleared after the step so the next batch starts clean.
        optimizer.zero_grad()

def test(dataloader, model, criterion):
    """Evaluate ``model`` and return the mean of the per-batch losses.

    Args:
        dataloader: Sized iterable yielding ``(X, y)`` batches.
        model: Module to evaluate; switched to evaluation mode.
        criterion: Callable computing the loss from ``(pred, y)``.

    Returns:
        The sum of the batch losses divided by ``len(dataloader)``. Every batch
        carries equal weight regardless of how many samples it holds.

    Raises:
        ZeroDivisionError: If ``len(dataloader)`` is 0.
    """
    batch_count = len(dataloader)
    model.eval()
    batch_losses = []
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(torch.float32).to(device)
            targets = targets.to(torch.float32).to(device)
            batch_losses.append(criterion(model(inputs), targets).item())
    return sum(batch_losses) / batch_count

In the following cell, we obtain loss statistics on the test dataset for different forecasting lengths of the prediction horizon (24 or 720) and different values of the look-back window (48, 72, 96, 120, 144, 168, 192, 336, 504, 672, 720).

In [6]:
# Results table: one entry per (prediction length, sequence length) pair.
loss_data = {'Prediction Length': [], 'Sequence Length': [], 'Avg Loss': []}

# Settings shared by every configuration in the sweep.
epochs = 10
criterion = nn.MSELoss()

for pred_length in [24, 720]:
    for seq_length in [48, 72, 96, 120, 144, 168, 192, 336, 504, 672, 720]:
        # Data for this look-back / horizon combination.
        train_dataset = get_train_dataset(seq_length, pred_length)
        test_dataset = get_test_dataset(seq_length, pred_length)
        train_dataloader = DataLoader(train_dataset, batch_size=64)
        test_dataloader = DataLoader(test_dataset, batch_size=64)

        # A fresh model and optimiser are created for every configuration.
        model = LinearLTSF(seq_length, pred_length).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        for epoch in range(epochs):
            train(train_dataloader, model, criterion, optimizer)

        # Record the held-out loss next to the settings that produced it.
        test_loss = test(test_dataloader, model, criterion)
        loss_data['Prediction Length'].append(pred_length)
        loss_data['Sequence Length'].append(seq_length)
        loss_data['Avg Loss'].append(test_loss)

print(loss_data)

{'Prediction Length': [24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 720, 720, 720, 720, 720, 720, 720, 720, 720, 720, 720], 'Sequence Length': [48, 72, 96, 120, 144, 168, 192, 336, 504, 672, 720, 48, 72, 96, 120, 144, 168, 192, 336, 504, 672, 720], 'Avg Loss': [0.5075517266748413, 0.5059373595060841, 0.5053949175823119, 0.5047441403711995, 0.49109682452774817, 0.488508376142671, 0.4892376435379828, 0.49510264829281836, 0.4896743307190557, 0.4941101271298624, 0.4937622444403748, 0.5540641330036462, 0.5489959348650539, 0.5449212579166188, 0.5413153031293083, 0.5386286418811947, 0.5371693179887884, 0.5358020797664044, 0.5263311167558035, 0.5219297783047545, 0.5222486573107102, 0.5240104695161184]}
