# Imports

In [1]:
import torch
import torch.nn as nn
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# Data extraction and preprocessing

In [2]:
# Load the CSV
csv_file = "/kaggle/input/eetm-1/ETTm1.csv"
df = pd.read_csv(csv_file)
print(df.head())

                  date   HUFL   HULL   MUFL   MULL   LUFL   LULL         OT
0  2016-07-01 00:00:00  5.827  2.009  1.599  0.462  4.203  1.340  30.531000
1  2016-07-01 00:15:00  5.760  2.076  1.492  0.426  4.264  1.401  30.459999
2  2016-07-01 00:30:00  5.760  1.942  1.492  0.391  4.234  1.310  30.038000
3  2016-07-01 00:45:00  5.760  1.942  1.492  0.426  4.234  1.310  27.013000
4  2016-07-01 01:00:00  5.693  2.076  1.492  0.426  4.142  1.371  27.787001


In [3]:
window_length = 168
step_size = 1

In [4]:
# 1. Order by date
df["date"] = pd.to_datetime(df["date"])
df = df.sort_values("date").reset_index(drop=True)

def extract_windows(df, window_len, step):
    windows = []
    timestamps = []
    
    # Drop non-numeric columns for window extraction
    numeric_df = df.drop(columns=["date"])
    
    for i in range(step - 1, len(df) - window_len + 1, step):
        # Window of numeric features
        window = numeric_df.iloc[i:i+window_len].values
        windows.append(window)
        
        # Associate timestamps
        timestamps.append(df["date"].iloc[i:i+window_len].values)
    
    return np.array(windows), np.array(timestamps)

all_windows, all_timestamps = extract_windows(df, window_length, step_size)

X = all_windows  # shape: (num_samples, window_length, num_features)
meta_time = all_timestamps  # metadata: array of associated dates

print("Shape of X:", X.shape)               # (num_samples, window_length, num_features)
print("Shape of meta_time:", meta_time.shape)  # (num_samples, window_length)


Shape of X: (69513, 168, 7)
Shape of meta_time: (69513, 168)


In [5]:
from sklearn.utils import shuffle

X, meta_time = shuffle(X, meta_time, random_state=626)

train_size = int(0.7 * len(X))
val_size = int(0.1 * len(X))
test_size = len(X) - train_size - val_size

X_train = X[:train_size]
X_val = X[train_size:train_size + val_size]
X_test = X[train_size + val_size:]

meta_time_train = meta_time[:train_size]
meta_time_val = meta_time[train_size:train_size + val_size]
meta_time_test = meta_time[train_size + val_size:]

# Encoder and Decoder

Our goal was to build an encoder-decoder model able to learn a compressed representaion of the input time series, so to allow a more efficient search of similar time series in a smaller dimensional space, speeding up the task of finding k nearest neighbours. </br> The encoder gets as input a tensor of shape (batch_size, seq_len, num_features) and compresses it into a tensor of shape (batch_size, embedding_dim), while the decoder takes the output of the encoder and tries to reconstruct the original input. 

In [6]:
class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size] if self.chomp_size > 0 else x

class TemporalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout):
        super().__init__()
        padding = (kernel_size - 1) * dilation  # full causal
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                               padding=padding, dilation=dilation)
        self.chomp1 = Chomp1d(padding)

        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                               padding=padding, dilation=dilation)
        self.chomp2 = Chomp1d(padding)

        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(
            self.conv1, self.chomp1, self.relu1, self.dropout1,
            self.conv2, self.chomp2, self.relu2, self.dropout2,
        )

        self.downsample = nn.Conv1d(in_channels, out_channels, 1) \
            if in_channels != out_channels else None
        self.relu = nn.ReLU()

    def forward(self, x):
        res = x if self.downsample is None else self.downsample(x)
        out = self.net(x)
        if out.shape != res.shape:
            # Align time dimension by cropping the residual (this might be necessary in some edge cases)
            min_len = min(out.size(-1), res.size(-1))
            out = out[..., :min_len]
            res = res[..., :min_len]
        return self.relu(out + res)

# Encoder
class TCNEncoder(nn.Module):
    def __init__(self, input_dim, emb_dim, num_channels, kernel_size=3, dropout=0.2):
        super().__init__()
        layers = []
        for i in range(len(num_channels)):
            in_ch = input_dim if i == 0 else num_channels[i - 1]
            out_ch = num_channels[i]
            dilation = 2 ** i
            layers.append(TemporalBlock(in_ch, out_ch, kernel_size, dilation, dropout))
        self.tcn = nn.Sequential(*layers)

        # Projection from [B, C, T] to [B, emb_dim]
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.project = nn.Linear(num_channels[-1], emb_dim)

    def forward(self, x):
        # x: [B, T, D] → [B, D, T]
        x = x.permute(0, 2, 1)
        x = self.tcn(x)  # [B, C, T]
        x = self.pool(x).squeeze(-1)  # [B, C]
        x = self.project(x)  # [B, emb_dim]
        return x

# Decoder
class TCNDecoder(nn.Module):
    def __init__(self, emb_dim, output_dim, seq_len, num_channels, kernel_size=3, dropout=0.2):
        super().__init__()
        self.seq_len = seq_len
        self.output_dim = output_dim

        # Project embedding back to a sequence shape: [B, C, T]
        self.expand = nn.Linear(emb_dim, num_channels[0] * seq_len)

        layers = []
        for i in range(len(num_channels) - 1):
            in_ch = num_channels[i]
            out_ch = num_channels[i + 1]
            dilation = 2 ** i
            layers.append(TemporalBlock(in_ch, out_ch, kernel_size, dilation, dropout))

        self.tcn = nn.Sequential(*layers)
        self.out_proj = nn.Conv1d(num_channels[-1], output_dim, kernel_size=1)

    def forward(self, x):
        # x: [B, emb_dim] → [B, C0, T]
        x = self.expand(x)  # [B, C0 * T]
        x = x.view(x.size(0), -1, self.seq_len)  # [B, C0, T]
        x = self.tcn(x)  # [B, Cn, T]
        x = self.out_proj(x)  # [B, D, T]
        return x.permute(0, 2, 1)  # [B, T, D]
    
class TCNAutoencoder(nn.Module):
    def __init__(self, input_dim, emb_dim, seq_len, channels, kernel_size=3, dropout=0.2):
        super().__init__()
        self.encoder = TCNEncoder(input_dim, emb_dim, channels, kernel_size, dropout)
        self.decoder = TCNDecoder(emb_dim, input_dim, seq_len, channels[::-1], kernel_size, dropout)

    def forward(self, x, only_encoder = False):
        # x: [B, T, D]
        z = self.encoder(x)       # [B, emb_dim]
        if only_encoder:
            return z
        x_recon = self.decoder(z) # [B, T, D]
        return x_recon

In [7]:
# Compute the statistics of the train dataset and normalize with respect to it 

mean = X_train.mean(axis=(0, 1), keepdims=True)  # shape (1, 1, num_features)
std = X_train.std(axis=(0, 1), keepdims=True)

X_train = (X_train - mean) / std
X_val = (X_val - mean) / std
X_test = (X_test - mean) / std

print(mean, std)

[[[ 7.41981425  2.25796281  4.32981479  0.89406359  3.07951596
    0.856835   13.34120937]]] [[[7.0826444  2.04137929 6.82801614 1.80879328 1.17256459 0.59970438
   8.58232446]]]


In [8]:
# Convert into PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Dataset & DataLoader
batch_size = 32
train_loader = DataLoader(TensorDataset(X_train_tensor), batch_size=batch_size, shuffle=True)
val_loader = DataLoader(TensorDataset(X_val_tensor), batch_size=batch_size)
test_loader = DataLoader(TensorDataset(X_test_tensor), batch_size=batch_size)

# Model training

In [None]:
seq_len = X_train.shape[1]
num_features = X_train.shape[2]

# Parameters
B, T, D = batch_size, seq_len, num_features
emb_dim = 64
channels = [32, 64, 128]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate model
model = TCNAutoencoder(input_dim=D, emb_dim=emb_dim, seq_len=T, channels=channels).to(device)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Model parameters: {count_parameters(model):,}")

params = list(model.parameters())

optimizer = torch.optim.Adam(params, lr=1e-3)
loss_fn = nn.MSELoss()

epochs = 100

best_loss = 1e10

encoder_path = "/kaggle/working/encoder.pth"

def evaluate(model, loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for (x,) in loader:
            x = x.to(device)
            x_hat = model(x)
            loss = loss_fn(x_hat, x)
            total_loss += loss.item()
    return total_loss / len(loader)

for epoch in range(1, epochs + 1):
    model.train()
    train_loss = 0

    for (x,) in train_loader:
        x = x.to(device)
        x_hat = model(x)
        loss = loss_fn(x_hat, x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)
    val_loss = evaluate(model, val_loader)
    test_loss = evaluate(model, test_loader)

    if val_loss + test_loss < best_loss:
        best_loss = val_loss + test_loss
        # Saves the weights
        torch.save(model.state_dict(), encoder_path)
        print("Models saved successfully.")

    print(f"Epoch {epoch:02d} | Train MSE: {train_loss:.6f} | Val MSE: {val_loss:.6f} | Test MSE: {test_loss:.6f}")

Model parameters: 1,569,895
Models saved successfully.
Epoch 01 | Train MSE: 0.338877 | Val MSE: 0.205334 | Test MSE: 0.206716
Models saved successfully.
Epoch 02 | Train MSE: 0.179992 | Val MSE: 0.151248 | Test MSE: 0.153404
Models saved successfully.
Epoch 03 | Train MSE: 0.147473 | Val MSE: 0.130057 | Test MSE: 0.131775
Models saved successfully.
Epoch 04 | Train MSE: 0.127477 | Val MSE: 0.114420 | Test MSE: 0.115974
Models saved successfully.
Epoch 05 | Train MSE: 0.114159 | Val MSE: 0.106209 | Test MSE: 0.108207
Models saved successfully.
Epoch 06 | Train MSE: 0.105806 | Val MSE: 0.099056 | Test MSE: 0.100021
Models saved successfully.
Epoch 07 | Train MSE: 0.098935 | Val MSE: 0.090676 | Test MSE: 0.091579
Models saved successfully.
Epoch 08 | Train MSE: 0.093704 | Val MSE: 0.087473 | Test MSE: 0.088380
Models saved successfully.
Epoch 09 | Train MSE: 0.089430 | Val MSE: 0.080889 | Test MSE: 0.081535
Models saved successfully.
Epoch 10 | Train MSE: 0.085954 | Val MSE: 0.080240 | T

In [11]:
seq_len = X_train.shape[1]
num_features = X_train.shape[2]

# Parameters
B, T, D = batch_size, seq_len, num_features
emb_dim = 64
channels = [32, 64, 128]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Instantiate model
model = TCNAutoencoder(input_dim=D, emb_dim=emb_dim, seq_len=T, channels=channels).to(device)

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Model parameters: {count_parameters(model):,}")

params = list(model.parameters())

optimizer = torch.optim.Adam(params, lr=1e-3)
loss_fn = nn.MSELoss()

epochs = 100

best_loss = 0.050461 + 0.050724

encoder_path = "/kaggle/working/encoder.pth"

model.load_state_dict(torch.load(encoder_path))

model.to(device)

def evaluate(model, loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for (x,) in loader:
            x = x.to(device)
            x_hat = model(x)
            loss = loss_fn(x_hat, x)
            total_loss += loss.item()
    return total_loss / len(loader)

for epoch in range(71, epochs + 1):
    model.train()
    train_loss = 0

    for (x,) in train_loader:
        x = x.to(device)
        x_hat = model(x)
        loss = loss_fn(x_hat, x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)
    val_loss = evaluate(model, val_loader)
    test_loss = evaluate(model, test_loader)

    if val_loss + test_loss < best_loss:
        best_loss = val_loss + test_loss
        # Saves the weights
        torch.save(model.state_dict(), encoder_path)
        print("Models saved successfully.")

    print(f"Epoch {epoch:02d} | Train MSE: {train_loss:.6f} | Val MSE: {val_loss:.6f} | Test MSE: {test_loss:.6f}")

Model parameters: 1,569,895
Models saved successfully.
Epoch 71 | Train MSE: 0.051743 | Val MSE: 0.049214 | Test MSE: 0.049501
Epoch 72 | Train MSE: 0.051660 | Val MSE: 0.049366 | Test MSE: 0.049669
Epoch 73 | Train MSE: 0.051607 | Val MSE: 0.050077 | Test MSE: 0.050378
Epoch 74 | Train MSE: 0.051510 | Val MSE: 0.051597 | Test MSE: 0.052141
Epoch 75 | Train MSE: 0.051365 | Val MSE: 0.049275 | Test MSE: 0.049601
Epoch 76 | Train MSE: 0.051322 | Val MSE: 0.049512 | Test MSE: 0.049924
Epoch 77 | Train MSE: 0.051076 | Val MSE: 0.049709 | Test MSE: 0.049980
Epoch 78 | Train MSE: 0.051082 | Val MSE: 0.049299 | Test MSE: 0.049590
Models saved successfully.
Epoch 79 | Train MSE: 0.051025 | Val MSE: 0.049136 | Test MSE: 0.049501
Epoch 80 | Train MSE: 0.050776 | Val MSE: 0.049907 | Test MSE: 0.050291
Epoch 81 | Train MSE: 0.050809 | Val MSE: 0.049684 | Test MSE: 0.050038
Models saved successfully.
Epoch 82 | Train MSE: 0.050539 | Val MSE: 0.048836 | Test MSE: 0.049095
Epoch 83 | Train MSE: 0.050

In [13]:
for epoch in range(101, 151):
    model.train()
    train_loss = 0

    for (x,) in train_loader:
        x = x.to(device)
        x_hat = model(x)
        loss = loss_fn(x_hat, x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)
    val_loss = evaluate(model, val_loader)
    test_loss = evaluate(model, test_loader)

    if val_loss + test_loss < best_loss:
        best_loss = val_loss + test_loss
        # Saves the weights
        torch.save(model.state_dict(), encoder_path)
        print("Models saved successfully.")

    print(f"Epoch {epoch:02d} | Train MSE: {train_loss:.6f} | Val MSE: {val_loss:.6f} | Test MSE: {test_loss:.6f}")

Epoch 101 | Train MSE: 0.049223 | Val MSE: 0.049476 | Test MSE: 0.049799
Epoch 102 | Train MSE: 0.049452 | Val MSE: 0.048634 | Test MSE: 0.049064
Models saved successfully.
Epoch 103 | Train MSE: 0.049170 | Val MSE: 0.048099 | Test MSE: 0.048386
Epoch 104 | Train MSE: 0.049068 | Val MSE: 0.048749 | Test MSE: 0.049118
Models saved successfully.
Epoch 105 | Train MSE: 0.049031 | Val MSE: 0.047883 | Test MSE: 0.048154
Epoch 106 | Train MSE: 0.049005 | Val MSE: 0.048917 | Test MSE: 0.049332
Epoch 107 | Train MSE: 0.049287 | Val MSE: 0.048663 | Test MSE: 0.048924
Epoch 108 | Train MSE: 0.048993 | Val MSE: 0.048572 | Test MSE: 0.048877
Epoch 109 | Train MSE: 0.048697 | Val MSE: 0.048175 | Test MSE: 0.048463
Epoch 110 | Train MSE: 0.048796 | Val MSE: 0.049168 | Test MSE: 0.049459
Epoch 111 | Train MSE: 0.048809 | Val MSE: 0.048136 | Test MSE: 0.048329
Epoch 112 | Train MSE: 0.048786 | Val MSE: 0.048288 | Test MSE: 0.048554
Models saved successfully.
Epoch 113 | Train MSE: 0.048670 | Val MSE: 

In [14]:
for epoch in range(151, 201):
    model.train()
    train_loss = 0

    for (x,) in train_loader:
        x = x.to(device)
        x_hat = model(x)
        loss = loss_fn(x_hat, x)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_loss /= len(train_loader)
    val_loss = evaluate(model, val_loader)
    test_loss = evaluate(model, test_loader)

    if val_loss + test_loss < best_loss:
        best_loss = val_loss + test_loss
        # Saves the weights
        torch.save(model.state_dict(), encoder_path)
        print("Models saved successfully.")

    print(f"Epoch {epoch:02d} | Train MSE: {train_loss:.6f} | Val MSE: {val_loss:.6f} | Test MSE: {test_loss:.6f}")

Epoch 151 | Train MSE: 0.047142 | Val MSE: 0.047605 | Test MSE: 0.047886
Epoch 152 | Train MSE: 0.047072 | Val MSE: 0.049000 | Test MSE: 0.049240
Epoch 153 | Train MSE: 0.047333 | Val MSE: 0.047327 | Test MSE: 0.047681
Epoch 154 | Train MSE: 0.047107 | Val MSE: 0.048899 | Test MSE: 0.049177
Epoch 155 | Train MSE: 0.047065 | Val MSE: 0.047188 | Test MSE: 0.047434
Epoch 156 | Train MSE: 0.046981 | Val MSE: 0.047257 | Test MSE: 0.047553
Epoch 157 | Train MSE: 0.046996 | Val MSE: 0.047498 | Test MSE: 0.047841
Epoch 158 | Train MSE: 0.046940 | Val MSE: 0.046780 | Test MSE: 0.047223
Epoch 159 | Train MSE: 0.046924 | Val MSE: 0.047530 | Test MSE: 0.047834
Epoch 160 | Train MSE: 0.047026 | Val MSE: 0.048315 | Test MSE: 0.048634
Epoch 161 | Train MSE: 0.046927 | Val MSE: 0.047648 | Test MSE: 0.047866
Epoch 162 | Train MSE: 0.046843 | Val MSE: 0.047264 | Test MSE: 0.047583
Epoch 163 | Train MSE: 0.046834 | Val MSE: 0.048008 | Test MSE: 0.048276
Epoch 164 | Train MSE: 0.046772 | Val MSE: 0.047933

# Save the models

In [15]:
model_loaded = TCNAutoencoder(input_dim=D, emb_dim=emb_dim, seq_len=T, channels=channels)

model_loaded.load_state_dict(torch.load(encoder_path))

model_loaded.to(device)

model_loaded.eval()

print("Models reloaded successfully.")

Models reloaded successfully.
