In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sys
sys.path.append('/home/mei/nas/docker/thesis/model_train')
from dataloader.ts_reader import MultiModalDataset, collate_fn_pre_train
from model.autoencoder_ts import TimeSeriesAutoencoder

In [2]:
# Directory that contains one sub-directory per split (train / val / test).
DATA_ROOT = "/home/mei/nas/docker/thesis/data/hdf"
# Mini-batch size shared by all three loaders.
BATCH_SIZE = 32

train_data_dir = f"{DATA_ROOT}/train"
val_data_dir = f"{DATA_ROOT}/val"
test_data_dir = f"{DATA_ROOT}/test"

lstm_dataset_train = MultiModalDataset(train_data_dir)
lstm_dataset_val = MultiModalDataset(val_data_dir)
lstm_dataset_test = MultiModalDataset(test_data_dir)

# Only the training split is shuffled; validation and test keep a fixed order
# so that evaluation visits the samples in the same sequence on every run.
lstm_loader_train = DataLoader(
    lstm_dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn_pre_train,
)
lstm_loader_val = DataLoader(
    lstm_dataset_val,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn_pre_train,
)
lstm_loader_test = DataLoader(
    lstm_dataset_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn_pre_train,
)

In [3]:
import gc

import torch

# Collect unreachable Python objects first so that any tensors they still
# reference are released, then ask PyTorch's caching allocator to give its
# now-unused GPU blocks back to the driver.
gc.collect()
torch.cuda.empty_cache()

In [4]:
# Hyper-parameters of the run. "input_dim" and "hidden_dim" are forwarded to
# TimeSeriesAutoencoder, "lr" to Adam, "epochs" is read by train_model and
# "seed" initialises the random number generators below.
best_config = {
    "input_dim": 324,
    "hidden_dim": 32,
    "lr": 0.0001,
    "epochs": 10,
    "seed": 42,
}

# Seed BEFORE the model is constructed: weight initialisation and the shuffling
# order of a DataLoader created without an explicit generator both draw from
# torch's global RNG. Some GPU kernels are non-deterministic, so results are
# repeatable only up to that limit.
torch.manual_seed(best_config["seed"])
np.random.seed(best_config["seed"])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TimeSeriesAutoencoder(
    input_dim=best_config["input_dim"],
    hidden_dim=best_config["hidden_dim"],
).to(device)

# Mean absolute error between the reconstruction and the input.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=best_config["lr"])

In [5]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return the mean per-batch loss.

    When ``optimizer`` is given the model is switched to training mode and its
    parameters are updated after every batch. Without an optimizer the model is
    switched to evaluation mode and gradient tracking is disabled.
    """
    training = optimizer is not None
    model.train(training)  # model.train(False) is what model.eval() does

    running_loss = 0.0
    with torch.set_grad_enabled(training):
        for inputs, lengths in loader:
            inputs = inputs.to(device)
            lengths = lengths.to(device)

            if training:
                optimizer.zero_grad()

            outputs, _ = model(inputs, lengths)
            # NOTE(review): the loss is averaged over every element of the
            # batch tensor; `lengths` is only handed to the model. If the
            # batches are padded and padded steps should not count, mask them
            # here -- confirm against collate_fn_pre_train and the model.
            loss = criterion(outputs, inputs)

            if training:
                loss.backward()
                optimizer.step()

            running_loss += loss.item()

    return running_loss / len(loader)


def train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    config,
    device,
    model_dir="/home/mei/nas/docker/thesis/data/model/pre_train_autoencoder",
    model_name="best_model_32_1e-4.pth",
):
    """Train ``model`` to reconstruct its input and checkpoint the best epoch.

    Every epoch runs one optimisation pass over ``train_loader`` followed by
    one evaluation pass over ``val_loader``. Whenever the validation loss
    improves on the best value seen so far, ``model.state_dict()`` is written
    to ``os.path.join(model_dir, model_name)``.

    Args:
        model: Module called as ``model(inputs, lengths)`` that returns a
            pair whose first element is the reconstruction.
        train_loader: Sized iterable of ``(inputs, lengths)`` batches.
        val_loader: Sized iterable of ``(inputs, lengths)`` batches.
        criterion: Callable ``criterion(outputs, inputs)`` returning a scalar
            loss tensor.
        optimizer: Optimizer bound to ``model``'s parameters.
        config: Mapping; only ``config["epochs"]`` is read.
        device: Device the batches are moved to.
        model_dir: Directory for the checkpoint; created if missing.
        model_name: File name of the checkpoint inside ``model_dir``.

    Returns:
        dict with the keys ``"train_loss"`` and ``"val_loss"`` (one float per
        epoch), ``"best_val_loss"`` and ``"best_model_path"``.

    Raises:
        ValueError: If either loader yields no batches.
    """
    # Fail before any work is done instead of dividing by zero after an epoch.
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError("train_loader and val_loader must each yield at least one batch")

    os.makedirs(model_dir, exist_ok=True)
    best_model_path = os.path.join(model_dir, model_name)

    best_val_loss = float("inf")
    history = {"train_loss": [], "val_loss": []}

    for epoch in range(config["epochs"]):
        train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss = _run_epoch(model, val_loader, criterion, device)

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)

        print(f"Epoch {epoch+1}/{config['epochs']}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), best_model_path)
            print(f"Best model saved at {best_model_path} with Val Loss: {val_loss:.4f}")

    print(f"Training complete. Best Validation Loss: {best_val_loss:.4f}")

    history["best_val_loss"] = best_val_loss
    history["best_model_path"] = best_model_path
    return history

In [6]:
# Run the pre-training loop; train_model writes the checkpoint with the lowest
# validation loss to disk as it goes.
train_model(
    model=model,
    train_loader=lstm_loader_train,
    val_loader=lstm_loader_val,
    criterion=criterion,
    optimizer=optimizer,
    config=best_config,
    device=device,
)

Epoch 1/10, Train Loss: 0.1295, Val Loss: 0.0775
Best model saved at /home/mei/nas/docker/thesis/data/model/pre_train_autoencoder/best_model_32_1e-4.pth with Val Loss: 0.0775
Epoch 2/10, Train Loss: 0.0553, Val Loss: 0.0444
Best model saved at /home/mei/nas/docker/thesis/data/model/pre_train_autoencoder/best_model_32_1e-4.pth with Val Loss: 0.0444
Epoch 3/10, Train Loss: 0.0397, Val Loss: 0.0355
Best model saved at /home/mei/nas/docker/thesis/data/model/pre_train_autoencoder/best_model_32_1e-4.pth with Val Loss: 0.0355
Epoch 4/10, Train Loss: 0.0316, Val Loss: 0.0281
Best model saved at /home/mei/nas/docker/thesis/data/model/pre_train_autoencoder/best_model_32_1e-4.pth with Val Loss: 0.0281
Epoch 5/10, Train Loss: 0.0249, Val Loss: 0.0222
Best model saved at /home/mei/nas/docker/thesis/data/model/pre_train_autoencoder/best_model_32_1e-4.pth with Val Loss: 0.0222
Epoch 6/10, Train Loss: 0.0196, Val Loss: 0.0177
Best model saved at /home/mei/nas/docker/thesis/data/model/pre_train_autoenc

In [None]:
def test_model(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0.0
    mae_list = []

    with torch.no_grad():
        for batch in test_loader:
            inputs, lengths = batch
            inputs = inputs.to(device)
            lengths = lengths.to(device)

            outputs, _ = model(inputs, lengths)
            loss = criterion(outputs, inputs)
            test_loss += loss.item()

            mae = torch.abs(outputs - inputs).mean(dim=(1, 2))
            mae_list.extend(mae.cpu().numpy())

    test_loss /= len(test_loader)
    print(f"Test Loss: {test_loss:.4f}")

In [None]:
def visualize_reconstruction(model, test_loader, device, num_samples=5, columns=None):
    """Plot inputs against their reconstructions, one figure per batch.

    For each of the first ``num_samples`` batches of ``test_loader`` the first
    sample of the batch and the model's reconstruction of it are drawn in the
    same axes. A 2-D sample is drawn as one line per column; all original
    lines share one colour and all reconstructed lines another, and the legend
    holds exactly one entry per group.

    Args:
        model: Module called as ``model(inputs, lengths)`` that returns a
            pair whose first element is the reconstruction.
        test_loader: Iterable of ``(inputs, lengths)`` batches.
        device: Device the batches are moved to.
        num_samples: Number of batches to visualise (one figure each).
        columns: Optional int or sequence of ints selecting which columns of
            the 2-D sample to draw. ``None`` (default) draws every column.
    """
    model.eval()
    with torch.no_grad():
        for i, (inputs, lengths) in enumerate(test_loader):
            if i >= num_samples:
                break

            inputs = inputs.to(device)
            lengths = lengths.to(device)

            outputs, _ = model(inputs, lengths)

            # NOTE(review): the whole first sample is drawn. If the batches are
            # padded and lengths[0] is its true length, slicing to that length
            # would hide the padding -- confirm before changing.
            original = inputs[0].cpu().numpy()
            reconstructed = outputs[0].cpu().numpy()
            if columns is not None:
                original = original[:, columns]
                reconstructed = reconstructed[:, columns]

            fig, ax = plt.subplots(figsize=(10, 6))
            original_lines = ax.plot(original, color="tab:blue", alpha=0.7)
            reconstructed_lines = ax.plot(
                reconstructed, color="tab:orange", alpha=0.7, linestyle="--"
            )
            # Passing label= to plot() would repeat the label for every column,
            # so the legend is built from one representative line per group.
            ax.legend(
                [original_lines[0], reconstructed_lines[0]],
                ["Original", "Reconstructed"],
            )
            ax.set_xlabel("Index")
            ax.set_ylabel("Value")
            ax.set_title(f"Sample {i+1}: Original vs Reconstructed")
            plt.show()
            # Release the figure so a large num_samples does not pile up memory.
            plt.close(fig)

In [None]:
# Same file that train_model overwrites whenever the validation loss improves.
best_model_path = "/home/mei/nas/docker/thesis/data/model/pre_train_autoencoder/best_model_32_1e-4.pth"

# torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location lets weights saved on a GPU be restored on a CPU-only machine.
best_state = torch.load(best_model_path, map_location=device)
model.load_state_dict(best_state)
model.eval()

test_model(
    model=model,
    test_loader=lstm_loader_test,
    criterion=criterion,
    device=device,
)