In [1]:
#!pip install scikit-learn pandas numpy torch tqdm matplotlib pyarrow

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from scaled_dataloader import load_train, load_val
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
from plot_trajectory import plot_paths
#from metrics import rmse, mse, mae
import joblib
import math
import os
import json
import copy


In [16]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [17]:

class PositionalEncoding(nn.Module):
    """Fixed sinusoidal positional encoding added to a batch-first sequence.

    The table is precomputed once for ``max_len`` positions and stored as a
    (non-trainable) buffer, so it moves with the module across devices.

    Args:
        d_model: Width of the embeddings the encoding is added to. Both even
            and odd widths are supported.
        max_len: Number of positions for which encodings are precomputed.
    """

    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len).unsqueeze(1).float()
        div = torch.exp(torch.arange(0, d_model, 2).float()
                        * (-torch.log(torch.tensor(10000.0)) / d_model))

        angles = pos * div                      # [max_len, ceil(d_model / 2)]
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so the angle table is trimmed to the number of odd slots.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        pe = pe.unsqueeze(0)     # → [1, max_len, d_model]
        self.register_buffer("pe", pe)

    def forward(self, x):
        """Add the encoding of the first ``seq_len`` positions to ``x``.

        x: [batch, seq_len, d_model]
        returns: tensor of the same shape as ``x``
        """
        seq_len = x.size(1)
        return x + self.pe[:, :seq_len, :]


class TrajectoryTransformer30to10(nn.Module):
    """Transformer encoder that predicts a fixed number of future steps.

    The observed window is projected to ``d_model``, enriched with a
    positional encoding and passed through a Transformer encoder. The encoder
    state of the last time step is mapped by one linear layer to all future
    steps at once.

    Args:
        input_dim: Number of features per observed time step.
        output_dim: Number of features predicted per future time step.
        d_model: Width of the encoder.
        nhead: Number of attention heads.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden width of the feed-forward sub-layers.
        dropout: Dropout probability inside the encoder layers.
        future_steps: Number of future time steps to predict (default 10,
            the value that was previously hard-coded).
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        d_model: int = 128,
        nhead: int = 4,
        num_layers: int = 3,
        dim_feedforward: int = 256,
        dropout: float = 0.1,
        future_steps: int = 10,
    ):
        super().__init__()

        self.output_dim = output_dim
        self.future_steps = future_steps

        self.input_proj = nn.Linear(input_dim, d_model)
        self.pos_enc = PositionalEncoding(d_model)

        enc_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True,              # inputs are [batch, seq, feature]
        )

        self.encoder = nn.TransformerEncoder(enc_layer, num_layers=num_layers)

        # One linear head emits every future step in a single flat vector.
        self.out = nn.Linear(d_model, output_dim * self.future_steps)

    def forward(self, src):
        """
        src: [batch, seq_len, input_dim]
        return: [batch, future_steps, output_dim]
        """
        x = self.input_proj(src)       # [B, T, d_model]
        x = self.pos_enc(x)            # [B, T, d_model]
        x = self.encoder(x)            # [B, T, d_model]

        last_state = x[:, -1, :]       # [B, d_model]

        out = self.out(last_state)     # [B, future_steps * output_dim]

        # Use the configured horizon rather than a literal so the reshape
        # always matches the width of the output layer.
        return out.reshape(-1, self.future_steps, self.output_dim)



In [9]:

# Build the training dataset together with the scaler that load_train() returns,
# then build the validation dataset with that same scaler.
# NOTE(review): presumably the scaler is fitted on the training data only —
# confirm in scaled_dataloader.
train_ds, scaler = load_train()
val_ds = load_val(scaler)


Loading TRAIN...
Initial data manipulation done, computing sliding windows and building x and y tensors . . .


100%|██████████| 59518/59518 [00:53<00:00, 1114.97it/s]


Built tensor lists
Built full tensors
torch.Size([3773409, 30, 4]) torch.Size([3773409, 10, 4])
Loading VAL...
Initial data manipulation done, computing sliding windows and building x and y tensors . . .


100%|██████████| 12847/12847 [00:09<00:00, 1349.59it/s]


Built tensor lists
Built full tensors
torch.Size([788039, 30, 4]) torch.Size([788039, 10, 4])


In [18]:

# Mini-batch iterators over the two datasets: training batches are reshuffled,
# validation batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_ds,
    batch_size=64,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    dataset=val_ds,
    batch_size=64,
    shuffle=False,
)

# NOTE: SOG and COG are ignored — the configs below use only two features (input_dim=2, output_dim=2).

In [7]:
'''
num_epochs = 15
model = TrajectoryTransformer30to10(
    input_dim=4,      # or 5 if MMSI is included
    output_dim=4,     # predicting same set of features
    d_model=128,
    nhead=4,
    num_layers=3,
    dim_feedforward=256
).to(device)
'''

'\nnum_epochs = 15\nmodel = TrajectoryTransformer30to10(\n    input_dim=4,      # or 5 if MMSI is included\n    output_dim=4,     # predicting same set of features\n    d_model=128,\n    nhead=4,\n    num_layers=3,\n    dim_feedforward=256\n).to(device)\n'

In [8]:
'''
for epoch in range(num_epochs):
    model.train()

    train_mse  = 0.0
    train_rmse = 0.0
    train_mae  = 0.0

    for X, Y in train_loader:
        X, Y = X.to(device), Y.to(device)

        optimizer.zero_grad()
        pred = model(X)

        # Backprop loss = MSE
        loss = mse(pred, Y)
        loss.backward()
        optimizer.step()

        # --- METRICS ---
        mse_val  = loss.item()               # reuse computed loss!
        rmse_val = mse_val ** 0.5            # faster than sqrt() in torch
        mae_val  = mae(pred, Y).item()

        train_mse  += mse_val
        train_rmse += rmse_val
        train_mae  += mae_val

    # Averages
    train_mse  /= len(train_loader)
    train_rmse /= len(train_loader)
    train_mae  /= len(train_loader)

    print(
        f"Epoch {epoch}: "
        f"MSE={train_mse:.4f}, "
        f"RMSE={train_rmse:.4f}, "
        f"MAE={train_mae:.4f}"
    )

'''

'\nfor epoch in range(num_epochs):\n    model.train()\n\n    train_mse  = 0.0\n    train_rmse = 0.0\n    train_mae  = 0.0\n\n    for X, Y in train_loader:\n        X, Y = X.to(device), Y.to(device)\n\n        optimizer.zero_grad()\n        pred = model(X)\n\n        # Backprop loss = MSE\n        loss = mse(pred, Y)\n        loss.backward()\n        optimizer.step()\n\n        # --- METRICS ---\n        mse_val  = loss.item()               # reuse computed loss!\n        rmse_val = mse_val ** 0.5            # faster than sqrt() in torch\n        mae_val  = mae(pred, Y).item()\n\n        train_mse  += mse_val\n        train_rmse += rmse_val\n        train_mae  += mae_val\n\n    # Averages\n    train_mse  /= len(train_loader)\n    train_rmse /= len(train_loader)\n    train_mae  /= len(train_loader)\n\n    print(\n        f"Epoch {epoch}: "\n        f"MSE={train_mse:.4f}, "\n        f"RMSE={train_rmse:.4f}, "\n        f"MAE={train_mae:.4f}"\n    )\n\n'

In [33]:
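# Candidate model configurations. The entries inside the triple-quoted string below are disabled; only the `configs` list is trained.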
'''{   
        "name": "Mini_Model",
        "model_kwargs": {
            "input_dim": 2,
            "output_dim": 2,
            "d_model": 128,
            "nhead": 4,
            "num_layers": 3,
            "dim_feedforward": 10,
            "dropout": 0.1,
        },
        "train_kwargs": {
            "num_epochs": 10,
            "learning_rate": 1e-3,
            "weight_decay": 1e-4,
        },
    },
    {
        "name": "A_small",
        "model_kwargs": {
            "input_dim": 2,
            "output_dim": 2,
            "d_model": 128,
            "nhead": 4,
            "num_layers": 3,
            "dim_feedforward": 512,
            "dropout": 0.1,
        },
        "train_kwargs": {
            "num_epochs": 40,
            "learning_rate": 1e-3,
            "weight_decay": 1e-4,
        },
    },
       {
        "name": "Mini_Test",
        "model_kwargs": {
            "input_dim": 2,
            "output_dim": 2,
            "d_model": 128,
            "nhead": 8,
            "num_layers": 2,
            "dim_feedforward": 100,
            "dropout": 0.1,
        },
        "train_kwargs": {
            "num_epochs": 5,
            "learning_rate": 5e-4,
            "weight_decay": 1e-4,
            "patience": 5,
        },
    },
    {
        "name": "C_deeper_2",
        "model_kwargs": {
            "input_dim": 2,
            "output_dim": 2,
            "d_model": 256,
            "nhead": 8,
            "num_layers": 4,
            "dim_feedforward": 1024,
            "dropout": 0.1,
        },
        "train_kwargs": {
            "num_epochs": 90,
            "learning_rate": 5e-4,
            "weight_decay": 1e-4,
            "patience": 5,
        },
    },
    {
        "name": "C_deeper",
        "model_kwargs": {
            "input_dim": 2,
            "output_dim": 2,
            "d_model": 256,
            "nhead": 8,
            "num_layers": 4,
            "dim_feedforward": 1024,
            "dropout": 0.1,
        },
        "train_kwargs": {
            "num_epochs": 50,
            "learning_rate": 5e-4,
            "weight_decay": 1e-4,
            "patience": 5,
        },
    },
'''
# Active hyper-parameter configurations. Each entry carries a display name,
# the constructor arguments of the model and the optimisation settings.
configs = [
    dict(
        name="B_medium",
        model_kwargs=dict(
            input_dim=2,
            output_dim=2,
            d_model=256,
            nhead=8,
            num_layers=3,
            dim_feedforward=1024,
            dropout=0.1,
        ),
        train_kwargs=dict(
            num_epochs=40,
            learning_rate=5e-4,
            weight_decay=1e-4,
        ),
    ),
]




#Run training model test

def train_one_config(config, train_loader, val_loader, device):
    """Train one model configuration for a fixed number of epochs.

    Args:
        config: Dict with the keys ``"name"``, ``"model_kwargs"`` (forwarded to
            ``TrajectoryTransformer30to10``) and ``"train_kwargs"`` (must
            contain ``"num_epochs"``, ``"learning_rate"``, ``"weight_decay"``).
        train_loader: Iterable of ``(X, Y)`` training batches.
        val_loader: Iterable of ``(X, Y)`` validation batches.
        device: Device the model and every batch are moved to.

    Returns:
        Dict with ``"name"``, ``"config"``, ``"history"`` (per-epoch lists
        ``epoch`` and train/val ``mse``/``rmse``/``mae``), ``"final_val_rmse"``
        and ``"final_val_mse"`` (both taken from the last epoch) and the
        trained ``"model"``.
    """
    name = config["name"]
    model_kwargs = config["model_kwargs"]
    train_kwargs = config["train_kwargs"]

    num_epochs   = train_kwargs["num_epochs"]
    learning_rate = train_kwargs["learning_rate"]
    weight_decay  = train_kwargs["weight_decay"]

    print(f"\n=== Training config: {name} ===")
    print("Model args:", model_kwargs)
    print("Train args:", train_kwargs)

    model = TrajectoryTransformer30to10(**model_kwargs).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    history = {
        # "epoch" is recorded because plot_val_curve reads history["epoch"].
        "epoch": [],
        "train_mse": [],
        "train_rmse": [],
        "train_mae": [],
        "val_mse": [],
        "val_rmse": [],
        "val_mae": [],
    }

    for epoch in range(1, num_epochs + 1):
        # ---- TRAIN ----
        model.train()
        train_mse = 0.0
        train_mae = 0.0
        n_train   = 0

        for X, Y in train_loader:
            X, Y = X.to(device), Y.to(device)

            optimizer.zero_grad()
            preds = model(X)

            loss = criterion(preds, Y)
            loss.backward()
            optimizer.step()

            mse_batch = loss.item()
            mae_batch = torch.mean(torch.abs(preds - Y)).item()
            bs = X.size(0)

            # Weight the batch means by batch size so the epoch value is a
            # per-sample average even when the last batch is smaller.
            train_mse += mse_batch * bs
            train_mae += mae_batch * bs
            n_train   += bs

        train_mse /= n_train
        train_rmse = math.sqrt(train_mse)
        train_mae /= n_train

        # ---- VALIDATION ----
        model.eval()
        val_mse = 0.0
        val_mae = 0.0
        n_val   = 0

        with torch.no_grad():
            for X_val, Y_val in val_loader:
                X_val, Y_val = X_val.to(device), Y_val.to(device)

                preds_val = model(X_val)
                loss_val  = criterion(preds_val, Y_val)

                mse_batch = loss_val.item()
                mae_batch = torch.mean(torch.abs(preds_val - Y_val)).item()
                bs = X_val.size(0)

                val_mse += mse_batch * bs
                val_mae += mae_batch * bs
                n_val   += bs

        val_mse /= n_val
        val_rmse = math.sqrt(val_mse)
        val_mae /= n_val

        history["epoch"].append(epoch)
        history["train_mse"].append(train_mse)
        history["train_rmse"].append(train_rmse)
        history["train_mae"].append(train_mae)
        history["val_mse"].append(val_mse)
        history["val_rmse"].append(val_rmse)
        history["val_mae"].append(val_mae)

        print(
            f"Epoch {epoch:03d} | "
            f"Train MSE={train_mse:.4f}, RMSE={train_rmse:.4f}, MAE={train_mae:.4f} || "
            f"Val MSE={val_mse:.4f}, RMSE={val_rmse:.4f}, MAE={val_mae:.4f}"
        )

    # Return value: last validation RMSE/MSE, the history and the final model.
    final_val_rmse = history["val_rmse"][-1]
    final_val_mse = history["val_mse"][-1]
    return {
        "name": name,
        "config": config,
        "history": history,
        "final_val_rmse": final_val_rmse,
        "final_val_mse": final_val_mse,
        "model": model,
    }


In [31]:


def train_one_config(config, train_loader, val_loader, device):
    """Train one model configuration and keep the best validation epoch.

    After every epoch the validation RMSE is compared with the best value so
    far; the weights of the best epoch are copied and restored at the end.
    If ``train_kwargs`` contains ``"patience"``, training stops once that many
    consecutive epochs brought no improvement.

    Args:
        config: Dict with the keys ``"name"``, ``"model_kwargs"`` (forwarded to
            ``TrajectoryTransformer30to10``) and ``"train_kwargs"`` (must
            contain ``"num_epochs"``, ``"learning_rate"``, ``"weight_decay"``;
            ``"patience"`` is optional).
        train_loader: Iterable of training batches; the first two items of
            each batch are used as ``(X, Y)``.
        val_loader: Iterable of validation batches, same layout.
        device: Device the model and every batch are moved to.

    Returns:
        Dict with ``"name"``, ``"config"``, ``"history"`` (per-epoch lists),
        ``"final_val_rmse"`` and ``"final_val_mse"`` (both from the best
        epoch; ``final_val_mse`` is ``None`` only if no epoch improved on the
        initial value) and the ``"model"`` with the best weights loaded.
    """
    name = config["name"]
    model_kwargs = config["model_kwargs"]
    train_kwargs = config["train_kwargs"]

    num_epochs    = train_kwargs["num_epochs"]
    learning_rate = train_kwargs["learning_rate"]
    weight_decay  = train_kwargs["weight_decay"]
    patience      = train_kwargs.get("patience", None)  # e.g. 5; None disables early stopping

    print(f"\n=== Training config: {name} ===")
    print("Model args:", model_kwargs)
    print("Train args:", train_kwargs)

    model = TrajectoryTransformer30to10(**model_kwargs).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    history = {
        "epoch": [],
        "train_mse": [],
        "train_rmse": [],
        "train_mae": [],
        "val_mse": [],
        "val_rmse": [],
        "val_mae": [],
    }

    best_val_rmse = float("inf")
    best_val_mse = None
    best_state = None
    best_epoch = None
    epochs_no_improve = 0

    for epoch in range(1, num_epochs + 1):
        # ---- TRAIN ----
        model.train()
        train_mse = 0.0
        train_mae = 0.0
        n_train   = 0

        for batch in train_loader:
            # Tolerate datasets that yield extra items after (X, Y).
            X, Y = batch if len(batch) == 2 else batch[:2]
            X, Y = X.to(device), Y.to(device)

            optimizer.zero_grad()
            preds = model(X)
            loss = criterion(preds, Y)
            loss.backward()
            optimizer.step()

            mse_batch = loss.item()
            mae_batch = torch.mean(torch.abs(preds - Y)).item()
            bs = X.size(0)

            # Weight the batch means by batch size for a per-sample average.
            train_mse += mse_batch * bs
            train_mae += mae_batch * bs
            n_train   += bs

        train_mse /= n_train
        train_rmse = math.sqrt(train_mse)
        train_mae /= n_train

        # ---- VALIDATION ----
        model.eval()
        val_mse = 0.0
        val_mae = 0.0
        n_val   = 0

        with torch.no_grad():
            for batch in val_loader:
                X_val, Y_val = batch if len(batch) == 2 else batch[:2]
                X_val, Y_val = X_val.to(device), Y_val.to(device)

                preds_val = model(X_val)
                loss_val  = criterion(preds_val, Y_val)

                mse_batch = loss_val.item()
                mae_batch = torch.mean(torch.abs(preds_val - Y_val)).item()
                bs = X_val.size(0)

                val_mse += mse_batch * bs
                val_mae += mae_batch * bs
                n_val   += bs

        val_mse /= n_val
        val_rmse = math.sqrt(val_mse)
        val_mae /= n_val

        history["epoch"].append(epoch)
        history["train_mse"].append(train_mse)
        history["train_rmse"].append(train_rmse)
        history["train_mae"].append(train_mae)
        history["val_mse"].append(val_mse)
        history["val_rmse"].append(val_rmse)
        history["val_mae"].append(val_mae)

        print(
            f"Epoch {epoch:03d} | "
            f"Train RMSE={train_rmse:.4f}, Val RMSE={val_rmse:.4f}"
        )

        # ---- Early stopping / remember the best model ----
        if val_rmse < best_val_rmse:
            best_val_rmse = val_rmse
            best_val_mse = val_mse
            best_epoch = epoch
            # Deep copy: state_dict() returns references to the live tensors.
            best_state = copy.deepcopy(model.state_dict())
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if patience is not None and epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch} (no improvement for {patience} epochs).")
            break

    # Restore the best model.
    if best_state is not None:
        model.load_state_dict(best_state)
        print(f"Loaded best model from epoch {best_epoch} with Val RMSE={best_val_rmse:.4f}")

    # Report both metrics for the epoch whose weights are actually returned.
    final_val_rmse = best_val_rmse
    final_val_mse  = best_val_mse

    return {
        "name": name,
        "config": config,
        "history": history,
        "final_val_rmse": final_val_rmse,
        "final_val_mse": final_val_mse,
        "model": model,
    }


def train_one_config(config, train_loader, val_loader, device):
    """Train one model configuration, with optional early stopping.

    This definition comes last in its cell, so it is the one in effect after
    the cell has run. Without ``"patience"`` in ``train_kwargs`` it trains for
    exactly ``num_epochs`` epochs and returns the last-epoch model and metrics.
    With ``"patience"`` it tracks the best validation RMSE, stops after that
    many consecutive epochs without improvement and returns the best-epoch
    weights and metrics.

    Args:
        config: Dict with the keys ``"name"``, ``"model_kwargs"`` (forwarded to
            ``TrajectoryTransformer30to10``) and ``"train_kwargs"`` (must
            contain ``"num_epochs"``, ``"learning_rate"``, ``"weight_decay"``;
            ``"patience"`` is optional).
        train_loader: Iterable of ``(X, Y)`` training batches.
        val_loader: Iterable of ``(X, Y)`` validation batches.
        device: Device the model and every batch are moved to.

    Returns:
        Dict with ``"name"``, ``"config"``, ``"history"`` (per-epoch lists),
        ``"final_val_rmse"``, ``"final_val_mse"`` and the trained ``"model"``.
    """
    name = config["name"]
    model_kwargs = config["model_kwargs"]
    train_kwargs = config["train_kwargs"]

    num_epochs    = train_kwargs["num_epochs"]
    learning_rate = train_kwargs["learning_rate"]
    weight_decay  = train_kwargs["weight_decay"]
    # Optional: configs that carry "patience" must not be silently ignored.
    patience      = train_kwargs.get("patience", None)

    print(f"\n=== Training config: {name} ===")
    print("Model args:", model_kwargs)
    print("Train args:", train_kwargs)

    model = TrajectoryTransformer30to10(**model_kwargs).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    history = {
        "epoch": [],
        "train_mse": [],
        "train_rmse": [],
        "train_mae": [],
        "val_mse": [],
        "val_rmse": [],
        "val_mae": [],
    }

    # Only used when early stopping is requested.
    best_val_rmse = float("inf")
    best_val_mse = None
    best_state = None
    best_epoch = None
    epochs_no_improve = 0

    for epoch in range(1, num_epochs + 1):
        # ---- TRAIN ----
        model.train()
        train_mse = 0.0
        train_mae = 0.0
        n_train   = 0

        for X, Y in train_loader:
            X, Y = X.to(device), Y.to(device)

            optimizer.zero_grad()
            preds = model(X)

            loss = criterion(preds, Y)
            loss.backward()
            optimizer.step()

            mse_batch = loss.item()
            mae_batch = torch.mean(torch.abs(preds - Y)).item()
            bs = X.size(0)

            # Weight the batch means by batch size for a per-sample average.
            train_mse += mse_batch * bs
            train_mae += mae_batch * bs
            n_train   += bs

        train_mse /= n_train
        train_rmse = math.sqrt(train_mse)
        train_mae /= n_train

        # ---- VALIDATION ----
        model.eval()
        val_mse = 0.0
        val_mae = 0.0
        n_val   = 0

        with torch.no_grad():
            for X_val, Y_val in val_loader:
                X_val, Y_val = X_val.to(device), Y_val.to(device)

                preds_val = model(X_val)
                loss_val  = criterion(preds_val, Y_val)

                mse_batch = loss_val.item()
                mae_batch = torch.mean(torch.abs(preds_val - Y_val)).item()
                bs = X_val.size(0)

                val_mse += mse_batch * bs
                val_mae += mae_batch * bs
                n_val   += bs

        val_mse /= n_val
        val_rmse = math.sqrt(val_mse)
        val_mae /= n_val

        history["epoch"].append(epoch)
        history["train_mse"].append(train_mse)
        history["train_rmse"].append(train_rmse)
        history["train_mae"].append(train_mae)
        history["val_mse"].append(val_mse)
        history["val_rmse"].append(val_rmse)
        history["val_mae"].append(val_mae)

        print(
            f"Epoch {epoch:03d} | "
            f"Train MSE={train_mse:.4f}, RMSE={train_rmse:.4f}, MAE={train_mae:.4f} || "
            f"Val MSE={val_mse:.4f}, RMSE={val_rmse:.4f}, MAE={val_mae:.4f}"
        )

        # ---- Optional early stopping ----
        if patience is not None:
            if val_rmse < best_val_rmse:
                best_val_rmse = val_rmse
                best_val_mse = val_mse
                best_epoch = epoch
                # Deep copy: state_dict() returns references to live tensors.
                best_state = copy.deepcopy(model.state_dict())
                epochs_no_improve = 0
            else:
                epochs_no_improve += 1

            if epochs_no_improve >= patience:
                print(f"Early stopping at epoch {epoch} (no improvement for {patience} epochs).")
                break

    if best_state is not None:
        # Early stopping was active: hand back the best epoch, not the last.
        model.load_state_dict(best_state)
        print(f"Loaded best model from epoch {best_epoch} with Val RMSE={best_val_rmse:.4f}")
        final_val_rmse = best_val_rmse
        final_val_mse  = best_val_mse
    else:
        final_val_rmse = history["val_rmse"][-1]
        final_val_mse  = history["val_mse"][-1]

    return {
        "name": name,
        "config": config,
        "history": history,
        "final_val_rmse": final_val_rmse,
        "final_val_mse": final_val_mse,
        "model": model,
    }


In [35]:
#Plotting code

# Make sure the output folders for figures and model weights exist.
for _out_dir in ("plots", "checkpoints"):
    os.makedirs(_out_dir, exist_ok=True)

def plot_val_curve(history, name):
    """Save the validation-RMSE curve of one training run as a PNG.

    Args:
        history: Dict with a ``"val_rmse"`` list and, optionally, an
            ``"epoch"`` list of the same length. When ``"epoch"`` is missing
            or empty, epochs are numbered 1..len(val_rmse).
        name: Run name; used in the title and in the file name
            ``plots/{name}_val_rmse.png``.
    """
    val_rmse = history["val_rmse"]
    # Histories that do not record epoch numbers are still plottable.
    epochs = history.get("epoch") or range(1, len(val_rmse) + 1)

    fig, ax = plt.subplots()
    try:
        ax.plot(epochs, val_rmse, marker="o")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Val RMSE")
        ax.set_title(f"Val RMSE – {name}")
        fig.tight_layout()

        # Do not rely on another cell having created the folder.
        os.makedirs("plots", exist_ok=True)
        save_path = os.path.join("plots", f"{name}_val_rmse.png")
        fig.savefig(save_path, dpi=150)
    finally:
        # Always release the figure, even if saving fails.
        plt.close(fig)
    


def plot_paths_saved(x_np, y_np, y_pred_np, idx, name):
    """Build and save exactly one trajectory plot.

    Draws the observed history, the true future and the predicted future in
    one figure and writes it to ``plots/{name}_traj_seg{idx}.png``.

    Args:
        x_np: 2-D array of the observed history; column 1 goes on the x-axis
            and column 0 on the y-axis.
        y_np: 2-D array of the true future, same column layout.
        y_pred_np: 2-D array of the predicted future, same column layout.
        idx: Sample identifier used in the title and the file name.
        name: Model/config name used in the title and the file name.
    """
    fig, ax = plt.subplots(figsize=(5, 5))
    try:
        # Alternative: reuse the existing helper instead of plotting here:
        # plot_paths(x_np, y_np, y_pred_np, idx)

        # Column convention used here: lat = col 0, lon = col 1.
        ax.plot(x_np[:, 1],      x_np[:, 0],      'o-', label='history')
        ax.plot(y_np[:, 1],      y_np[:, 0],      'x--', label='true future')
        ax.plot(y_pred_np[:, 1], y_pred_np[:, 0], 's--', label='pred future')

        ax.set_xlabel("lon (column 1)")
        ax.set_ylabel("lat (column 0)")
        ax.set_title(f"{name} – val sample {idx}")
        ax.legend()
        fig.tight_layout()

        # Do not rely on another cell having created the folder.
        os.makedirs("plots", exist_ok=True)
        save_path = os.path.join("plots", f"{name}_traj_seg{idx}.png")
        fig.savefig(save_path, dpi=150)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)


In [36]:
def plot_segments_by_dataset_index(model, val_loader, device, scaler, name, indices_to_plot):
    """Save trajectory plots for selected samples, addressed by running index.

    The batches of ``val_loader`` are walked in iteration order while a
    running sample counter is kept. Every sample whose counter value is listed
    in ``indices_to_plot`` is run through ``model`` and handed to
    ``plot_paths_saved``. The walk ends as soon as the last requested sample
    has been plotted; indices that were never reached are reported via print.

    Args:
        model: Network called on a single-sample batch moved to ``device``.
        val_loader: Iterable yielding ``(X, Y)`` batches.
        device: Target device for the selected sample.
        scaler: Accepted for call compatibility; not used by this function.
        name: Label forwarded to ``plot_paths_saved``.
        indices_to_plot: Iterable of sample indices to plot.
    """
    model.eval()
    remaining = set(indices_to_plot)
    batch_start = 0  # running index of the first sample in the current batch

    with torch.no_grad():
        for X_batch, Y_batch in val_loader:
            n_in_batch = X_batch.size(0)

            for row in range(n_in_batch):
                sample_idx = batch_start + row
                if sample_idx not in remaining:
                    continue

                # Slice with row:row+1 to keep the batch dimension for the model.
                sample_x = X_batch[row:row + 1].to(device)
                sample_y = Y_batch[row:row + 1].to(device)
                sample_pred = model(sample_x)

                # ---- save the plot ----
                plot_paths_saved(
                    sample_x[0].cpu().numpy(),
                    sample_y[0].cpu().numpy(),
                    sample_pred[0].cpu().numpy(),
                    idx=sample_idx,
                    name=name,
                )
                print(f"Plotted segment {sample_idx}")

                remaining.remove(sample_idx)
                if not remaining:
                    return  # every requested sample has been plotted

            batch_start += n_in_batch

        if remaining:
            print("Some requested indices not found:", remaining)




def make_small_val_subset(val_loader, num_batches=2):
    """Copy the first ``num_batches`` batches of ``val_loader`` into a list.

    Args:
        val_loader: Iterable yielding ``(X, Y)`` tensor pairs.
        num_batches: Maximum number of batches to keep.

    Returns:
        List of ``(X, Y)`` tuples holding clones of the batch tensors; shorter
        than ``num_batches`` if the loader runs out first.
    """
    # Cloning decouples the cached tensors from the loader's batches, so a
    # later .to(device) cannot break anything.
    return [
        (inputs.clone(), targets.clone())
        for _, (inputs, targets) in zip(range(num_batches), val_loader)
    ]

# Cache clones of the first two validation batches; the driver cells further
# down iterate over this list to plot/evaluate a few samples per config.
small_val_batches = make_small_val_subset(val_loader, num_batches=2)


In [34]:
#Run Model B again full
# Train every entry of `configs`, save its weights and plots, and keep track
# of the configuration with the lowest final validation RMSE.
results, best_result = [], None

for cfg in configs:
    result = train_one_config(cfg, train_loader, val_loader, device)
    name, model, history = result["name"], result["model"], result["history"]

    # 1) Save the checkpoint and remember where it went
    ckpt_path = os.path.join("checkpoints", f"{name}.pt")
    torch.save(model.state_dict(), ckpt_path)
    result["checkpoint_path"] = ckpt_path

    # 2) Save the validation curve
    plot_val_curve(history, name)

    # 3) Plot selected segments (e.g. vessels that are actually moving)
    plot_segments_by_dataset_index(
        model, val_loader, device, scaler, name, [21, 22, 23, 29]
    )

    # 4) Drop the model object so the result dict stays JSON-serialisable
    result.pop("model")
    results.append(result)

    # 5) Update the best result; strict "<" keeps the earliest config on ties
    if best_result is not None and not (
        result["final_val_rmse"] < best_result["final_val_rmse"]
    ):
        continue
    best_result = result

print("\n======================")
print("Best config:", best_result["name"])
print("Best final Val RMSE:", best_result["final_val_rmse"])
print("Best model args:", best_result["config"]["model_kwargs"])
print("Best train args:", best_result["config"]["train_kwargs"])
print("Best checkpoint:", best_result["checkpoint_path"])


=== Training config: B_medium ===
Model args: {'input_dim': 2, 'output_dim': 2, 'd_model': 256, 'nhead': 8, 'num_layers': 3, 'dim_feedforward': 1024, 'dropout': 0.1}
Train args: {'num_epochs': 40, 'learning_rate': 0.0005, 'weight_decay': 0.0001}
Epoch 001 | Train RMSE=0.0090, Val RMSE=0.0010
Epoch 002 | Train RMSE=0.0010, Val RMSE=0.0006
Epoch 003 | Train RMSE=0.0008, Val RMSE=0.0005
Epoch 004 | Train RMSE=0.0007, Val RMSE=0.0008
Epoch 005 | Train RMSE=0.0007, Val RMSE=0.0006
Epoch 006 | Train RMSE=0.0007, Val RMSE=0.0006
Epoch 007 | Train RMSE=0.0007, Val RMSE=0.0007
Epoch 008 | Train RMSE=0.0006, Val RMSE=0.0005
Epoch 009 | Train RMSE=0.0006, Val RMSE=0.0004
Epoch 010 | Train RMSE=0.0006, Val RMSE=0.0007
Epoch 011 | Train RMSE=0.0006, Val RMSE=0.0007
Epoch 012 | Train RMSE=0.0006, Val RMSE=0.0005
Epoch 013 | Train RMSE=0.0006, Val RMSE=0.0006
Epoch 014 | Train RMSE=0.0006, Val RMSE=0.0006
Epoch 015 | Train RMSE=0.0006, Val RMSE=0.0008
Epoch 016 | Train RMSE=0.0006, Val RMSE=0.0009
E

In [32]:
# NOTE(review): this cell repeats the driver loop of the preceding cell
# statement for statement; consider keeping only one of the two.
# Train every entry of `configs`, save its weights and plots, and keep track
# of the configuration with the lowest final validation RMSE.
results, best_result = [], None

for cfg in configs:
    result = train_one_config(cfg, train_loader, val_loader, device)
    name, model, history = result["name"], result["model"], result["history"]

    # 1) Save the checkpoint and remember where it went
    ckpt_path = os.path.join("checkpoints", f"{name}.pt")
    torch.save(model.state_dict(), ckpt_path)
    result["checkpoint_path"] = ckpt_path

    # 2) Save the validation curve
    plot_val_curve(history, name)

    # 3) Plot selected segments (e.g. vessels that are actually moving)
    plot_segments_by_dataset_index(
        model, val_loader, device, scaler, name, [21, 22, 23, 29]
    )

    # 4) Drop the model object so the result dict stays JSON-serialisable
    result.pop("model")
    results.append(result)

    # 5) Update the best result; strict "<" keeps the earliest config on ties
    if best_result is not None and not (
        result["final_val_rmse"] < best_result["final_val_rmse"]
    ):
        continue
    best_result = result

print("\n======================")
print("Best config:", best_result["name"])
print("Best final Val RMSE:", best_result["final_val_rmse"])
print("Best model args:", best_result["config"]["model_kwargs"])
print("Best train args:", best_result["config"]["train_kwargs"])
print("Best checkpoint:", best_result["checkpoint_path"])



=== Training config: Mini_Test ===
Model args: {'input_dim': 2, 'output_dim': 2, 'd_model': 128, 'nhead': 8, 'num_layers': 2, 'dim_feedforward': 100, 'dropout': 0.1}
Train args: {'num_epochs': 5, 'learning_rate': 0.0005, 'weight_decay': 0.0001, 'patience': 5}
Epoch 001 | Train RMSE=0.0073, Val RMSE=0.0028
Epoch 002 | Train RMSE=0.0025, Val RMSE=0.0011
Epoch 003 | Train RMSE=0.0019, Val RMSE=0.0015
Epoch 004 | Train RMSE=0.0016, Val RMSE=0.0007
Epoch 005 | Train RMSE=0.0013, Val RMSE=0.0008
Loaded best model from epoch 4 with Val RMSE=0.0007
Plotted segment 21
Plotted segment 22
Plotted segment 23
Plotted segment 29

=== Training config: C_deeper_2 ===
Model args: {'input_dim': 2, 'output_dim': 2, 'd_model': 256, 'nhead': 8, 'num_layers': 4, 'dim_feedforward': 1024, 'dropout': 0.1}
Train args: {'num_epochs': 90, 'learning_rate': 0.0005, 'weight_decay': 0.0001, 'patience': 5}
Epoch 001 | Train RMSE=0.0093, Val RMSE=0.0014
Epoch 002 | Train RMSE=0.0009, Val RMSE=0.0009
Epoch 003 | Train 

In [None]:
# Train every config, save checkpoint and validation curve, report the error
# in original units on a few cached validation batches and plot fixed segments.
for cfg in configs:
    result = train_one_config(cfg, train_loader, val_loader, device)
    name   = result["name"]
    model  = result["model"]
    history = result["history"]

    # Save the checkpoint
    ckpt_path = os.path.join("checkpoints", f"{name}.pt")
    torch.save(model.state_dict(), ckpt_path)
    result["checkpoint_path"] = ckpt_path

    # Save the validation curve
    plot_val_curve(history, name)

    # -----------------------------
    # Evaluate up to three cached validation batches (first sample of each)
    # -----------------------------
    model.eval()
    with torch.no_grad():
        for seg_idx, (X_val_small, Y_val_small) in enumerate(small_val_batches):
            if seg_idx >= 3:
                break

            Xs = X_val_small.to(device)
            Ys = Y_val_small.to(device)

            Y_pred = model(Xs)

            # .copy() is required: on CPU, .to(device)/.cpu()/.numpy() all
            # share memory with the cached tensors, so the in-place inverse
            # scaling below would otherwise corrupt small_val_batches for
            # every following config.
            x_np      = Xs[0].cpu().numpy().copy()
            y_np      = Ys[0].cpu().numpy().copy()
            y_pred_np = Y_pred[0].cpu().numpy().copy()

            # ---------------------------------------
            # Inverse scaling of the first two columns
            # ---------------------------------------
            x_np[:, :2]      = scaler.inverse_transform(x_np[:, :2])
            y_np[:, :2]      = scaler.inverse_transform(y_np[:, :2])
            y_pred_np[:, :2] = scaler.inverse_transform(y_pred_np[:, :2])

            # ---------------------------------------
            # MSE/RMSE after inverse scaling
            # ---------------------------------------
            mse  = np.mean((y_pred_np - y_np)**2)
            rmse = np.sqrt(mse)
            print(f"[{name}] Segment {seg_idx}: MSE={mse:.4f}, RMSE={rmse:.4f}")

    # ---------------------------------------
    # Save the segment plots once per config. This call used to sit inside the
    # loop above, which rescanned the validation loader and rewrote the same
    # files for every cached batch.
    # ---------------------------------------
    plot_segments_by_dataset_index(
        model=model,
        val_loader=val_loader,
        device=device,
        scaler=scaler,
        name=name,
        indices_to_plot=[21, 22, 23, 29]      # <-- segments of interest
    )





=== Training config: Mini_Model ===
Model args: {'input_dim': 2, 'output_dim': 2, 'd_model': 128, 'nhead': 4, 'num_layers': 3, 'dim_feedforward': 10, 'dropout': 0.1}
Train args: {'num_epochs': 10, 'learning_rate': 0.001, 'weight_decay': 0.0001}
Epoch 001 | Train MSE=0.0001, RMSE=0.0087, MAE=0.0024 || Val MSE=0.0000, RMSE=0.0011, MAE=0.0008
Epoch 002 | Train MSE=0.0000, RMSE=0.0010, MAE=0.0007 || Val MSE=0.0000, RMSE=0.0015, MAE=0.0013
Epoch 003 | Train MSE=0.0000, RMSE=0.0009, MAE=0.0006 || Val MSE=0.0000, RMSE=0.0015, MAE=0.0011
Epoch 004 | Train MSE=0.0000, RMSE=0.0008, MAE=0.0006 || Val MSE=0.0000, RMSE=0.0017, MAE=0.0013
Epoch 005 | Train MSE=0.0000, RMSE=0.0008, MAE=0.0005 || Val MSE=0.0000, RMSE=0.0019, MAE=0.0015
Epoch 006 | Train MSE=0.0000, RMSE=0.0008, MAE=0.0005 || Val MSE=0.0000, RMSE=0.0020, MAE=0.0016
Epoch 007 | Train MSE=0.0000, RMSE=0.0007, MAE=0.0005 || Val MSE=0.0000, RMSE=0.0024, MAE=0.0018
Epoch 008 | Train MSE=0.0000, RMSE=0.0007, MAE=0.0005 || Val MSE=0.0000, RM

In [None]:

# Train every config, save weights and plots, dump all results to JSON and
# report the configuration with the lowest final validation RMSE.
results, best_result = [], None

for cfg in configs:
    # 1) Train the model
    result = train_one_config(cfg, train_loader, val_loader, device)
    name, model, history = result["name"], result["model"], result["history"]

    # 2) Save the checkpoint
    ckpt_path = os.path.join("checkpoints", f"{name}.pt")
    torch.save(model.state_dict(), ckpt_path)
    result["checkpoint_path"] = ckpt_path

    # 3) Save the validation-RMSE curve as a PNG
    plot_val_curve(history, name)

    # 4) Produce trajectory plots right after training (at most 3 segments)
    model.eval()
    with torch.no_grad():
        for seg_idx, (X_val_small, Y_val_small) in enumerate(small_val_batches[:3]):
            X_val_small = X_val_small.to(device)
            Y_val_small = Y_val_small.to(device)

            preds_small = model(X_val_small)

            # Only the first sample of each cached batch is plotted.
            x_np, y_np, y_pred_np = (
                tensor[0].cpu().numpy()
                for tensor in (X_val_small, Y_val_small, preds_small)
            )

            # TODO: apply inverse scaling here if the plots should be in
            # original units, e.g. on copies of the arrays, and pass those
            # to plot_paths_saved instead.

            plot_paths_saved(x_np, y_np, y_pred_np, seg_idx, name)

    # 5) Remove the model object from the result so it is JSON-friendly
    result.pop("model")
    results.append(result)

    # 6) Track the best result; strict "<" keeps the earliest config on ties
    if best_result is not None and not (
        result["final_val_rmse"] < best_result["final_val_rmse"]
    ):
        continue
    best_result = result

# 7) Persist all results as JSON
with open("search_results.json", "w") as f:
    json.dump(results, f, indent=2)

print("\n======================")
print("Best config:", best_result["name"])
print("Best final Val RMSE:", best_result["final_val_rmse"])
print("Best model args:", best_result["config"]["model_kwargs"])
print("Best train args:", best_result["config"]["train_kwargs"])
print("Best checkpoint:", best_result["checkpoint_path"])



In [None]:
# Train every configuration and remember the one with the lowest final
# validation RMSE. The trained model stays inside each result dict here.
results, best_result = [], None

for cfg in configs:
    result = train_one_config(cfg, train_loader, val_loader, device)
    results.append(result)

    # Strict "<" keeps the earliest config when two runs tie.
    if best_result is not None and not (
        result["final_val_rmse"] < best_result["final_val_rmse"]
    ):
        continue
    best_result = result

print("\n======================")
print("Best config:", best_result["name"])
print("Best final Val RMSE:", best_result["final_val_rmse"])
print("Best model args:", best_result["config"]["model_kwargs"])
print("Best train args:", best_result["config"]["train_kwargs"])



=== Training config: Mini_Model ===
Model args: {'input_dim': 2, 'output_dim': 2, 'd_model': 128, 'nhead': 4, 'num_layers': 3, 'dim_feedforward': 10, 'dropout': 0.1}
Train args: {'num_epochs': 10, 'learning_rate': 0.001, 'weight_decay': 0.0001}
Epoch 001 | Train MSE=0.0001, RMSE=0.0087, MAE=0.0023 || Val MSE=0.0000, RMSE=0.0007, MAE=0.0005
Epoch 002 | Train MSE=0.0000, RMSE=0.0010, MAE=0.0007 || Val MSE=0.0000, RMSE=0.0012, MAE=0.0010
Epoch 003 | Train MSE=0.0000, RMSE=0.0009, MAE=0.0006 || Val MSE=0.0000, RMSE=0.0012, MAE=0.0009
Epoch 004 | Train MSE=0.0000, RMSE=0.0008, MAE=0.0005 || Val MSE=0.0000, RMSE=0.0010, MAE=0.0007
Epoch 005 | Train MSE=0.0000, RMSE=0.0008, MAE=0.0005 || Val MSE=0.0000, RMSE=0.0008, MAE=0.0007
Epoch 006 | Train MSE=0.0000, RMSE=0.0007, MAE=0.0005 || Val MSE=0.0000, RMSE=0.0009, MAE=0.0007
Epoch 007 | Train MSE=0.0000, RMSE=0.0007, MAE=0.0005 || Val MSE=0.0000, RMSE=0.0009, MAE=0.0007
Epoch 008 | Train MSE=0.0000, RMSE=0.0007, MAE=0.0004 || Val MSE=0.0000, RM



# Output folder for the saved weights of every trained configuration.
os.makedirs("checkpoints", exist_ok=True)

results, best_result = [], None

for cfg in configs:
    result = train_one_config(cfg, train_loader, val_loader, device)

    model_name = result["name"]
    model = result["model"]              # the trained net for this config

    # 1) save the model weights and 2) record where they were written
    ckpt_path = os.path.join("checkpoints", f"{model_name}.pt")
    torch.save(model.state_dict(), ckpt_path)
    result["checkpoint_path"] = ckpt_path

    # 3) drop the big model object so the result dict can be serialised
    result.pop("model")
    results.append(result)

    # 4) update best_result; strict "<" keeps the earliest config on ties
    if best_result is not None and not (
        result["final_val_rmse"] < best_result["final_val_rmse"]
    ):
        continue
    best_result = result

# 5) save all results (configs + metrics + checkpoint paths) to disk
with open("search_results.json", "w") as f:
    json.dump(results, f, indent=2)

print("\n======================")
print("Best config:", best_result["name"])
print("Best final Val RMSE:", best_result["final_val_rmse"])
print("Best model args:", best_result["config"]["model_kwargs"])
print("Best train args:", best_result["config"]["train_kwargs"])
print("Best checkpoint:", best_result["checkpoint_path"])


### Plot Validation Loss

# Plot the train and validation MSE per epoch for one finished run.
chosen = results[1]   # 0,1,2 depending on list of results
history = chosen["history"]
# Result dicts store the config name under "name"; there is no "A_model" key.
name = chosen["name"]

epochs = range(1, len(history["train_mse"]) + 1)

plt.figure(figsize=(8, 5))
plt.plot(epochs, history["train_mse"], 'r-', label='Train MSE')
plt.plot(epochs, history["val_mse"], 'b-', label='Val MSE')
plt.xlabel('Epoch')
plt.ylabel('MSE')
plt.title(f'Training & Validation MSE — Model {name}')
plt.legend()
plt.grid(True)
plt.show()


# Plot trajectory
# The entries of `results` are plain dicts whose "model" entry was deleted by
# the search loop, so the network has to be rebuilt from its saved checkpoint.
chosen = results[1]   # replace the index with the run that should be plotted

# NOTE(review): assumes train_one_config builds
# TrajectoryTransformer30to10(**cfg["model_kwargs"]) - confirm, and swap in the
# matching class/kwargs if the search trains a different architecture.
net = TrajectoryTransformer30to10(**chosen["config"]["model_kwargs"])
net.load_state_dict(torch.load(chosen["checkpoint_path"], map_location=device))
net = net.to(device)
net.eval()

# Remember to undo the scaling (inverse-transform) before plotting!

num_samples_to_plot = 5

for idx, (x, y) in enumerate(val_loader):
    # Only the first `num_samples_to_plot` batches are visualized.
    if idx >= num_samples_to_plot:
        break

    x, y = x.to(device), y.to(device)

    with torch.no_grad():
        y_pred = net(x)      # Transformer: shape [B, 10, 4] per the original note

    # pick the first element in the batch
    # (shapes below are from the original notes - TODO confirm against the loader)
    x_np      = x[0].cpu().numpy()         # (30, 4)
    y_np      = y[0].cpu().numpy()         # (10, 4)
    y_pred_np = y_pred[0].cpu().numpy()    # (10, 4) already - no reshape needed

    plot_paths(x_np, y_np, y_pred_np, idx)


In [37]:


def plot_path_single(x_sample, y_true, y_pred, idx,
                     scaler=None, hist_len=30,
                     lat_idx=0, lon_idx=1,
                     title_prefix="B_medium"):
    """
    Draw one validation sample: past track, true future and predicted future.

    x_sample: history, either flattened (hist_len * feat,) or (hist_len, feat)
    y_true, y_pred: (future_len, feat)
    idx: sample number, only used in the figure title
    scaler: optional object whose inverse_transform() is applied to each
            two-column [lat, lon] array before plotting
    hist_len: number of history steps, used to un-flatten a 1-D x_sample
    lat_idx, lon_idx: feature columns holding latitude and longitude
    title_prefix: text placed in front of the sample number in the title

    torch tensors are accepted for all three arrays and converted to NumPy.
    """

    def _as_numpy(arr):
        # Tensors are detached and moved to the CPU; anything else passes through.
        return arr.detach().cpu().numpy() if isinstance(arr, torch.Tensor) else arr

    x_sample, y_true, y_pred = (_as_numpy(a) for a in (x_sample, y_true, y_pred))

    # A flattened history is folded back into (hist_len, feat).
    if x_sample.ndim == 1:
        x_sample = x_sample.reshape(hist_len, -1)

    # Keep only the latitude / longitude columns, in that order.
    coord_cols = [lat_idx, lon_idx]
    tracks = [arr[:, coord_cols] for arr in (x_sample, y_true, y_pred)]

    # Optional inverse scaling of the lat/lon pairs.
    if scaler is not None:
        tracks = [scaler.inverse_transform(track) for track in tracks]

    # Plot with x = longitude, y = latitude.
    plt.figure(figsize=(6, 6))
    line_styles = (
        ('bo-',  'Past'),
        ('gx--', 'True future'),
        ('rs--', 'Pred future'),
    )
    for track, (fmt, legend_label) in zip(tracks, line_styles):
        lat, lon = track[:, 0], track[:, 1]
        # lon*0 + lat is kept exactly as in the original expression.
        plt.plot(lon, lon*0 + lat, fmt, label=legend_label)

    plt.gca().set_aspect('equal', 'box')
    plt.xlabel("Longitude")
    plt.ylabel("Latitude")
    plt.title(f"{title_prefix} – val sample {idx}")
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
