In [1]:
import os
import pandas as pd
import numpy as np
import torch

from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

np.set_printoptions(precision=3, suppress=True)

## Preprocess

In [2]:
base = '../data/raw'
data_path = 'cart_pole/20231006145359627330.csv'
full_path = os.path.join(base, data_path)

raw = pd.read_csv(full_path)

In [3]:
raw.head()

Unnamed: 0,id,time,qpos,qvel,qacc,ctrl
0,0,0.002,[0. 0.],[0. 0.],[0. 0.],[0. 0.]
1,1,0.004,[1.19646578e-10 7.28605007e-10],[5.98232892e-08 3.64302504e-07],[2.99116446e-05 1.82151252e-04],[9.99983333e-05 9.99983333e-05]
2,2,0.006,[7.17845414e-10 4.37140331e-09],[2.99099418e-07 1.82139915e-06],[0.00011964 0.00072855],[0.00039997 0.00039997]
3,3,0.008,[2.39265913e-09 1.45702863e-08],[8.37406857e-07 5.09944152e-06],[0.00026915 0.00163902],[0.00089987 0.00089987]
4,4,0.01,[5.98113704e-09 3.64223153e-08],[1.79423896e-06 1.09260145e-05],[0.00047842 0.00291329],[0.00159957 0.00159957]


In [4]:
def str_to_list(s: str) -> np.ndarray:
    return np.fromstring(s[1: -1], dtype=float, sep=' ')


def transform_to_numpy(df: pd.DataFrame) -> tuple[np.ndarray, np.ndarray]:
    x_res = []
    y_res = []

    for _, row in df.iterrows():
        row = row.to_list()[2:]
        x_res.append(np.r_[str_to_list(row[0]), str_to_list(row[1]), str_to_list(row[3])])
        y_res.append(str_to_list(row[2]))

    return np.array(x_res), np.array(y_res)

In [5]:
proportion = 0.2

x_data, y_data = transform_to_numpy(raw)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=proportion, random_state=42)

In [6]:
print(x_train.shape)
print(y_train.shape)

(4000, 6)
(4000, 2)


In [7]:
train_loader = DataLoader(
    TensorDataset(torch.tensor(x_train), torch.tensor(y_train)),
    batch_size=100,
    shuffle=True
)
val_loader = DataLoader(
    TensorDataset(torch.tensor(x_test), torch.tensor(y_test)),
    batch_size=100,
    shuffle=True
)

## Model

In [8]:
class Naive(nn.Module):
    def __init__(self, input_dim: int, output_dim: int, dtype: torch.dtype = torch.float64):
        super(Naive, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(input_dim, 100),
            nn.Sigmoid(),
            nn.Linear(100, 100),
            nn.Sigmoid(),
            nn.Linear(100, 100),
            nn.Sigmoid(),
            nn.Linear(100, output_dim),
        ).type(dtype)

    def forward(self, x):
        return self.model(x)

In [22]:
def to_device(_device: torch.device, *tensors: torch.Tensor) -> tuple[torch.Tensor, ...]:
    return tuple(t.to(_device) for t in tensors)


def train_one_epoch(
        _model: nn.Module,
        _optimizer: torch.optim.Optimizer,
        _loss_fn: torch.nn.Module,
        _train_loader: DataLoader,
        _epoch_num: int,
        _device: torch.device = "cpu",
        _ckpt_path: str = "best.pt",
):
    # best score for checkpointing
    _best = np.inf

    # training loop description
    train_loop = tqdm(
        enumerate(_train_loader, 1), 
        total=len(_train_loader),
        desc=f"Epoch {_epoch_num}",
        leave=True,
    )
    _model.train()
    _train_loss = 0.0
    # iterate over dataset
    for i, data in train_loop:
        _x, _y = to_device(_device, *data)

        # zero the parameter gradients
        _optimizer.zero_grad()

        # forward pass and loss calculation
        _outputs = _model(_x)
        _loss = _loss_fn(_outputs, _y)

        # backward pass
        _loss.backward()

        # optimizer run
        _optimizer.step()

        _train_loss += _loss.item()
        train_loop.set_postfix({"loss": _train_loss / i})


def val_one_epoch(
        _model: nn.Module,
        _optimizer: torch.optim.Optimizer,
        _loss_fn: torch.nn.Module,
        _val_loader: DataLoader,
        _best: float,
        _epoch_num: int,
        _device: torch.device = "cpu",
        _ckpt_path: str = "best.pt",
):
    # validation
    _val_loss = 0.0
    with torch.no_grad():
        _model.eval()  # evaluation mode
        val_loop = tqdm(
            enumerate(_val_loader, 1),
            total=len(_val_loader),
            desc=f"Val {_epoch_num}",
            leave=True,
        )
        for i, data in val_loop:
            _x, _y = to_device(_device, *data)

            _outputs = _model(_x)
            _val_loss += _loss_fn(_outputs, _y).item()

            val_loop.set_postfix({"loss": _val_loss / i})

        if _val_loss / len(_val_loader) > _best:
            torch.save(_model.state_dict(), _ckpt_path)
            _best = _val_loss / len(_val_loader)
    return _best


def train(
        _model: nn.Module,
        _optimizer: torch.optim.Optimizer,
        _loss_fn: torch.nn.Module,
        _train_loader: DataLoader,
        _val_loader: DataLoader,
        _epochs: int,
        _device: torch.device = "cpu",
        _ckpt_path: str = "best.pt",
):
    best = -float('inf')
    for epoch in range(_epochs):
        train_one_epoch(_model, _optimizer, _loss_fn, _train_loader, epoch, _device, _ckpt_path)
        best = val_one_epoch(_model, _optimizer, _loss_fn, _val_loader, epoch, best, _device, _ckpt_path)
        print(best)

In [15]:
device = 'cuda' if torch.cuda.is_available else 'cpu'
model = Naive(x_data.shape[1], y_data.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters())
loss_fn = torch.nn.MSELoss()
# writer = SummaryWriter()

In [37]:
train(model, optimizer, loss_fn, train_loader, val_loader, 10, device)

Epoch 0:   0%|          | 0/40 [00:00<?, ?it/s]

Val -inf:   0%|          | 0/10 [00:00<?, ?it/s]

2.6226585059542336


Epoch 1:   0%|          | 0/40 [00:00<?, ?it/s]

Val 2.6226585059542336:   0%|          | 0/10 [00:00<?, ?it/s]

3.5409128792355284


Epoch 2:   0%|          | 0/40 [00:00<?, ?it/s]

Val 3.5409128792355284:   0%|          | 0/10 [00:00<?, ?it/s]

2.22886963389695


Epoch 3:   0%|          | 0/40 [00:00<?, ?it/s]

Val 2.22886963389695:   0%|          | 0/10 [00:00<?, ?it/s]

3


Epoch 4:   0%|          | 0/40 [00:00<?, ?it/s]

Val 3:   0%|          | 0/10 [00:00<?, ?it/s]

4


Epoch 5:   0%|          | 0/40 [00:00<?, ?it/s]

Val 4:   0%|          | 0/10 [00:00<?, ?it/s]

5


Epoch 6:   0%|          | 0/40 [00:00<?, ?it/s]

Val 5:   0%|          | 0/10 [00:00<?, ?it/s]

6


Epoch 7:   0%|          | 0/40 [00:00<?, ?it/s]

Val 6:   0%|          | 0/10 [00:00<?, ?it/s]

7


Epoch 8:   0%|          | 0/40 [00:00<?, ?it/s]

Val 7:   0%|          | 0/10 [00:00<?, ?it/s]

8


Epoch 9:   0%|          | 0/40 [00:00<?, ?it/s]

Val 8:   0%|          | 0/10 [00:00<?, ?it/s]

9
