Load Data

In [41]:
import pandas as pd
import glob

In [42]:
train_dflist = []
for i, fname in enumerate(sorted(glob.glob("data/2013_Targa_Sixty_Six" + "/*.csv"))):
    train_dflist.append(pd.read_csv(fname, sep=","))
    
test_dflist = []
for i, fname in enumerate(sorted(glob.glob("data/2013_Laguna_Seca" + "/*.csv"))):
    test_dflist.append(pd.read_csv(fname, sep=","))

Setup Pytorch Dataset for RNN Training

In [43]:
from torch.utils.data import ConcatDataset, Dataset, DataLoader
import torch

In [44]:
class CarDataset(Dataset):
    def __init__(self, df, target, seq_length, dtype=torch.float32):
        self.data = df.copy(deep=True)
        self.seq_length = seq_length
        self.y = torch.tensor(self.data[target].to_numpy(), dtype=dtype)
        self.X = torch.tensor(
            self.data.drop(
                ["sideSlip", "vxCG", "vyCG", "time", "longitude", "latitude"], axis=1
            ).to_numpy(),
            dtype=dtype,
        )

    def __getitem__(self, index):
        x = self.X[index : index + self.seq_length]
        y = self.y[index + self.seq_length - 1]  # -1 important to avoid forecasting!
        return x, y

    def __len__(self):
        return len(self.data) - self.seq_length

Preprocess Data

In [45]:
columns_to_standardize = [
    "engineSpeed",
    "handwheelAngle",
    "throttle",
    "brake",
    "axCG",
    "ayCG",
    "yawRate",
    "chassisAccelFL",
    "chassisAccelFR",
    "chassisAccelRL",
    "chassisAccelRR",
]

columns_to_minmax = [
    "vyCG",
    "vxCG",
    "sideSlip" 
]

In [46]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from pipelist import ApplyThreshold, _ConcatDataFrames, _SeparateDataFrames, SkScalerWrapper, CreateConcatDataset
from sklearn import set_config
from sklearn.preprocessing import StandardScaler, MinMaxScaler

set_config(display="diagram")

Example Pipeline 1: Only fascilitating transformers that can handle dataframe lists

In [47]:
pipeline_v1 = Pipeline([('threshold', ApplyThreshold(threshold=10, by="vxCG", seq_length=10)), 
                      ('stand scaler', SkScalerWrapper(StandardScaler(), columns_to_standardize)),
                      ('minmax scaler', SkScalerWrapper(MinMaxScaler(), columns_to_minmax)),
                      ('concat dataset', CreateConcatDataset(CarDataset, target="vxCG", seq_length=10)),
                     ])
pipeline_v1

Example Pipeline 2: Merging Dataframe List via Indexing to apply global scaling with out-of-the-box sklearn transformers

In [48]:
ct = ColumnTransformer([("stand", StandardScaler(), columns_to_standardize),
                        ("minmax", MinMaxScaler(), columns_to_minmax)],
                       remainder="passthrough",
                       verbose_feature_names_out=False)

pipeline_v2 = Pipeline([('threshold', ApplyThreshold(threshold=10, by="vxCG", seq_length=10)), 
                     ('concat', _ConcatDataFrames()), 
                     ('column transformer', ct.set_output(transform="pandas")),
                     ('separate', _SeparateDataFrames()),
                     ('concat dataset', CreateConcatDataset(CarDataset, target="vxCG", seq_length=10)),
                     ])
pipeline_v2

Application of the Example 2 Pipeline on the train data

In [49]:
train_dataset = pipeline_v2.fit_transform(train_dflist)

Application of the fitted pipeline on the test data (to scale with mean and std of the train data (known during training))

In [50]:
test_dataset = pipeline_v2.transform(test_dflist)

Pass concatenated datasets to the dataloaders

In [51]:
train_dataloader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=True)

Setup simple RNN Training

In [52]:
import torch.nn as nn
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
device

'cuda'

In [53]:
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(Model, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = out[:, -1, :]
        out = self.fc(out)

        return out

In [54]:
# initialize the model
input_size = 11
hidden_size = 10
num_layers = 1
output_size = 1
lr = 0.01

model = Model(input_size, 
                 hidden_size, 
                 num_layers, 
                 output_size
                 ).to(device)

# define the loss function and optimizer
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

Training Loop

In [61]:
num_epochs = 5

model.train()
for epoch in range(num_epochs):
        running_train_loss = 0.0
        for step, (x, y) in enumerate(train_dataloader):
                X, y = x.to(device), y.to(device).reshape([len(y),1])

                optimizer.zero_grad()
                pred = model(X)
                
                train_loss = criterion(pred, y)
                train_loss.backward()
                
                optimizer.step()
        
                print(f'EPOCH {epoch+1} of {num_epochs} batch {step+1} of {len(train_dataloader)} loss: {round(train_loss.item(),4)} ', end="\r")
        print("")

EPOCH 1 of 5 batch 159 of 159 loss: 0.0383 
EPOCH 2 of 5 batch 159 of 159 loss: 0.0335 
EPOCH 3 of 5 batch 159 of 159 loss: 0.0331 
EPOCH 4 of 5 batch 159 of 159 loss: 0.0326 
EPOCH 5 of 5 batch 159 of 159 loss: 0.0337 


Testing

In [62]:
model.eval()
correct = 0
total = 0
total_loss = 0  
with torch.no_grad():
    for step, (x, y) in enumerate(test_dataloader):
        X, y = x.to(device), y.to(device).reshape([len(y),1])

        pred = model(X)
        loss = criterion(pred, y)
        total_loss += loss.item()

    print(f"Avg. Loss: {total_loss/step}")

Avg. Loss: 0.07265662416405191
