In [1]:
import pandas as pd
import glob
import numpy as np
from bagpipe.preprocessing import ApplyThreshold, _ConcatDataFrames, _SeparateDataFrames, CreateConcatDataset, SkScalerWrapper

In [2]:
import matplotlib.pyplot as plt
import math

def plot_dataframes(df_list, col='vxCG', figsize=(15, 15)):
    """Plot one column of every dataframe on a square grid of subplots.

    Parameters
    ----------
    df_list : list
        Dataframes to plot; each one is indexed with ``df[col]``.
    col : str, default 'vxCG'
        Column drawn in every subplot.
    figsize : tuple, default (15, 15)
        Figure size forwarded to ``plt.subplots``.

    Raises
    ------
    TypeError
        If ``df_list`` is not a list.
    ValueError
        If ``df_list`` is empty, since there is nothing to lay out.
    """
    if not isinstance(df_list, list):
        raise TypeError('df_list must be a list of dataframes')

    n = len(df_list)
    if n == 0:
        raise ValueError('df_list must contain at least one dataframe')

    # Side length of the smallest square grid that can hold n plots.
    side = int(math.ceil(math.sqrt(n)))

    # squeeze=False keeps `axs` a 2-D array even for a 1x1 grid; without it a
    # single-dataframe list yields a bare Axes object that cannot be indexed.
    fig, axs = plt.subplots(side, side, figsize=figsize, squeeze=False)
    cells = axs.flatten()  # row-major, so cells[i] is axs[i // side, i % side]

    for i, df in enumerate(df_list):
        ax = cells[i]
        df[col].plot(ax=ax)
        ax.set_title(f'Dataframe {i+1}')

    # Remove the grid cells that did not receive a dataframe.
    for unused_ax in cells[n:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

In [3]:
def load_csv_dir(directory):
    """Read every ``*.csv`` file directly inside ``directory`` into a dataframe.

    Files are read in sorted path order so the resulting list is
    deterministic across runs. A directory with no matching files (or a
    directory that does not exist) yields an empty list.

    Parameters
    ----------
    directory : str
        Folder to search; the glob pattern is ``directory + "/*.csv"``.

    Returns
    -------
    list
        One dataframe per CSV file, in sorted path order.
    """
    return [pd.read_csv(fname, sep=",") for fname in sorted(glob.glob(directory + "/*.csv"))]


train_dflist = load_csv_dir("../data/2013_Targa_Sixty_Six")
test_dflist = load_csv_dir("../data/2013_Laguna_Seca")

In [4]:
from torch.utils.data import ConcatDataset, Dataset, DataLoader
import torch

class CarDataset(Dataset):
    """Sliding-window dataset built from a single dataframe.

    Item ``i`` is the pair ``(X[i : i + seq_length], y[i + seq_length - 1])``:
    ``seq_length`` consecutive feature rows together with the target value
    taken at the last row of that window.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. It must contain ``target`` and the columns
        "sideSlip", "vxCG", "vyCG", "time", "longitude" and "latitude",
        which are dropped to form the feature matrix.
    target : str
        Column used as the regression target.
    seq_length : int
        Number of consecutive rows in each window; must be at least 1.
    dtype : torch.dtype, default torch.float32
        dtype of the feature and target tensors.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1.
    """

    def __init__(self, df, target, seq_length, dtype=torch.float32):
        if seq_length < 1:
            raise ValueError("seq_length must be at least 1")
        # Deep copy so later changes to the caller's frame do not affect us.
        self.data = df.copy(deep=True)
        self.seq_length = seq_length
        self.y = torch.tensor(self.data[target].to_numpy(), dtype=dtype)
        self.X = torch.tensor(
            self.data.drop(
                ["sideSlip", "vxCG", "vyCG", "time", "longitude", "latitude"], axis=1
            ).to_numpy(),
            dtype=dtype,
        )

    def __getitem__(self, index):
        """Return ``(window, target)`` for window ``index``.

        Negative indices count from the end. An index outside the valid
        range raises ``IndexError`` instead of returning a truncated window.
        """
        n_windows = len(self)
        if index < 0:
            index += n_windows
        if not 0 <= index < n_windows:
            raise IndexError("CarDataset index out of range")
        x = self.X[index : index + self.seq_length]
        y = self.y[index + self.seq_length - 1]  # -1 important to avoid forecasting!
        return x, y

    def __len__(self):
        """Number of complete windows; 0 when the frame is shorter than ``seq_length``."""
        # A frame of N rows holds N - seq_length + 1 full windows (the last
        # one starts at row N - seq_length). Clamp at 0 because len() must
        # never receive a negative value.
        return max(len(self.data) - self.seq_length + 1, 0)

In [5]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, Normalizer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn import set_config

# Render estimators as diagrams when they are displayed.
set_config(display="diagram")

# Column-wise scaling: standardise the two velocity columns, min-max scale
# sideSlip, and pass every other column through untouched.
column_scalers = [
    ("stand", StandardScaler(), ["vxCG", "vyCG"]),
    ("minmax", MinMaxScaler(), ["sideSlip"]),
]
ct = ColumnTransformer(
    transformers=column_scalers,
    remainder="passthrough",
    verbose_feature_names_out=False,
)

# Training pipeline: threshold -> concat -> scale -> separate -> dataset.
# NOTE(review): seq_length=10 is passed to both ApplyThreshold and
# CreateConcatDataset; presumably the two values must agree - confirm.
pipelist1 = Pipeline(
    steps=[
        ('threshold', ApplyThreshold(threshold=10, by="vxCG", seq_length=10)),
        ('concat', _ConcatDataFrames()),
        # set_output asks the transformer to emit pandas output.
        ('column transformer', ct.set_output(transform="pandas")),
        ('separate', _SeparateDataFrames()),
        ('concat dataset', CreateConcatDataset(CarDataset, target="vxCG", seq_length=10)),
    ]
)

pipelist1

In [6]:
# Fit every step of pipelist1 on the training dataframes and keep the
# transformed result.
# NOTE(review): presumably the output of the final 'concat dataset' step
# (CreateConcatDataset) - confirm the returned type against bagpipe.
concat_ds = pipelist1.fit_transform(train_dflist)

In [7]:
# Pipeline without a thresholding step: concat -> scale -> separate -> dataset.
# NOTE(review): `ct` here is the same object used by pipelist1, not a copy.
# sklearn's Pipeline does not clone its steps, so fitting either pipeline
# presumably updates the transformer seen by both - confirm this is intended.
pipelist2 = Pipeline(
    steps=[
        ('concat', _ConcatDataFrames()),
        ('column transformer', ct.set_output(transform="pandas")),
        ('separate', _SeparateDataFrames()),
        ('concat dataset', CreateConcatDataset(CarDataset, target="vxCG", seq_length=10)),
    ]
)

pipelist2

In [8]:
# Variant that scales through SkScalerWrapper steps instead of the
# concat -> ColumnTransformer -> separate sequence:
# threshold -> standard-scale vxCG/vyCG -> min-max scale sideSlip -> dataset.
pipelist3 = Pipeline(
    steps=[
        ('threshold', ApplyThreshold(threshold=10, by="vxCG", seq_length=10)),
        ('stand scaler', SkScalerWrapper(StandardScaler(), ["vxCG", "vyCG"])),
        ('minmax scaler', SkScalerWrapper(MinMaxScaler(), ["sideSlip"])),
        ('concat dataset', CreateConcatDataset(CarDataset, target="vxCG", seq_length=10)),
    ]
)

pipelist3