In [1]:
import torch
import pandas as pd
import numpy as np
from copy import deepcopy
from torch.utils.data import ConcatDataset, Dataset, DataLoader
import pkg_resources
import glob

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

In [2]:
file_path = {
    "2013_Targa_Sixty_Six": "data/2013_Targa_Sixty_Six",
    "2014_Targa_Sixty_Six": "data/2014_Targa_Sixty_Six",
    "2013_Laguna_Seca": "data/2013_Laguna_Seca",
}

columns_to_transform = [
    "vyCG",  # !!
    "vxCG",  # !!
    "sideSlip",  # !!
    "engineSpeed",
    "handwheelAngle",
    "throttle",
    "brake",
    "axCG",
    "ayCG",
    "yawRate",
    "chassisAccelFL",
    "chassisAccelFR",
    "chassisAccelRL",
    "chassisAccelRR",
]

In [3]:
class CarDataset(Dataset):
    def __init__(
        self, df, target, seq_length, dtype=torch.float32, loop_predictions=False
    ):
        self.data = df.copy(deep=True)
        self.seq_length = seq_length
        self.y = torch.tensor(self.data[target].to_numpy(), dtype=dtype)
        self.X = torch.tensor(
            self.data.drop(
                ["sideSlip", "vxCG", "vyCG", "time", "longitude", "latitude"], axis=1
            ).to_numpy(),
            dtype=dtype,
        )

    def __getitem__(self, index):
        x = self.X[index : index + self.seq_length]
        y = self.y[index + self.seq_length - 1]  # -1 important to avoid forecasting!
        return x, y

    def __len__(self):
        return len(self.data) - self.seq_length

In [4]:
def create_dataset(
    folder,
    seq_length,
    concat_dataset=False,
    num_csv=np.inf,
    target="sideSlip",
    cols_to_transform=columns_to_transform,
    threshold=None,
    bound=None,
    threshold_as_upper_limit=False,
    transform=None,
    mean=None,
    std=None,
):
    path = pkg_resources.resource_filename(__name__, file_path[folder])

    # Collect all DFs in a list
    dflist = []
    files = []
    for i, fname in enumerate(sorted(glob.glob(path + "/*.csv"))):
        if i >= num_csv:
            break
        dflist.append(pd.read_csv(fname, sep=","))
        files.append(fname.split("/")[-1])

    if threshold is not None:
        dflist, files = _apply_threshold(
            dflist=dflist,
            file_ls=files,
            threshold=threshold,
            bound=bound,
            seq_length=seq_length,
            threshold_as_upper_limit=threshold_as_upper_limit,
        )

    if transform == "standardize":
        dflist, param1, param2 = _standarize(dflist, mean, std, cols_to_transform)
    elif transform == "normalize":
        dflist, param1, param2 = _normalize(dflist, cols_to_transform)
    elif transform is not None:
        raise Exception(f"Unknown transform {transform}!")
    else:
        param1 = None
        param2 = None

    # Create datasets
    datasets = []
    for i, data in enumerate(dflist):
        if len(data) < seq_length:
            print(f"Attention Df with length {len(data)} is smaller than {seq_length}")
            continue
        datasets.append(CarDataset(df=data, target=target, seq_length=seq_length))

    if concat_dataset:
        datasets = ConcatDataset(datasets)

    return datasets, files, param1, param2


def _apply_threshold(
    dflist, file_ls, threshold, bound, seq_length, threshold_as_upper_limit=False
):
    if bound is None:
        raise Exception("bound has to be provided if threshold is not None!")
    if bound < seq_length:
        raise Exception("bound has to be larger than seq_length!")

    new_dflist = []
    new_file_ls = []
    for i, df in enumerate(dflist):
        if threshold_as_upper_limit:
            df_mask = df["vxCG"].rolling(bound, center=True).min() < threshold
        else:
            df_mask = df["vxCG"].rolling(bound, center=True).max() > threshold

        cont_df = pd.DataFrame([])
        detected = False
        start_index = 0
        stop_index = 0
        current_outing = file_ls[i]

        for index, bool in enumerate(df_mask):
            if bool and detected == False:
                start_index = index
                detected = True

            elif bool == False and detected == True:
                stop_index = index

                cont_df = df.iloc[start_index:stop_index]
                new_dflist.append(cont_df)
                new_file_ls.append(
                    current_outing + "_" + str(start_index) + "_" + str(stop_index)
                )
                cont_df = pd.DataFrame([])

                detected = False

    return new_dflist, new_file_ls


def _standarize(dflist, mean=None, std=None, cols_to_standardize=columns_to_transform):
    if mean is not None and std is None or mean is None and std is not None:
        raise Exception("Either both mean and std or none of them have to be provided!")

    new_dflist = deepcopy(dflist)
    if mean is None and std is None:
        concatdf = pd.concat(new_dflist)
        mean = concatdf[cols_to_standardize].mean()
        std = concatdf[cols_to_standardize].std()

    for i, df in enumerate(new_dflist):
        new_dflist[i][cols_to_standardize] = (df[cols_to_standardize] - mean) / std

    return new_dflist, mean, std


def _normalize(dflist, cols_to_normalize=columns_to_transform):
    new_dflist = deepcopy(dflist)

    concatdf = pd.concat(new_dflist)
    min_df = concatdf[cols_to_normalize].min()
    max_df = concatdf[cols_to_normalize].max()

    for i, df in enumerate(new_dflist):
        new_dflist[i][cols_to_normalize] = (df[cols_to_normalize] - min_df) / (
            max_df - min_df
        )
    return new_dflist, min_df, max_df

In [5]:
ds, files ,mean,std  = create_dataset("2013_Laguna_Seca",
                            seq_length=20,
                            cols_to_transform=columns_to_transform,
                            concat_dataset=True,
                            target="sideSlip",
                            transform="standardize",
                            threshold=0.83,
                            bound=20)

In [6]:
ds.datasets[0].X.shape

torch.Size([711, 11])

In [7]:
ds.datasets[0].data

Unnamed: 0,sideSlip,vxCG,vyCG,time,longitude,latitude,engineSpeed,handwheelAngle,throttle,brake,axCG,ayCG,yawRate,chassisAccelFL,chassisAccelFR,chassisAccelRL,chassisAccelRR
1350,0.061337,-2.042480,0.097394,67.500,,,-2.004357,-0.262060,-0.696700,-0.418353,-0.025439,-0.113414,-0.252053,0.418410,0.219105,1.055036,0.465117
1351,0.061337,-2.041726,0.083040,67.550,,,-2.013131,-0.257760,-0.699709,-0.418353,-0.438201,-0.308996,-0.324071,-0.179859,0.363518,0.680184,0.773426
1352,0.061337,-2.041726,0.083040,67.601,,,-1.995584,-0.264210,-0.702718,-0.418353,-0.147151,-0.059280,-0.318192,0.813145,0.168135,0.480446,0.325236
1353,0.061337,-2.042480,0.097394,67.650,,,-1.964877,-0.251309,-0.702718,-0.418353,-0.192131,-0.026101,-0.220453,0.621945,0.444220,0.428460,0.270997
1354,0.061337,-2.042480,0.097394,67.700,,,-1.999971,-0.223358,-0.702718,-0.418353,-0.075711,-0.143101,-0.216043,0.418410,0.805254,1.016730,0.662092
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2056,0.061337,-1.997242,0.097394,102.800,-121.755761,36.586622,-2.013131,-0.188957,-0.702718,-0.418353,-0.263571,-0.066265,-0.207225,0.236462,0.344405,0.658295,0.773426
2057,0.061337,-1.998750,0.083040,102.850,-121.755761,36.586622,-2.030678,-0.186807,-0.702718,-0.418353,-0.147151,-0.104683,-0.196936,0.236462,0.355023,0.685657,0.662092
2058,0.061337,-1.999504,0.083040,102.900,-121.755761,36.586622,-1.986810,-0.184657,-0.702718,-0.418353,-0.088941,-0.050548,-0.217513,1.032099,-0.061228,0.663768,0.662092
2059,0.061337,-2.001012,0.111747,102.950,-121.755761,36.586622,-1.964877,-0.186807,-0.705727,-0.418353,0.122732,-0.101190,-0.216778,1.032099,-0.012382,0.633670,0.690639


In [8]:
dl = DataLoader(ds, 
                 batch_size=512, 
                 shuffle=True, 
                 # worker_init_fn=seed_worker, 
                 # generator=g
                 )

In [9]:
for step, (x, y) in enumerate(dl):
    x, y = x.to(device) , y.to(device).reshape([len(y),1]) # otherwise y.shape = [512]
    print(x.shape)
    print(y.shape)
    if step == 0:
        break

torch.Size([512, 20, 11])
torch.Size([512, 1])


In [10]:
import torch.nn as nn

class Model(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(Model, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = out[:, -1, :]
        out = self.fc(out)

        return out

In [11]:
# initialize the model
input_size = 11
hidden_size = 10
num_layers = 1
output_size = 1
lr = 0.0001

model = Model(input_size, 
                 hidden_size, 
                 num_layers, 
                 output_size
                 ).to(device)

# define the loss function and optimizer
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [12]:
num_epochs = 5

for epoch in range(num_epochs):
        running_train_loss = 0.0
        for step, (x, y) in enumerate(dl):
                X, y = x.to(device), y.to(device).reshape([len(y),1])

                optimizer.zero_grad()
                pred = model(X)
                
                train_loss = criterion(pred, y)
                train_loss.backward()
                
                optimizer.step()
        
                print(f'EPOCH {epoch+1} of {num_epochs} batch {step+1} of {len(dl)} loss: {round(train_loss.item(),4)} ', end="\r")
        print("")
        

  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


EPOCH 0 of 5 batch 357 of 357 loss: 0.5114 
EPOCH 1 of 5 batch 357 of 357 loss: 0.4126 
EPOCH 2 of 5 batch 357 of 357 loss: 0.3606 
EPOCH 3 of 5 batch 357 of 357 loss: 0.2879 
EPOCH 4 of 5 batch 357 of 357 loss: 0.3205 
