In [9]:
import pandas as pd
import tensorflow as tf
import numpy as np
import glob

In [6]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [12]:
csv_files = glob.glob('./driving_data/*.csv')
dfs = []
for file in csv_files:
    dfs.append(pd.read_csv(file))
df = pd.concat(dfs)
df

Unnamed: 0,pitch,roll,acceleration_x,acceleration_y,acceleration_z,angular_velocity_x,angular_velocity_y,angular_velocity_z,distance,time,jerk_x,jerk_y,jerk_z,is_running_backoff
0,-1.0,0.0,205.85300,136.43750,9787.594,-0.055989,-0.095555,0.005286,2000.0,2024-01-12 16:26:33.393240,0.000000,0.000000,0.000000,False
1,-3.0,2.0,636.70830,330.32230,9162.854,2.254011,-2.895555,-0.414714,2000.0,2024-01-12 16:26:33.513343,3587.381664,1614.321041,-5201.701873,False
2,-1.0,-1.0,550.53720,-526.60080,9876.159,2.114011,-2.755555,-2.724714,2000.0,2024-01-12 16:26:33.693540,-478.204965,-4755.479281,3958.473227,False
3,-4.0,0.0,679.79380,2.39364,9201.152,-0.545989,-0.095555,3.645286,1843.0,2024-01-12 16:26:33.933171,539.398492,2207.537589,-2816.860089,False
4,0.0,2.0,-86.17104,440.42980,10618.190,0.294011,-0.025555,-3.984714,1803.0,2024-01-12 16:26:34.113693,-4243.055362,2426.497380,7849.669292,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274,0.0,-3.0,40.69188,-497.87710,8748.754,-5.146463,12.652540,177.248700,2000.0,2024-01-12 16:24:31.735440,-29213.812318,279.877173,-28414.193606,True
275,0.0,-3.0,40.69188,-497.87710,8748.754,-5.146463,12.652540,177.248700,2000.0,2024-01-12 16:24:31.735440,-29213.812318,279.877173,-28414.193606,False
276,-3.0,-3.0,643.88920,-646.28280,9414.187,-1.086463,2.572544,171.018800,2000.0,2024-01-12 16:24:31.976498,2502.291233,-615.643123,2760.468435,False
277,0.0,0.0,9.57456,-21.54276,9500.357,2.483537,-7.297456,6.378747,2000.0,2024-01-12 16:24:32.156019,-3533.372920,3480.038770,479.999554,False


In [17]:
# We only want to keep the rows where nothing is happening or when it starts the backoff procedure. 
# The data during the backoff procedure is not useful.
backoff_start = (df['is_running_backoff'] & -df['is_running_backoff'].shift(fill_value=False))
filter_mask = -df['is_running_backoff'] | backoff_start
filtered_df = df[filter_mask]
filtered_df

Unnamed: 0,pitch,roll,acceleration_x,acceleration_y,acceleration_z,angular_velocity_x,angular_velocity_y,angular_velocity_z,distance,time,jerk_x,jerk_y,jerk_z,is_running_backoff
0,-1.0,0.0,205.85300,136.43750,9787.594,-0.055989,-0.095555,0.005286,2000.0,2024-01-12 16:26:33.393240,0.000000,0.000000,0.000000,False
1,-3.0,2.0,636.70830,330.32230,9162.854,2.254011,-2.895555,-0.414714,2000.0,2024-01-12 16:26:33.513343,3587.381664,1614.321041,-5201.701873,False
2,-1.0,-1.0,550.53720,-526.60080,9876.159,2.114011,-2.755555,-2.724714,2000.0,2024-01-12 16:26:33.693540,-478.204965,-4755.479281,3958.473227,False
3,-4.0,0.0,679.79380,2.39364,9201.152,-0.545989,-0.095555,3.645286,1843.0,2024-01-12 16:26:33.933171,539.398492,2207.537589,-2816.860089,False
4,0.0,2.0,-86.17104,440.42980,10618.190,0.294011,-0.025555,-3.984714,1803.0,2024-01-12 16:26:34.113693,-4243.055362,2426.497380,7849.669292,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,19.0,-24.0,-2549.22700,-3023.16700,6582.510,-8.926463,-20.247460,-91.831250,2000.0,2024-01-12 16:24:30.655430,-17175.118334,-19083.460541,-14225.857888,True
275,0.0,-3.0,40.69188,-497.87710,8748.754,-5.146463,12.652540,177.248700,2000.0,2024-01-12 16:24:31.735440,-29213.812318,279.877173,-28414.193606,False
276,-3.0,-3.0,643.88920,-646.28280,9414.187,-1.086463,2.572544,171.018800,2000.0,2024-01-12 16:24:31.976498,2502.291233,-615.643123,2760.468435,False
277,0.0,0.0,9.57456,-21.54276,9500.357,2.483537,-7.297456,6.378747,2000.0,2024-01-12 16:24:32.156019,-3533.372920,3480.038770,479.999554,False


In [29]:
# Convert to tensors. Every tensor contains 10 entries. 
history_size = 10
tensors = []
# TODO: normalize all columns to be between 0 and 1
for start in range(len(filtered_df) - history_size):
    window_df = filtered_df.iloc[start:start+history_size].drop(columns=["time","is_running_backoff"]).astype('float32')
    tensors.append(tf.convert_to_tensor(window_df))
    
# The label is whether the backoff procedure is started (1 when the last line of the tensor had the backoff procedure started)
labels = tf.convert_to_tensor(filtered_df['is_running_backoff'].iloc[history_size:].astype('float32'))


In [34]:
print(labels[11])
print(tensors[11][9])

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(
[ 4.0000000e+00  9.0000000e+00 -2.7766220e+02  6.1037817e+02
  3.5401941e+03  3.7940111e+00  2.3774441e+01  1.5335290e+01
  2.0000000e+03 -4.9078970e+03 -2.3205842e+03 -3.1861320e+04], shape=(12,), dtype=float32)


In [36]:
print(labels[0])
print(tensors[0][9])

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(
[-2.0000000e+00 -3.0000000e+00  4.2128061e+02 -5.8404822e+02
  8.9593945e+03 -5.3759890e+00 -1.4955550e+00  7.7528602e-01
  2.0000000e+03 -2.7445901e+03 -4.6273516e+03 -3.8450737e+03], shape=(12,), dtype=float32)


In [35]:
print(len(tensors),len(labels))

639 639
