In [85]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [86]:
# Read the raw driving log into memory, then report its dimensions and column names.
df = pd.read_csv('driving_data.csv')
print("Shape:", df.shape)
print("Columns:", list(df.columns))

Shape: (37736, 51)
Columns: ['speedX', 'speedY', 'speedZ', 'angle', 'trackPos', 'rpm', 'gear', 'steer_input', 'accel_input', 'brake_input', 'num_opponents', 'opponent_0_angle', 'opponent_1_angle', 'opponent_2_angle', 'opponent_3_angle', 'opponent_4_angle', 'opponent_5_angle', 'opponent_6_angle', 'opponent_7_angle', 'opponent_8_angle', 'opponent_9_angle', 'opponent_0_distance', 'opponent_1_distance', 'opponent_2_distance', 'opponent_3_distance', 'opponent_4_distance', 'opponent_5_distance', 'opponent_6_distance', 'opponent_7_distance', 'opponent_8_distance', 'opponent_9_distance', 'track_pos', 'track_0', 'track_1', 'track_2', 'track_3', 'track_4', 'track_5', 'track_6', 'track_7', 'track_8', 'track_9', 'track_10', 'track_11', 'track_12', 'track_13', 'track_14', 'track_15', 'track_16', 'track_17', 'track_18']


In [87]:
# Plain-text preview of the first five rows.
print(df.head(n=5))

     speedX    speedY    speedZ     angle  trackPos     rpm  gear  \
0 -0.025037  0.026839 -0.026763 -0.002788  0.333891  1047.2     1   
1 -0.025037  0.026839 -0.026763 -0.002788  0.333891  1047.2     1   
2 -0.025037  0.026839 -0.026763 -0.002788  0.333891  1047.2     1   
3 -0.025037  0.026839 -0.026763 -0.002788  0.333891  1047.2     1   
4 -0.025037  0.026839 -0.026763 -0.002788  0.333891  1047.2     1   

   steer_input  accel_input  brake_input  ...  track_9  track_10  track_11  \
0          0.0          0.0          0.0  ...    200.0   86.9471   42.9276   
1          0.0          0.0          0.0  ...    200.0   86.9471   42.9276   
2          0.0          0.0          0.0  ...    200.0   86.9471   42.9276   
3          0.0          0.0          0.0  ...    200.0   86.9471   42.9276   
4          0.0          0.0          0.0  ...    200.0   86.9471   42.9276   

   track_12  track_13  track_14  track_15  track_16  track_17  track_18  
0   28.6439   21.6159   14.7441   10.4043 

In [88]:
# Column dtypes and non-null counts. DataFrame.info() writes its report to stdout
# itself and returns None, so wrapping it in print() would append a stray "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37736 entries, 0 to 37735
Data columns (total 51 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   speedX               37736 non-null  float64
 1   speedY               37736 non-null  float64
 2   speedZ               37736 non-null  float64
 3   angle                37736 non-null  float64
 4   trackPos             37736 non-null  float64
 5   rpm                  37736 non-null  float64
 6   gear                 37736 non-null  int64  
 7   steer_input          37736 non-null  float64
 8   accel_input          37736 non-null  float64
 9   brake_input          37736 non-null  float64
 10  num_opponents        37736 non-null  int64  
 11  opponent_0_angle     37736 non-null  int64  
 12  opponent_1_angle     37736 non-null  float64
 13  opponent_2_angle     37736 non-null  int64  
 14  opponent_3_angle     37736 non-null  float64
 15  opponent_4_angle     37736 non-null 

In [89]:
# Number of missing entries per column, largest counts first.
df.isnull().sum().sort_values(ascending=False)


speedX                 0
speedY                 0
speedZ                 0
angle                  0
trackPos               0
rpm                    0
gear                   0
steer_input            0
accel_input            0
brake_input            0
num_opponents          0
opponent_0_angle       0
opponent_1_angle       0
opponent_2_angle       0
opponent_3_angle       0
opponent_4_angle       0
opponent_5_angle       0
opponent_6_angle       0
opponent_7_angle       0
opponent_8_angle       0
opponent_9_angle       0
opponent_0_distance    0
opponent_1_distance    0
opponent_2_distance    0
opponent_3_distance    0
opponent_4_distance    0
opponent_5_distance    0
opponent_6_distance    0
opponent_7_distance    0
opponent_8_distance    0
opponent_9_distance    0
track_pos              0
track_0                0
track_1                0
track_2                0
track_3                0
track_4                0
track_5                0
track_6                0
track_7                0


In [90]:
# Distinct non-null values per column; a count of 1 marks a constant column.
print(df.nunique(axis=0, dropna=True))

speedX                 33221
speedY                 37118
speedZ                 36998
angle                  36469
trackPos               36590
rpm                    35065
gear                       7
steer_input                3
accel_input                3
brake_input                2
num_opponents              7
opponent_0_angle          36
opponent_1_angle       34594
opponent_2_angle          36
opponent_3_angle       31502
opponent_4_angle          36
opponent_5_angle       21036
opponent_6_angle          36
opponent_7_angle        9325
opponent_8_angle          36
opponent_9_angle        2551
opponent_0_distance       18
opponent_1_distance      196
opponent_2_distance        1
opponent_3_distance        1
opponent_4_distance        1
opponent_5_distance        1
opponent_6_distance        1
opponent_7_distance        1
opponent_8_distance        1
opponent_9_distance        1
track_pos              36590
track_0                32287
track_1                32485
track_2       

In [91]:
# Share of entries in each column that are exactly zero.
print(df.eq(0).mean())

speedX                 0.006625
speedY                 0.006625
speedZ                 0.000000
angle                  0.000000
trackPos               0.000000
rpm                    0.000000
gear                   0.000000
steer_input            0.698829
accel_input            0.172302
brake_input            0.952062
num_opponents          0.048097
opponent_0_angle       0.234047
opponent_1_angle       0.000000
opponent_2_angle       0.278169
opponent_3_angle       0.000000
opponent_4_angle       0.491600
opponent_5_angle       0.000000
opponent_6_angle       0.769663
opponent_7_angle       0.000000
opponent_8_angle       0.932187
opponent_9_angle       0.000000
opponent_0_distance    0.995363
opponent_1_distance    0.000000
opponent_2_distance    1.000000
opponent_3_distance    0.000000
opponent_4_distance    1.000000
opponent_5_distance    0.000000
opponent_6_distance    1.000000
opponent_7_distance    0.000000
opponent_8_distance    1.000000
opponent_9_distance    0.000000
track_po

In [92]:
# Columns holding exactly one distinct value carry no information: find them in a
# single pass over the frame and remove them.
distinct_counts = df.nunique()
constant_cols = distinct_counts[distinct_counts == 1].index.tolist()
df.drop(columns=constant_cols, inplace=True)
print("Dropped constant columns:", constant_cols)


Dropped constant columns: ['opponent_2_distance', 'opponent_3_distance', 'opponent_4_distance', 'opponent_5_distance', 'opponent_6_distance', 'opponent_7_distance', 'opponent_8_distance', 'opponent_9_distance']


In [93]:
# Remove rows in which all three control inputs (steer, accel, brake) are exactly zero.
before = len(df)
no_input = df[['steer_input', 'accel_input', 'brake_input']].eq(0).all(axis=1)
# .copy() turns the filtered result into an independent frame, so later cells that
# add columns to `df` do not raise pandas' SettingWithCopyWarning.
df = df.loc[~no_input].copy()
print(f"Removed {before - len(df)} rows with no control inputs")


Removed 2093 rows with no control inputs


In [94]:
# NOTE: disabled alternative to the PCA reduction -- summarise the track columns
# with per-row min / mean / max instead. Kept for reference only.
# # 1) identify all of the raw track-distance columns
# track_cols = [c for c in df.columns if c.startswith('track')]

# # 2) build the summary statistics
# df['track_min']  = df[track_cols].min(axis=1)
# df['track_mean'] = df[track_cols].mean(axis=1)
# df['track_max']  = df[track_cols].max(axis=1)

# # 3) the three summary columns now exist, so the original track_cols can optionally be dropped
# df.drop(columns=track_cols, inplace=True)


In [95]:
# Collapse the numbered track_<n> sensor columns into a single feature.
# Only columns named `track_<number>` are selected: a bare startswith('track') test
# also matches 'trackPos' and 'track_pos', which would then be mixed into the PCA
# and dropped together with the sensor columns.
# NOTE(review): assumes trackPos / track_pos are separate signals that should be kept
# in the cleaned data -- confirm, and drop one of them if they turn out to be duplicates.
# NOTE(review): assumes the numbered channels share one unit/scale -- otherwise
# standardise them before the PCA.
track_cols = [c for c in df.columns
              if c.startswith('track_') and c.split('_', 1)[1].isdigit()]

# Fixed seed: PCA may select a randomized SVD solver, whose output otherwise varies
# from run to run.
pca = PCA(n_components=1, random_state=42)
# fit_transform returns an array of shape (n_samples, 1); flatten it for a single column.
df['track_sensor'] = pca.fit_transform(df[track_cols]).ravel()
df = df.drop(columns=track_cols)

In [100]:
# 1) Collect the per-opponent angle / distance columns that are still present.
# NOTE(review): the saved output lists only two distance columns, yet no visible cell
# removes the opponent_*_angle columns (execution counts jump from 95 to 100). On a
# fresh Restart & Run All this selection will probably be larger than shown -- confirm.
opp_cols = [c for c in df.columns
            if c.startswith("opponent_")
            and c.endswith(("_angle", "_distance"))]

print("Found opponent columns:", opp_cols)

# 2) Standardise, then PCA into a single feature.
# The selection can mix angle and distance columns; without standardising first, the
# column with the largest raw variance would dominate the first component.
opp_scaled = StandardScaler().fit_transform(df[opp_cols])
pca = PCA(n_components=1, random_state=42)
# fit_transform returns an array of shape (n_samples, 1); flatten it for a single column.
df['opponent_sensor'] = pca.fit_transform(opp_scaled).ravel()

# 3) Drop the high-dimensional originals.
df = df.drop(columns=opp_cols)

Found opponent columns: ['opponent_0_distance', 'opponent_1_distance']


In [101]:
# Columns to standardise (zero mean, unit variance); adjust the list as needed.
features = ['steer_input','accel_input','brake_input','track_sensor','opponent_sensor']
scaler = StandardScaler()
scaler.fit(df[features])
df[features] = scaler.transform(df[features])

In [103]:
# Persist the cleaned frame; the row index is not written to the file.
df.to_csv(path_or_buf='driving_data_cleaned.csv', index=False)