## Pre-processing data from the game

- Get all files of the dataset folder
- Read and process each file
- Concatenate all files
- Save as a single file

In [1]:
# The 'nbagg' backend is selected here, before pyplot is imported below.
import matplotlib
matplotlib.use('nbagg')

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import glob
import csv

# Inline plotting is left disabled; the 'nbagg' backend chosen above is used instead.
#%matplotlib inline
# Apply seaborn's default plot styling.
sns.set()
# Never truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)


Every line of a recorded file represents one frame of the game.

- Load each file into a NumPy array
- The input features (throttle, brake, steering, handbrake) of frame (n+1) are the target of frame (n)
- Append the frame (n+1) input features to the frame (n) line
- Use just lidar from 0 to 180 degrees, remove the rest
- Concatenate to the dataset file

# Import log files

In [2]:
# Number of control inputs (Throttle, Brake, Steering, Handbrake) that are
# copied from the next frame and appended to every row as the target.
N_TARGETS = 4
# Columns 1..4 of a recorded frame hold those control inputs.
INPUT_COLUMNS = slice(1, 1 + N_TARGETS)
# Width of one dataset row: 47 recorded values + 4 target values.
N_COLUMNS = 51


def load_log(path):
    """Read one recorded log and append the next-frame targets to each frame.

    The file is parsed as ';'-separated values in which every unquoted field
    is converted to float (csv.QUOTE_NONNUMERIC).

    Parameters
    ----------
    path : str
        Path of the log file to read.

    Returns
    -------
    numpy.ndarray
        One row per frame: the recorded columns followed by N_TARGETS extra
        columns holding columns 1..4 of the following frame. The last frame
        has no successor, so its target columns stay at 0.0. An empty file
        yields an array with zero rows.
    """
    # The with-block closes the file handle; newline="" is what the csv
    # module expects from the file object it reads.
    with open(path, "r", newline="") as handle:
        rows = list(csv.reader(handle, delimiter=";", quoting=csv.QUOTE_NONNUMERIC))

    if not rows:
        return np.empty((0, 0))

    frames = np.array(rows)

    # Reserve the target columns, then shift the inputs of frame n+1 onto
    # frame n for all frames at once.
    labelled = np.concatenate((frames, np.zeros((frames.shape[0], N_TARGETS))), axis=1)
    labelled[:-1, -N_TARGETS:] = frames[1:, INPUT_COLUMNS]
    return labelled


# Get all files from the dataset folder (sorted so the row order is reproducible)
files = sorted(glob.glob("./data/*.txt"))
print("N logs: " + str(len(files)))

# Per-file arrays are collected first and concatenated once at the end,
# so no placeholder row has to be created and deleted afterwards.
logs = []
n_frames = 0  # total number of frames read (does not shadow the builtin sum)

# Iterate over the files
for path in files:
    labelled = load_log(path)

    # An empty log contributes no frames and has no column layout to merge
    if labelled.shape[0] == 0:
        continue

    logs.append(labelled)
    n_frames += labelled.shape[0]

# Create a big dataset with all rec files
if logs:
    dataset = np.concatenate(logs, axis=0)
else:
    dataset = np.zeros((0, N_COLUMNS))

N logs: 39


# Create a pandas dataframe

In [3]:
# Column layout of one dataset row, in file order:
#   10 vehicle-state values, 36 lidar readings (every 10 degrees from 0 to
#   350), the lap time, and the 4 target columns appended during loading.
vehicle_columns = [
    "Rec_time", "Throttle", "Brake", "Steering", "Handbrake",
    "Speed", "Accel_x", "Accel_y", "Pos_x", "Pos_y",
]
lidar_columns = ["lidar_{}d".format(angle) for angle in range(0, 360, 10)]
# NOTE: the "T_throtle" spelling is kept as-is; it is an existing column name.
target_columns = ["T_throtle", "T_brake", "T_steering", "T_handbrake"]

header = vehicle_columns + lidar_columns + ["lap_time"] + target_columns

# Wrap the raw array in a labelled pandas dataframe and preview its first rows
df_dataset = pd.DataFrame(data=dataset, columns=header)
df_dataset.head(50)

Unnamed: 0,Rec_time,Throttle,Brake,Steering,Handbrake,Speed,Accel_x,Accel_y,Pos_x,Pos_y,lidar_0d,lidar_10d,lidar_20d,lidar_30d,lidar_40d,lidar_50d,lidar_60d,lidar_70d,lidar_80d,lidar_90d,lidar_100d,lidar_110d,lidar_120d,lidar_130d,lidar_140d,lidar_150d,lidar_160d,lidar_170d,lidar_180d,lidar_190d,lidar_200d,lidar_210d,lidar_220d,lidar_230d,lidar_240d,lidar_250d,lidar_260d,lidar_270d,lidar_280d,lidar_290d,lidar_300d,lidar_310d,lidar_320d,lidar_330d,lidar_340d,lidar_350d,lap_time,T_throtle,T_brake,T_steering,T_handbrake
0,0.002,0.725,0.0,0.5,0.0,0.975,0.0,0.0,-7.668,-60.099,0.025,0.023,0.023,0.023,0.023,0.025,0.028,0.032,0.04,0.055,0.092,0.253,1.0,1.0,0.926,0.592,0.445,0.363,0.316,0.285,0.268,0.26,0.26,0.268,0.286,0.024,0.018,0.015,0.013,0.012,0.011,0.011,0.011,0.012,0.012,0.014,0.0,0.725,0.0,0.5,0.0
1,0.336,0.725,0.0,0.5,0.0,0.707,0.0,0.0,-9.591,-61.343,0.527,0.823,1.0,0.391,0.157,0.1,0.079,0.068,0.061,0.057,0.055,0.055,0.057,0.061,0.067,0.077,0.095,0.128,0.197,0.405,1.0,1.0,0.903,0.559,0.414,0.336,0.289,0.26,0.243,0.235,0.234,0.24,0.255,0.28,0.323,0.394,0.0,0.0,0.0,0.234,0.0
2,0.344,0.0,0.0,0.234,0.0,0.392,0.0,0.0,-9.691,-61.402,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.0,0.234,0.0
3,0.347,0.0,0.0,0.234,0.0,0.395,0.0,0.0,-9.691,-61.402,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.0,0.234,0.0
4,0.352,0.0,0.0,0.234,0.0,0.395,0.0,0.0,-9.691,-61.402,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.0,0.234,0.0
5,0.358,0.0,0.0,0.234,0.0,0.395,0.0,0.0,-9.691,-61.402,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.0,0.234,0.0
6,0.363,0.0,0.0,0.234,0.0,0.395,0.0,0.0,-9.791,-61.454,0.393,0.528,0.831,1.0,0.393,0.163,0.104,0.083,0.071,0.064,0.06,0.058,0.058,0.06,0.064,0.071,0.082,0.101,0.137,0.209,0.438,1.0,1.0,0.876,0.547,0.406,0.331,0.285,0.257,0.24,0.232,0.232,0.238,0.253,0.279,0.321,0.0,0.0,0.0,0.234,0.0
7,0.369,0.0,0.0,0.234,0.0,0.398,0.0,0.0,-9.791,-61.454,0.393,0.528,0.831,1.0,0.393,0.163,0.104,0.083,0.071,0.064,0.06,0.058,0.058,0.06,0.064,0.071,0.082,0.101,0.137,0.209,0.438,1.0,1.0,0.876,0.547,0.406,0.331,0.285,0.257,0.24,0.232,0.232,0.238,0.253,0.279,0.321,0.0,0.0,0.0,0.234,0.0
8,0.374,0.0,0.0,0.234,0.0,0.398,0.0,0.0,-9.791,-61.454,0.393,0.528,0.831,1.0,0.393,0.163,0.104,0.083,0.071,0.064,0.06,0.058,0.058,0.06,0.064,0.071,0.082,0.101,0.137,0.209,0.438,1.0,1.0,0.876,0.547,0.406,0.331,0.285,0.257,0.24,0.232,0.232,0.238,0.253,0.279,0.321,0.0,0.0,0.0,0.234,0.0
9,0.379,0.0,0.0,0.234,0.0,0.398,0.0,0.0,-9.791,-61.454,0.393,0.528,0.831,1.0,0.393,0.163,0.104,0.083,0.071,0.064,0.06,0.058,0.058,0.06,0.064,0.071,0.082,0.101,0.137,0.209,0.438,1.0,1.0,0.876,0.547,0.406,0.331,0.285,0.257,0.24,0.232,0.232,0.238,0.253,0.279,0.321,0.0,0.0,0.0,0.234,0.0


# Delete some features 

- Lidar from 180 to 350 degrees: only the front lidar is used
- Delete recording time, acceleration, position, and lap time

In [4]:
# Preview of the rear lidar readings (180 to 350 degrees) that are candidates
# for removal. The columns are selected by label rather than by position, so
# the same readings are shown even if the column layout of df_dataset changes.
# Label slices include both end points.
aux = df_dataset.loc[:, "lidar_180d":"lidar_350d"]
aux.head()

Unnamed: 0,lidar_180d,lidar_190d,lidar_200d,lidar_210d,lidar_220d,lidar_230d,lidar_240d,lidar_250d,lidar_260d,lidar_270d,lidar_280d,lidar_290d,lidar_300d,lidar_310d,lidar_320d,lidar_330d,lidar_340d,lidar_350d
0,0.316,0.285,0.268,0.26,0.26,0.268,0.286,0.024,0.018,0.015,0.013,0.012,0.011,0.011,0.011,0.012,0.012,0.014
1,0.197,0.405,1.0,1.0,0.903,0.559,0.414,0.336,0.289,0.26,0.243,0.235,0.234,0.24,0.255,0.28,0.323,0.394
2,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352
3,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352
4,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352


In [5]:
# Delete lidar features (disabled: the 180-350 degree lidar columns are currently kept)
#df_dataset = df_dataset.drop(df_dataset.columns[28:46], axis=1)
#df_dataset.head()


In [6]:
# Discard the timing, acceleration and position columns from the dataset
unused_columns = ['Rec_time', 'Accel_x', 'Accel_y', 'Pos_x', 'Pos_y', 'lap_time']
df_dataset = df_dataset.drop(unused_columns, axis=1)
df_dataset.head()

Unnamed: 0,Throttle,Brake,Steering,Handbrake,Speed,lidar_0d,lidar_10d,lidar_20d,lidar_30d,lidar_40d,lidar_50d,lidar_60d,lidar_70d,lidar_80d,lidar_90d,lidar_100d,lidar_110d,lidar_120d,lidar_130d,lidar_140d,lidar_150d,lidar_160d,lidar_170d,lidar_180d,lidar_190d,lidar_200d,lidar_210d,lidar_220d,lidar_230d,lidar_240d,lidar_250d,lidar_260d,lidar_270d,lidar_280d,lidar_290d,lidar_300d,lidar_310d,lidar_320d,lidar_330d,lidar_340d,lidar_350d,T_throtle,T_brake,T_steering,T_handbrake
0,0.725,0.0,0.5,0.0,0.975,0.025,0.023,0.023,0.023,0.023,0.025,0.028,0.032,0.04,0.055,0.092,0.253,1.0,1.0,0.926,0.592,0.445,0.363,0.316,0.285,0.268,0.26,0.26,0.268,0.286,0.024,0.018,0.015,0.013,0.012,0.011,0.011,0.011,0.012,0.012,0.014,0.725,0.0,0.5,0.0
1,0.725,0.0,0.5,0.0,0.707,0.527,0.823,1.0,0.391,0.157,0.1,0.079,0.068,0.061,0.057,0.055,0.055,0.057,0.061,0.067,0.077,0.095,0.128,0.197,0.405,1.0,1.0,0.903,0.559,0.414,0.336,0.289,0.26,0.243,0.235,0.234,0.24,0.255,0.28,0.323,0.394,0.0,0.0,0.234,0.0
2,0.0,0.0,0.234,0.0,0.392,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.234,0.0
3,0.0,0.0,0.234,0.0,0.395,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.234,0.0
4,0.0,0.0,0.234,0.0,0.395,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.234,0.0


In [7]:
# Discard the control inputs of the current frame; the T_* target columns
# (the inputs of the following frame) are kept.
current_inputs = ['Throttle', 'Brake', 'Steering', 'Handbrake']
df_dataset = df_dataset.drop(current_inputs, axis=1)

df_dataset.head()

Unnamed: 0,Speed,lidar_0d,lidar_10d,lidar_20d,lidar_30d,lidar_40d,lidar_50d,lidar_60d,lidar_70d,lidar_80d,lidar_90d,lidar_100d,lidar_110d,lidar_120d,lidar_130d,lidar_140d,lidar_150d,lidar_160d,lidar_170d,lidar_180d,lidar_190d,lidar_200d,lidar_210d,lidar_220d,lidar_230d,lidar_240d,lidar_250d,lidar_260d,lidar_270d,lidar_280d,lidar_290d,lidar_300d,lidar_310d,lidar_320d,lidar_330d,lidar_340d,lidar_350d,T_throtle,T_brake,T_steering,T_handbrake
0,0.975,0.025,0.023,0.023,0.023,0.023,0.025,0.028,0.032,0.04,0.055,0.092,0.253,1.0,1.0,0.926,0.592,0.445,0.363,0.316,0.285,0.268,0.26,0.26,0.268,0.286,0.024,0.018,0.015,0.013,0.012,0.011,0.011,0.011,0.012,0.012,0.014,0.725,0.0,0.5,0.0
1,0.707,0.527,0.823,1.0,0.391,0.157,0.1,0.079,0.068,0.061,0.057,0.055,0.055,0.057,0.061,0.067,0.077,0.095,0.128,0.197,0.405,1.0,1.0,0.903,0.559,0.414,0.336,0.289,0.26,0.243,0.235,0.234,0.24,0.255,0.28,0.323,0.394,0.0,0.0,0.234,0.0
2,0.392,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.234,0.0
3,0.395,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.234,0.0
4,0.395,0.448,0.641,1.0,1.0,0.228,0.125,0.089,0.075,0.066,0.061,0.058,0.057,0.058,0.06,0.065,0.074,0.087,0.112,0.16,0.272,0.778,1.0,1.0,0.682,0.47,0.367,0.308,0.271,0.249,0.237,0.232,0.235,0.245,0.265,0.298,0.352,0.0,0.0,0.234,0.0


In [8]:
# Summarise the remaining columns: entry count, per-column non-null counts and dtypes.
df_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 643382 entries, 0 to 643381
Data columns (total 41 columns):
Speed          643382 non-null float64
lidar_0d       643382 non-null float64
lidar_10d      643382 non-null float64
lidar_20d      643382 non-null float64
lidar_30d      643382 non-null float64
lidar_40d      643382 non-null float64
lidar_50d      643382 non-null float64
lidar_60d      643382 non-null float64
lidar_70d      643382 non-null float64
lidar_80d      643382 non-null float64
lidar_90d      643382 non-null float64
lidar_100d     643382 non-null float64
lidar_110d     643382 non-null float64
lidar_120d     643382 non-null float64
lidar_130d     643382 non-null float64
lidar_140d     643382 non-null float64
lidar_150d     643382 non-null float64
lidar_160d     643382 non-null float64
lidar_170d     643382 non-null float64
lidar_180d     643382 non-null float64
lidar_190d     643382 non-null float64
lidar_200d     643382 non-null float64
lidar_210d     643382 non-null 

# Save processed data to a CSV file

In [9]:
# Write the processed frame as a ';'-separated file, without the index
# column and with every float formatted to three decimals.
output_path = './data/dataset.csv'
df_dataset.to_csv(output_path, sep=';', index=False, float_format='%.3f')