In [1]:
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import pandas as pd
from sklearn.model_selection import train_test_split
import sqlite3
import pandas as pd
from pathlib import Path
import os
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

# Configure the first GPU (if any) for memory growth. Guarding on the list
# being non-empty keeps the notebook runnable on CPU-only machines, where
# indexing physical_devices[0] would raise IndexError.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    print(physical_devices[0])
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    print('No GPU detected; running on CPU')


PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [2]:
def import_training_data(data_dir='../../SQL_Data/constant_setup', min_size_bytes=10000):

    '''
    Imports training data as a pandas DataFrame.

    Reads every row of the ``TrainingData`` table from each ``.sqlite3`` file
    in ``data_dir`` whose size is greater than ``min_size_bytes``, stacks the
    rows, drops the columns ``frameIdentifier``, ``bestLapTime``, ``pkt_id``,
    ``packetId`` and ``SessionTime`` and uses the ``index`` column as index.

    Parameters
    ----------
    data_dir : str
        Directory that is scanned (non-recursively) for ``.sqlite3`` files.
    min_size_bytes : int
        Files of this size or smaller are skipped.

    Returns
    -------
    pandas.DataFrame
        The concatenated table, indexed by ``index``.

    Raises
    ------
    FileNotFoundError
        If no database in ``data_dir`` passes the size filter.
    '''

    db_paths = [os.path.join(data_dir, name)
                for name in os.listdir(data_dir)
                if name.endswith('.sqlite3')]

    frames = []
    for path in db_paths:
        if os.path.getsize(path) <= min_size_bytes:
            continue
        conn = sqlite3.connect(path)
        try:
            cur = conn.cursor()
            cur.execute('SELECT * FROM TrainingData')
            # Name the columns per file, straight from this cursor, so the
            # labels never depend on whichever file happened to be read last.
            names = [col[0] for col in cur.description]
            frames.append(pd.DataFrame(cur.fetchall(), columns=names))
        finally:
            # Always release the database handle, even if the query fails.
            conn.close()

    if not frames:
        raise FileNotFoundError(
            f'No .sqlite3 file larger than {min_size_bytes} bytes found in {data_dir!r}')

    df = pd.concat(frames)
    df = df.drop(['frameIdentifier','bestLapTime', 'pkt_id', 'packetId', 'SessionTime'], axis=1)
    df = df.set_index('index')

    print('Data Imported')

    return df

In [3]:
def format_data(data, max_timesteps=10000, train_fraction=0.9):
    '''
    Separates data first by session, then by lap, before zero-padding each lap
    so that they are all the same length for model input.
    Performs the train/test split also.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``sessionUID``, ``currentLapNum`` and
        ``lap_time_remaining``; every other column is used as a feature.
        The frame passed in is not modified.
    max_timesteps : int
        Length every lap is padded to (rows of zeros are appended).
    train_fraction : float
        Fraction of the laps (in session, then lap order) placed in the
        training split; the remainder forms the test split.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Padded feature arrays, one entry per lap.
    y_train, y_test : numpy.ndarray
        Padded ``lap_time_remaining`` arrays, one entry per lap.
    scalers : dict
        Always empty; scaling is not applied here (see scale_data). Kept so
        the return signature stays stable.
    num_rows_to_add : list of int
        Number of padding rows appended to each lap.

    Raises
    ------
    ValueError
        If any lap has more than ``max_timesteps`` rows.
    '''
    # reset_index returns a new frame, so the caller's DataFrame is untouched.
    data = data.reset_index(drop=True)

    scalers = {}

    training = []
    target = []
    for _, session in data.groupby('sessionUID'):
        for _, lap in session.groupby('currentLapNum'):
            lap = lap.drop(['sessionUID','currentLapNum'], axis=1)
            target.append(lap.pop('lap_time_remaining').to_numpy())
            training.append(lap.to_numpy())
    total_laps = len(training)
    print(f'Total Laps: {total_laps}')

    print(f'max timesteps : {max_timesteps}')
    num_rows_to_add = [max_timesteps - lap.shape[0] for lap in training]
    if any(rows < 0 for rows in num_rows_to_add):
        longest = max(lap.shape[0] for lap in training)
        raise ValueError(
            f'Longest lap has {longest} rows, which exceeds max_timesteps={max_timesteps}')

    training_pad = []
    target_pad = []
    for lap, lap_target, rows_to_add in zip(training, target, num_rows_to_add):
        # Append rows of zeros after the real samples.
        filler = np.zeros((rows_to_add, lap.shape[1]), dtype=float)
        training_pad.append(np.vstack((lap, filler)))
        target_pad.append(np.concatenate([lap_target, np.zeros(rows_to_add)]))

    split = int(total_laps * train_fraction)

    X_train = np.array(training_pad[:split])
    X_test = np.array(training_pad[split:])
    y_train = np.array(target_pad[:split])
    y_test = np.array(target_pad[split:])

    print('Data Formatted')

    return X_train, X_test, y_train, y_test, scalers, num_rows_to_add

In [4]:
def pad_data(training, target, max_timesteps=10000):
    '''
    Zero-pads every lap to ``max_timesteps`` rows.

    Parameters
    ----------
    training : list of numpy.ndarray
        One 2-D array per lap (rows are timesteps, columns are features).
    target : list of numpy.ndarray
        One array per lap with as many elements as the lap has rows; each is
        flattened to 1-D before padding.
    max_timesteps : int
        Length every lap is padded to.

    Returns
    -------
    training_pad, target_pad : list of numpy.ndarray
        The padded feature arrays and the padded 1-D target arrays.

    Raises
    ------
    ValueError
        If ``training`` and ``target`` differ in length, or if any lap has
        more than ``max_timesteps`` rows.
    '''
    if len(training) != len(target):
        raise ValueError(
            f'training has {len(training)} laps but target has {len(target)}')

    num_rows_to_add = [max_timesteps - lap.shape[0] for lap in training]
    print(f'max timesteps : {max_timesteps}')

    if any(rows < 0 for rows in num_rows_to_add):
        longest = max(lap.shape[0] for lap in training)
        raise ValueError(
            f'Longest lap has {longest} rows, which exceeds max_timesteps={max_timesteps}')

    training_pad = []
    target_pad = []
    for lap, lap_target, rows_to_add in zip(training, target, num_rows_to_add):
        # Append rows of zeros after the real samples.
        filler = np.zeros((rows_to_add, lap.shape[1]), dtype=float)
        training_pad.append(np.vstack((lap, filler)))

        # Targets arrive as column vectors from DataFrame.to_numpy(); flatten first.
        flat_target = lap_target.reshape(lap_target.shape[0])
        target_pad.append(np.concatenate([flat_target, np.zeros(rows_to_add)]))

    return training_pad, target_pad

In [None]:
def scale_data(data):
    '''
    Min-max scales every column of ``data`` to the range (0, 1), one
    independently fitted scaler per column.

    ``sessionUID`` and ``currentLapNum`` are taken out before scaling and put
    back unscaled afterwards (so they end up as the last two columns).
    ``data`` is modified in place and also returned.

    Returns
    -------
    data : pandas.DataFrame
        The same frame, with scaled columns.
    scalers : dict
        Fitted scalers keyed by ``'scaler_' + <column name>``.
    '''
    # Identifier columns must not be scaled; set them aside for now.
    session_ids = data.pop('sessionUID')
    lap_numbers = data.pop('currentLapNum')

    scalers = {}
    for column in data.columns:
        column_scaler = MinMaxScaler(feature_range=(0,1))
        # The scaler expects a 2-D column vector; flatten the result back to 1-D.
        scaled = column_scaler.fit_transform(data[column].values.reshape(-1,1))
        flat = np.reshape(scaled, len(scaled))
        scalers['scaler_'+ column ] = column_scaler
        data[column] = flat

    data['sessionUID'] = session_ids
    data['currentLapNum'] = lap_numbers

    return data, scalers

In [5]:
def dataframe_format(data):
    '''
    Splits ``data`` into laps, pads every lap with pad_data and returns the
    train/test split as flat DataFrames.

    Laps are formed by grouping on ``sessionUID`` and then ``currentLapNum``.
    ``sessionUID`` is removed from the features, ``currentLapNum`` is kept,
    and ``lap_time_remaining`` becomes the target. The first 90% of the laps
    form the training split. An ``index`` column, if present, is discarded.
    The frame passed in is not modified, so the function can be called again
    on the same input.

    Returns
    -------
    trainX, testX : pandas.DataFrame
        Padded laps stacked row-wise, with the feature column names.
    trainY, testY : pandas.DataFrame
        Single-column (``lap_time_remaining``) frames aligned with the above.

    Raises
    ------
    ValueError
        If there are too few laps for the training split to be non-empty.
    '''
    # Both calls return new frames, leaving the caller's DataFrame untouched.
    data = data.reset_index(drop=True).drop('index', axis=1, errors='ignore')

    samples = []
    targets = []
    for _, session in data.groupby('sessionUID'):
        for _, lap in session.groupby('currentLapNum'):
            lap = lap.drop(['sessionUID'], axis=1)
            targets.append(pd.DataFrame(lap.pop('lap_time_remaining')))
            samples.append(lap)
    total_laps = len(samples)

    split = int(total_laps*0.9)
    if split == 0:
        # np.concatenate would otherwise fail on the empty training split
        # with an unhelpful "need at least one array" message.
        raise ValueError(
            f'Need at least 2 laps to build a train/test split, got {total_laps}')

    sample_cols = list(samples[0].columns)

    training, target = pad_data([x.to_numpy() for x in samples],
                                [y.to_numpy() for y in targets])

    trainX = pd.DataFrame(np.concatenate(training[:split]), columns=sample_cols)
    testX  = pd.DataFrame(np.concatenate(training[split:]), columns=sample_cols)
    trainY = pd.DataFrame({'lap_time_remaining': np.concatenate(target[:split])})
    testY  = pd.DataFrame({'lap_time_remaining': np.concatenate(target[split:])})

    return trainX, testX, trainY, testY

In [6]:
def single_generator(trainX, testX, trainY, testY):
    '''
    Creates train and test generators from data for a single lap/sequence.

    Both generators use a window of 5 timesteps, a sampling rate and stride
    of 1, and a batch size of 1.

    Returns
    -------
    (train_generator, test_generator)
    '''
    generator_cls = tf.keras.preprocessing.sequence.TimeseriesGenerator
    # Identical windowing for both splits.
    window_options = dict(length=5, sampling_rate=1, stride=1, batch_size=1)

    return (generator_cls(trainX, trainY, **window_options),
            generator_cls(testX, testY, **window_options))

In [7]:
def concat_data(trainX, testX, trainY, testY):
    '''
    Joins each of the four inputs along its first axis with np.concatenate.

    Returns the results as a 4-tuple in the same order as the arguments.
    '''
    merged = [np.concatenate(part) for part in (trainX, testX, trainY, testY)]
    return tuple(merged)

In [8]:
def train_model(model, train_generator, epochs=1):
    '''
    Trains the model on batches drawn from ``train_generator``.

    Parameters
    ----------
    model : compiled Keras model
    train_generator : batch generator passed straight to ``model.fit``
    epochs : int
        Number of passes over the generator (default 1).

    Returns
    -------
    (history, model)
        The object returned by ``model.fit`` and the model that was passed in.
    '''
    callbacks = [
        EarlyStopping(monitor="loss", min_delta=0.0001, patience=10, mode='auto',
                      restore_best_weights=True),
        # ModelCheckpoint('generator_lstm.h5'),
    ]
    # batch_size is intentionally not passed: the generator already yields
    # batches, and Keras documents that batch_size must not be specified for
    # generator / Sequence inputs.
    history = model.fit(train_generator, callbacks=callbacks, shuffle=False, epochs=epochs)
    return history, model

In [9]:
def build_model(train_generator):

    '''
    Builds the model and prints out a summary.

    The network is Masking(mask_value=0.) -> LSTM(128) -> LeakyReLU ->
    Dropout(0.1) -> Dense(1), compiled with Adam (learning rate 0.001),
    mean-squared-error loss and mean-absolute-error as metric.

    Parameters
    ----------
    train_generator : batch generator
        Only its first batch is read, to take the input shape from the
        second and third dimensions of the sample array.

    Returns
    -------
    The compiled model.
    '''
    # Peek at one batch purely to discover the input shape.
    sample_batch, _ = train_generator[0]

    learning_rate = 0.001
    units = 128

    model = Sequential()
    model.add(tf.keras.layers.Masking(mask_value=0., input_shape=(sample_batch.shape[1], sample_batch.shape[2])))
    model.add(LSTM(units))
    model.add(LeakyReLU(alpha=0.5))
    model.add(Dropout(0.1))
    model.add(Dense(1))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    adam = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(optimizer=adam, loss='mse', metrics=['mae'])

    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()

    print('Model Built')

    return model

In [10]:
class sequence_generator(TimeseriesGenerator):
    '''
    TimeseriesGenerator variant that blanks out windows spanning two laps.

    Batches are built as in the parent class: each sample is the ``length``
    rows before a target row (subsampled by ``sampling_rate``). In addition,
    any sample whose last feature column is not constant over the window is
    overwritten with zeros (see check_overlap).

    NOTE(review): this assumes the last feature column holds the lap
    identifier, and presumably the zeros are meant to be skipped by the
    Masking(mask_value=0.) layer in build_model. Confirm against the caller.
    '''

    def __init__(self, data, targets, length,
                 sampling_rate=1,
                 stride=1,
                 start_index=0,
                 end_index=None,
                 shuffle=False,
                 reverse=False,
                 batch_size=128):
        # Argument validation and attribute setup are identical to the parent
        # class, so delegate instead of duplicating its constructor.
        super().__init__(data, targets, length,
                         sampling_rate=sampling_rate,
                         stride=stride,
                         start_index=start_index,
                         end_index=end_index,
                         shuffle=shuffle,
                         reverse=reverse,
                         batch_size=batch_size)

    def __getitem__(self, index):
        '''Returns batch number ``index`` as a ``(samples, targets)`` pair.'''
        if self.shuffle:
            rows = np.random.randint(
                self.start_index, self.end_index + 1, size=self.batch_size)
        else:
            i = self.start_index + self.batch_size * self.stride * index
            rows = np.arange(i, min(i + self.batch_size *
                                    self.stride, self.end_index + 1), self.stride)

        samples = np.array([self.data[row - self.length:row:self.sampling_rate]
                            for row in rows])
        targets = np.array([self.targets[row] for row in rows])

        # Blank lap-crossing windows before the optional time reversal so the
        # check applies to both orderings.
        samples, targets = self.check_overlap(samples, targets)

        if self.reverse:
            return samples[:, ::-1, ...], targets

        return samples, targets

    def check_overlap(self, samples, targets):
        '''
        Zeroes every sample whose last feature column changes within the window.

        Each sample of the batch is checked on its own, so a batch may contain
        both blanked and untouched windows. ``samples`` is modified in place;
        ``targets`` is returned unchanged.
        '''
        if samples.size == 0:
            # Nothing to inspect (e.g. a batch index past the end of the data).
            return samples, targets

        lap_col = samples[:, :, -1]
        # True for each sample where any timestep differs from the first one.
        crosses_lap = np.any(lap_col != lap_col[:, :1], axis=1)
        samples[crosses_lap] = 0.
        return samples, targets


In [27]:
# Load the sample laps and build the padded train/test frames.
data = pd.read_csv('sample_data.csv')
train_X, test_X, train_Y, test_Y = dataframe_format(data)

# NumPy versions of the four frames, as passed to the generators.
trainX, testX, trainY, testY = (
    frame.to_numpy() for frame in (train_X, test_X, train_Y, test_Y)
)

train_generator, test_generator = single_generator(trainX, testX, trainY, testY)


max timesteps : 10000


In [28]:


# NOTE(review): disabled SQL-based pipeline. This is the only place where
# `model`, `history`, `scalers`, `look_back` and `batch_size` are assigned at
# notebook level, and later cells read `model`, `scalers`, `look_back` and
# `batch_size`. Those cells fail on a fresh kernel unless this cell is
# re-enabled or the names are defined elsewhere.
# data = import_training_data()


# trainX, testX, trainY, testY, scalers, num_rows_to_add = format_data(data)

# train_X, test_X, train_Y, test_Y = concat_data(trainX, testX, trainY, testY)

# look_back = 5
# batch_size=1

# train_generator = sequence_generator(train_X, train_Y, length=look_back, sampling_rate=1, stride=1, batch_size=batch_size)

# test_generator = sequence_generator(test_X, test_Y, length=look_back, sampling_rate=1, stride=1, batch_size=batch_size)

# model = build_model(train_generator)


# history, model = train_model(model, train_generator)


(array([[[ 0.00000000e+00,  0.00000000e+00,  1.00000000e+00,
          1.03774345e+02,  8.24928284e+01,  7.00000000e+00,
          0.00000000e+00, -5.12259766e+02, -9.74773407e+00,
          3.99145050e+02, -3.69962363e-05,  1.71023747e-03,
         -6.66961714e-04,  2.52065134e+00,  1.39009953e-03,
          6.95634913e-03,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00,  3.80200000e+03,  0.00000000e+00,
          3.00000000e+01,  3.00000000e+01,  3.00000000e+01,
          3.00000000e+01,  8.40000000e+01,  8.40000000e+01,
          8.90000000e+01,  8.90000000e+01,  8.50000000e+01,
          1.00000000e+00,  6.00000000e+01,  5.47000008e+01,
          2.30414772e+00,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00,  0.00000000e+00,  1.80000000e+01,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
          0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e

In [None]:
def make_predictions(model, test_generator, scalers):
    '''
    Runs ``model.predict`` on ``test_generator`` and returns the result.

    ``scalers`` is accepted but not used.
    '''
    return model.predict(test_generator)
 
# NOTE(review): `model` and `scalers` are only assigned in the commented-out
# training cell above, so this line raises NameError on a fresh kernel.
pred = make_predictions(model, test_generator, scalers)


In [None]:
# Flatten predictions and targets to 1-D so they can be compared element-wise.
pred = pred.ravel()
testY = testY.ravel()
print(f'predictor shape {pred.shape}')
print(f'test shape {testY.shape}')
# The generator's first target is the row right after the first window, so
# there is no prediction for the leading `length` rows. Drop those leading
# rows (not the trailing ones) so truth[i] lines up with pred[i]. Deriving
# the offset from the lengths avoids hard-coding the window size.
test_y = testY[len(testY) - len(pred):]
print(f'test shape new {test_y.shape}')


In [None]:
# Inverse scaling is disabled; kept here for reference:
# scalers['scaler_lap_time_remaining'].inverse_transform(test_y.reshape(-1,1)).ravel()
truth = test_y
# Show both shapes, one per line.
print(truth.shape, pred.shape, sep='\n')

In [None]:
i_prev = 0
# Per-timestep prediction error (truth minus prediction).
err = pd.Series(truth-pred)
test_df = pd.DataFrame({
    'truth': truth,
    'error': err,
    # The predictions live in `pred`; the name `preds` is not defined at
    # notebook level and raised NameError here.
    'prediction': pred,
})
# info() prints its report itself and returns None, so it is not wrapped in print().
test_df.info()




In [None]:
# Plot the prediction error in consecutive segments, one line per segment.
# NOTE(review): when trainX is the 2-D array built from train_X.to_numpy(),
# trainX.shape[1] is the number of feature columns, not the padded lap
# length. Confirm which segment length is intended.
segment_len = trainX.shape[1]
j_prev = 0
err = truth-pred
# Extend the range by one step so the final (possibly shorter) segment is
# plotted too; slicing past the end of `err` is clipped automatically.
for j in range(segment_len, len(pred) + segment_len, segment_len):
    plt.plot(err[j_prev:j])
    j_prev = j

In [None]:
# Counts the batches of a fresh sequence_generator whose samples all equal 10.
# NOTE(review): `look_back` and `batch_size` are only assigned in the
# commented-out training cell (and as locals of single_generator), so this
# cell raises NameError unless they are defined first.
gen = sequence_generator(train_X, train_Y, length=look_back, sampling_rate=1, stride=1, batch_size=batch_size)
count = 0
for i in range(len(gen)):
    x,y = gen[i]
    # NOTE(review): check_overlap overwrites lap-crossing windows with 0.,
    # not 10. Confirm which value this count is meant to detect.
    if np.all(x == 10.):
        count += 1
print(count)