<font size="+0.5">Notebook for transforming the data format used to train the model</font>

In [None]:
import numpy as np
import pandas as pd
import os

from datetime import datetime

In [None]:
# strptime pattern of the timestamps in the initial data:
# day/month/year hour:minute:second
form = "%d/%m/%Y %H:%M:%S"

In [None]:
# Lookup table translating the raw event labels into the English names used
# in the exported data; "No label" maps to itself.
# NOTE: "Aggressive breaking" (sic) is used verbatim as a key when the
# train/validation subsets are selected, so the spelling must stay in sync.
replacement = {
    "curva_direita_agressiva": "Aggressive right turn",
    "curva_esquerda_agressiva": "Aggressive left turn",
    "evento_nao_agressivo": "Non-aggressive event",
    "troca_faixa_direita_agressiva": "Aggressive right lane change",
    "aceleracao_agressiva": "Aggressive acceleration",
    "freada_agressiva": "Aggressive breaking",
    "troca_faixa_esquerda_agressiva": "Aggressive left lane change",
    "No label": "No label",
}

In [None]:
# Row-wise translator for the 'event' column
def replace_event(row):
    """Return the English name for the raw label stored in ``row['event']``.

    Raises:
        KeyError: if the label is not a key of ``replacement``.
    """
    raw_label = row['event']
    return replacement[raw_label]

In [None]:
# Load accelerometer and gyroscope data of one trip and attach its events
def make_labeled_data(folder_num):
    """Build one labeled sensor DataFrame for the trip in ``folder_num``.

    Reads ``groundTruth.csv``, ``aceleracaoLinear_terra.csv`` and
    ``giroscopio_terra.csv`` from ``data/data_init/<folder_num>``.

    Args:
        folder_num: Name of the trip folder under ``data/data_init``
            (converted with ``str``).

    Returns:
        pandas.DataFrame: the accelerometer table with ``x``/``y``/``z``
        renamed to ``*_accelerometer``, plus the columns
        ``x_gyroscope``/``y_gyroscope``/``z_gyroscope``,
        ``time_duration`` (whole seconds elapsed since the row labelled 0)
        and ``event`` (translated through ``replace_event``; rows outside
        every ground-truth interval get "No label").

    Raises:
        KeyError: if a ground-truth label is missing from ``replacement``.
    """
    folder = os.path.join('data', 'data_init', str(folder_num))
    # Load events and their time intervals
    data_label = pd.read_csv(os.path.join(folder, 'groundTruth.csv'))
    # Load accelerometer and gyroscope data
    data_acc = pd.read_csv(os.path.join(folder, 'aceleracaoLinear_terra.csv'))
    data_gyro = pd.read_csv(os.path.join(folder, 'giroscopio_terra.csv'))

    # rename() returns a new frame, so data_acc itself is left untouched
    data = data_acc.rename(columns={"x": "x_accelerometer",
                                    "y": "y_accelerometer",
                                    "z": "z_accelerometer"})
    # Gyroscope columns are aligned with the accelerometer rows by index
    for axis in ("x", "y", "z"):
        data[axis + '_gyroscope'] = data_gyro[axis]

    # Parse all timestamps at once and take the first record as trip start
    timestamps = pd.to_datetime(data['timestamp'], format=form)
    elapsed = timestamps - timestamps.loc[0]
    # total_seconds() keeps the days component, which timedelta.seconds
    # silently drops (durations would wrap around after 24 hours)
    data['time_duration'] = elapsed.dt.total_seconds().astype(int)

    # Create the column up front so a trip without any ground-truth rows
    # still yields an 'event' column instead of raising KeyError
    data['event'] = "No label"
    intervals = zip(data_label[' inicio'], data_label[' fim'], data_label['evento'])
    for start, finish, label in intervals:
        # Interval is half-open: start <= time_duration < finish
        in_interval = (data['time_duration'] >= start) & (data['time_duration'] < finish)
        data.loc[in_interval, 'event'] = label

    # A missing label in the ground truth is treated as "No label" as well
    data['event'] = data['event'].fillna("No label")
    data['event'] = data.apply(replace_event, axis=1)

    return data

In [None]:
# Function for creating the sequence of event numbers of one dataframe
# Each run of consecutive rows with the same event gets its own number
def create_events_sequence(data):
    """Number the runs of consecutive equal values in ``data["event"]``.

    The first run gets number 1 and the number grows by one every time the
    event value differs from the one of the previous row.

    Args:
        data: DataFrame with an ``event`` column.

    Returns:
        list[int]: one run number per row, in row order; an empty list when
        ``data`` has no rows.
    """
    # Walk the plain column values: this does not depend on the index being
    # unique and avoids building a Series per row as iterrows() does
    events = data["event"].tolist()
    if not events:
        return []

    sequence = []
    event_num = 1
    previous = events[0]
    for current in events:
        if current != previous:
            event_num += 1
            previous = current
        sequence.append(event_num)
    return sequence

In [None]:
# Function for collecting the labeled segments of a dataframe by event name
def add_events_to_dict(data, dictionary):
    """Append every labeled segment of ``data`` to ``dictionary``.

    A segment is a run of consecutive rows sharing one event, as numbered by
    ``create_events_sequence``. Rows whose event is "No label" are skipped.

    Args:
        data: DataFrame with an ``event`` column. An ``event_number``
            column is written to it in place.
        dictionary: dict mapping an event name to a list of segment
            DataFrames; updated in place.

    Returns:
        The same ``dictionary`` object, after the update.
    """
    # Number the segments on the caller's frame
    data["event_number"] = create_events_sequence(data)
    # Keep the labeled rows only
    labeled = data[data["event"] != "No label"]

    # groupby yields the segments in ascending event_number order
    for _, segment in labeled.groupby("event_number"):
        label = segment["event"].values[0]
        bucket = dictionary.get(label)
        if not bucket:
            # Unknown (or still empty) event name: start a new list
            dictionary[label] = [segment]
            continue
        bucket.append(segment)

    return dictionary

In [None]:
# Labeled sensor data of the four trips stored in folders 16, 17, 20 and 21
data1, data2, data3, data4 = (
    make_labeled_data(folder) for folder in (16, 17, 20, 21)
)

In [None]:
# Dictionary collecting the labeled parts of the dataframes, keyed by event
# name; each value is a list of DataFrame segments (see add_events_to_dict)
event_dict = {}

In [None]:
# Collect the labeled segments of every trip, in trip order
for trip_data in (data1, data2, data3, data4):
    event_dict = add_events_to_dict(trip_data, event_dict)

In [None]:
def _concat_events(event_name, positions):
    """Concatenate the segments of ``event_name`` found at ``positions``."""
    segments = event_dict[event_name]
    return pd.concat([segments[i] for i in positions])


# Hand-picked segment positions per event: training first, validation second
train_agg_br = _concat_events("Aggressive breaking", [0, 2, 3, 4, 5, 6, 8, 9, 11, 7])
val_agg_br = _concat_events("Aggressive breaking", [1, 10])

train_agg_ac = _concat_events("Aggressive acceleration", [0, 2, 3, 4, 5, 6, 8, 9, 11, 7])
val_agg_ac = _concat_events("Aggressive acceleration", [1, 10])

train_agg_lt = _concat_events("Aggressive left turn", [0, 2, 3, 4, 5, 6, 8, 9, 7])
val_agg_lt = _concat_events("Aggressive left turn", [1, 10])

train_agg_rt = _concat_events("Aggressive right turn", [0, 2, 3, 4, 5, 6, 8, 9, 7])
val_agg_rt = _concat_events("Aggressive right turn", [1, 10])

train_agg_lc = _concat_events("Aggressive left lane change", [0, 2, 3])
val_agg_lc = _concat_events("Aggressive left lane change", [1])

train_agg_rc = _concat_events("Aggressive right lane change", [0, 2, 4, 3])
val_agg_rc = _concat_events("Aggressive right lane change", [1])

train_agg_na = _concat_events("Non-aggressive event", [0, 2, 3, 4, 5, 6, 8, 9, 11, 13, 10])
val_agg_na = _concat_events("Non-aggressive event", [1, 7, 12])

In [None]:
# Stack the per-event subsets into the final training and validation frames
train = pd.concat(
    [
        train_agg_br,
        train_agg_ac,
        train_agg_lt,
        train_agg_rt,
        train_agg_lc,
        train_agg_rc,
        train_agg_na,
    ]
)
val = pd.concat(
    [
        val_agg_br,
        val_agg_ac,
        val_agg_lt,
        val_agg_rt,
        val_agg_lc,
        val_agg_rc,
        val_agg_na,
    ]
)

In [None]:
# Columns written to the exported CSV files: sensor axes plus the label
columns_to_save = [
    'x_accelerometer',
    'y_accelerometer',
    'z_accelerometer',
    'x_gyroscope',
    'y_gyroscope',
    'z_gyroscope',
    'event',
]

In [None]:
# Write the training set (selected columns only, without the index)
train.loc[:, columns_to_save].to_csv(
    'data/train_accelerometer_gyroscope.csv',
    index=False,
)

In [None]:
# Write the validation set (selected columns only, without the index)
val.loc[:, columns_to_save].to_csv(
    'data/val_accelerometer_gyroscope.csv',
    index=False,
)