In [1]:
import os

import numpy as np
import pandas as pd

from tsnn.data_utils import *
from tsnn.models import DeepRecurrent, LSTNet, DeepSense

Using TensorFlow backend.


In [2]:
# Filepath
# The dataset location can be overridden with the TSNN_STATES_CSV environment
# variable; the default is the original location so existing setups keep working.
path = os.environ.get("TSNN_STATES_CSV", "/Users/sofiene/Desktop/states_2017-08-07-00.csv")

# Prediction task
# The "*_with_planes" variants carry the "icao24" column in addition to the
# numeric columns; the generator uses it to filter windows and then drops it.
input_cols = ["time", "lat", "lon", "velocity", "heading"]
input_cols_with_planes = ["time", "icao24", "lat", "lon", "velocity", "heading"]
target_cols = ["lat", "lon"]
target_cols_with_planes = ["icao24", "lat", "lon"]
pred_delay = 1

# Hyperparameters
scaling_method = "maxabs"
timesteps = 30        # rows per input window (passed to the generator as samples_length)
batch_size = 64
sampling_step = 1     # row stride between two consecutive windows

# Simple cross-validation
# Fractions handed to train_val_split for the training and validation parts.
train_ratio = 0.6
val_ratio = 0.2

In [3]:
# Define generator function

def my_generator(inputs_with_planes, targets_with_planes, limits=None, samples_length=168, sampling_step=1, batch_size=24):
    """Endlessly yield (inputs, targets) batches made of single-aircraft windows.

    For every row index ``r`` in ``[limits[0], limits[1])``, visited with a stride
    of ``sampling_step``, the window ``inputs_with_planes.iloc[r:r + samples_length]``
    is paired with the row ``targets_with_planes.iloc[r]``. A pair is kept only if

    * the window holds exactly ``samples_length`` rows (a slice that runs past the
      end of the inputs is shorter and would make the batch ragged),
    * every row of the window has the same ``"icao24"`` value, and
    * that value equals the ``"icao24"`` value of the target row.

    Once ``limits[1]`` is reached the scan restarts at ``limits[0]``, so the
    generator never terminates on its own (unless the range is empty).

    Args:
        inputs_with_planes: DataFrame containing an ``"icao24"`` column plus the
            feature columns.
        targets_with_planes: DataFrame containing an ``"icao24"`` column plus the
            target columns.
        limits: ``(start, stop)`` row range to scan; defaults to
            ``(0, len(targets_with_planes))``.
        samples_length: number of rows in each input window.
        sampling_step: row stride between two consecutive windows.
        batch_size: number of kept pairs per yielded batch. The last batch of a
            pass may be smaller.

    Yields:
        A tuple ``(inputs, targets)`` of NumPy arrays: the stacked windows and the
        stacked target rows, both without the ``"icao24"`` entry.

    Raises:
        ValueError: if a complete pass over the range keeps no pair at all;
            without this check the generator would spin forever without yielding.
    """
    if limits is None:
        limits = (0, len(targets_with_planes))
    first_row, end_row = limits[0], limits[1]

    row = first_row
    inp_batch = []
    tar_batch = []
    kept_in_pass = 0  # pairs accepted since the scan last (re)started

    while row < end_row:
        inp = inputs_with_planes.iloc[row:row + samples_length]
        tar = targets_with_planes.iloc[row]
        planes = inp["icao24"]
        # `and` short-circuits, so an empty window never reaches `.iloc[-1]`.
        if (len(inp) == samples_length
                and len(planes.unique()) == 1
                and planes.iloc[-1] == tar["icao24"]):
            inp_batch.append(inp.drop('icao24', axis=1).values)
            tar_batch.append(tar.drop('icao24').values)
            kept_in_pass += 1

        # Flush a full batch, or whatever is pending at the end of a pass;
        # never emit an empty batch.
        last_row_of_pass = (row + sampling_step) >= end_row
        if inp_batch and (len(inp_batch) == batch_size or last_row_of_pass):
            yield np.array(inp_batch), np.array(tar_batch)
            inp_batch = []
            tar_batch = []

        row += sampling_step
        if row >= end_row:
            if kept_in_pass == 0:
                raise ValueError(
                    "my_generator found no valid single-aircraft window in rows "
                    "[{}, {})".format(first_row, end_row)
                )
            # Wrap around so the generator can feed any number of epochs.
            row = first_row
            kept_in_pass = 0

In [4]:
# Load and sort data
# Rows are ordered by aircraft first and by time second, then re-indexed from 0,
# so that the positional windows taken by the generator are chronological.
raw_data = pd.read_csv(path)
raw_data_all_planes = (
    raw_data[input_cols_with_planes]
    .sort_values(["icao24", "time"])
    .reset_index(drop=True)
)

# Handle NaNs
# NOTE(review): ffill/bfill run over the whole sorted frame, so a gap at the very
# start or end of one aircraft's rows is filled with values taken from the
# neighbouring aircraft (and a missing icao24 is filled the same way) — confirm
# this is acceptable or fill per aircraft instead.
clean_data = raw_data_all_planes.ffill().bfill()

# Scale
# Only the numeric columns are scaled, using the method chosen in the config cell;
# the plane identifier is re-attached afterwards and the configured column order
# (input_cols_with_planes) is restored.
scaled, stats_df = scaling(clean_data[input_cols], scaling_method)
scaled_with_planes = pd.concat([clean_data[["icao24"]], scaled], axis=1)[input_cols_with_planes]

# Inputs / Targets
inputs, targets = inputs_targets_split(scaled_with_planes,
                                       input_cols_with_planes,
                                       target_cols_with_planes,
                                       timesteps,
                                       pred_delay)

# Train / Val / Test split
# Each *_idx value is passed to my_generator as its `limits` argument.
train_idx, val_idx, test_idx = train_val_split(targets, train_ratio, val_ratio)

# Prepare generators
# One generator plus its step count per split; all three share the same window
# length, stride and batch size.
train_gen = my_generator(inputs, targets, train_idx, timesteps, sampling_step, batch_size)
train_gen_steps = compute_generator_steps(train_idx, sampling_step, batch_size)

val_gen = my_generator(inputs, targets, val_idx, timesteps, sampling_step, batch_size)
val_gen_steps = compute_generator_steps(val_idx, sampling_step, batch_size)

test_gen = my_generator(inputs, targets, test_idx, timesteps, sampling_step, batch_size)
test_gen_steps = compute_generator_steps(test_idx, sampling_step, batch_size)

# LSTNet

In [None]:
# Positions of the target columns inside a model input window. The generator drops
# "icao24" before the windows reach the model, so positions follow input_cols
# (this evaluates to [1, 2] for lat/lon with the current configuration).
target_cols_indices = [input_cols.index(col) for col in target_cols]

# Create model
lstnet = LSTNet(input_shape=(timesteps, len(input_cols)),
                interest_vars=target_cols_indices,
                # cnn_* arguments
                cnn_filters=100,
                cnn_kernel_height=6,
                cnn_activation='relu',
                cnn_use_bias=True,
                # gru_* arguments
                gru_units=100,
                gru_activation='relu',
                gru_use_bias=True,
                # gru_skip_* arguments
                gru_skip_units=5,
                gru_skip_step=6,
                gru_skip_activation='relu',
                gru_skip_use_bias=True,
                # ar_* arguments
                ar_window=6,
                ar_use_bias=True,
                dropout=0.2)
# Mean absolute error loss, optimised with Adam.
lstnet.compile(loss='mae', optimizer='adam')

In [None]:
from keras_tqdm import TQDMNotebookCallback

# Train the model
# A single epoch driven by the training generator, with the validation generator
# evaluated alongside it. verbose is set to 0 and a TQDM notebook callback is
# attached, so progress is reported by the callback.
lstnet.fit_generator(
    epochs=1,
    generator=train_gen,
    steps_per_epoch=train_gen_steps,
    validation_data=val_gen,
    validation_steps=val_gen_steps,
    shuffle=False,
    verbose=0,
    callbacks=[
        TQDMNotebookCallback(leave_outer=True, leave_inner=True),
    ],
)