# Imports

In [1]:
import tensorflow as tf

# Dataset
import numpy as np
import pandas as pd

# # Utils
# from datetime import datetime

# Constants

In [2]:
# Model
learning_rate = 5e-2
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
loss = tf.keras.losses.Huber()
metrics = ["mae", "mse"]
epochs = 1000

# Data split
test_split = 0.1               # Float or int
valid_split = 0.25             # Float
train_split = 1 - valid_split  # Float

# Dataset window
steps_size = 40                     # minutes
predicts_size = 1             # minute(s)
window_size = steps_size + predicts_size
batch_size = 32
shuffle_buffer_size = 64

# Dataset frame
num_of_features = 6
num_of_labels = 2

# File
dataset_file = './dataset/user_1/user_1_data.csv'
model_1_file = './model/user_1/'
model_2_file = './model/...'

# Data

## Fetching

In [3]:
data = pd.read_csv(dataset_file)

data

Unnamed: 0,date,time,latitude,longitude
0,3/7/2022,0:00:00,-6.268917,106.780112
1,3/7/2022,0:01:00,-6.268917,106.780112
2,3/7/2022,0:02:00,-6.268917,106.780112
3,3/7/2022,0:03:00,-6.268917,106.780112
4,3/7/2022,0:04:00,-6.268917,106.780112
...,...,...,...,...
20155,3/20/2022,23:55:00,-6.268917,106.779552
20156,3/20/2022,23:56:00,-6.268917,106.779552
20157,3/20/2022,23:57:00,-6.268917,106.779552
20158,3/20/2022,23:58:00,-6.268917,106.779552


## Preprocessing

In [4]:
# Converting date string to datetime
data["date"] = pd.to_datetime(data["date"])
data["day_of_week"] = data["date"].dt.day_of_week
data["month"] = data["date"].dt.month
data["year"] = data["date"].dt.year

# Converting time to cumulative minute
# source: https://stackoverflow.com/questions/17951820/convert-hhmmss-to-minutes-using-python-pandas
# credit: Andy Hayden
data["time"] = data["time"].str.split(':').apply(lambda time: int(time[0]) * 60 + int(time[1]))

# Removing unused column
del data["date"]

# Rearrange column
data = data[["year", "month", "day_of_week", "time", "latitude", "longitude"]]

data

Unnamed: 0,year,month,day_of_week,time,latitude,longitude
0,2022,3,0,0,-6.268917,106.780112
1,2022,3,0,1,-6.268917,106.780112
2,2022,3,0,2,-6.268917,106.780112
3,2022,3,0,3,-6.268917,106.780112
4,2022,3,0,4,-6.268917,106.780112
...,...,...,...,...,...,...
20155,2022,3,6,1435,-6.268917,106.779552
20156,2022,3,6,1436,-6.268917,106.779552
20157,2022,3,6,1437,-6.268917,106.779552
20158,2022,3,6,1438,-6.268917,106.779552


## Splitting

In [5]:
def split_data(data, train_split, test_split):
    """Split data to train, valid, and test data"""
    # Split train_valid data and test data
    test_len = test_split
    if type(test_split)==float:
        test_len = int(test_len * len(data))
    train_val_data, test_data = data[:-test_len], data[-test_len:]
    
    # Split train data and valid data
    train_len = int(len(train_val_data) * train_split)
    train_data, valid_data = train_val_data[:train_len], train_val_data[train_len:]
    
    return train_data, valid_data, test_data

train_data, valid_data, test_data = split_data(data, train_split, test_split)

print("Dataset Shape")
print(f'Train : {train_data.shape}')
print(f'Valid : {valid_data.shape}')
print(f'Test  : {test_data.shape}')

Dataset Shape
Train : (13608, 6)
Valid : (4536, 6)
Test  : (2016, 6)


## Windowing

In [6]:
def windowed_dataset(data, steps_size, predicts_size, batch_size, shuffle_buffer):
    """Create windowed dataset"""
    # Converting to tfds
    wds = tf.data.Dataset.from_tensor_slices(data)
    
    # Data shifting
    wds = wds.window(steps_size+predicts_size, shift=predicts_size, drop_remainder=True)
    
    # Flatten windows
    wds = wds.flat_map(lambda window : window.batch(steps_size+predicts_size))
    
    # Create window tuples
    wds = wds.map(lambda window: (window[:-predicts_size], window[-predicts_size:, -num_of_labels:]))

    # Shuffle windows
    wds = wds.shuffle(shuffle_buffer)
    
    # Batch windows
    wds = wds.batch(batch_size).prefetch(1)
    
    return wds

wds = windowed_dataset(data, steps_size, predicts_size, batch_size, shuffle_buffer_size)
for idx,(x,y) in enumerate(wds):
    print("x = ", x.numpy().shape)
    print("y = ", y.numpy().shape)
    break

x =  (32, 40, 6)
y =  (32, 1, 2)


In [7]:
train_wds = windowed_dataset(train_data, steps_size, predicts_size, batch_size, shuffle_buffer_size)
for idx,(x,y) in enumerate(train_wds):
    print("x = ", x.numpy().shape)
    print("y = ", y.numpy().shape)
    break
print(train_wds.element_spec)

valid_wds = windowed_dataset(valid_data, steps_size, predicts_size, batch_size, shuffle_buffer_size)
test_wds = windowed_dataset(test_data, steps_size, predicts_size, batch_size, shuffle_buffer_size)
    

x =  (32, 40, 6)
y =  (32, 1, 2)
(TensorSpec(shape=(None, None, 6), dtype=tf.float64, name=None), TensorSpec(shape=(None, None, 2), dtype=tf.float64, name=None))


# Model

In [8]:
def create_model():
    """Create Forecasting Model
    Model used: LSTM
    output should consist of 2 item, latitude and longitude
    """
    tf.keras.backend.clear_session()
    # Generating model
    model = tf.keras.models.Sequential([
        tf.keras.layers.LSTM(32, activation='relu', input_shape=(steps_size, num_of_features), return_sequences=True),
        tf.keras.layers.LSTM(16, activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(16, activation='linear'),
        tf.keras.layers.Dense(8, activation='sigmoid'),
        tf.keras.layers.Dense(num_of_labels, activation='linear')
    ])

    # Compiling model
    model.compile(
        loss=loss,
        optimizer=optimizer,
        metrics=metrics,
    )
    
    return model
model = create_model()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 40, 32)            4992      
                                                                 
 lstm_1 (LSTM)               (None, 16)                3136      
                                                                 
 flatten (Flatten)           (None, 16)                0         
                                                                 
 dense (Dense)               (None, 16)                272       
                                                                 
 dense_1 (Dense)             (None, 8)                 136       
                                                                 
 dense_2 (Dense)             (None, 2)                 18        
                                                                 
Total params: 8,554
Trainable params: 8,554
Non-trainabl

In [9]:
model.fit(train_wds, epochs=20, validation_data=valid_wds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x20b280951c0>

In [10]:
model.evaluate(test_wds)



[0.02281029522418976, 0.05797765776515007, 0.42852047085762024]

In [11]:
test_wds

<PrefetchDataset element_spec=(TensorSpec(shape=(None, None, 6), dtype=tf.float64, name=None), TensorSpec(shape=(None, None, 2), dtype=tf.float64, name=None))>

# References

- [Sequences, Time Series and Prediction by DeepLearning.AI](https://www.coursera.org/learn/tensorflow-sequences-time-series-and-prediction)
- [Multi-Variate Time Series Forecasting Tensorflow by Nicholas Jhana](https://www.kaggle.com/code/nicholasjhana/multi-variate-time-series-forecasting-tensorflow/notebook#Visualizing-Predictions)