In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import pickle
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf
# Bare expression on the last line of the cell: the notebook echoes the
# TensorFlow version string as the cell output.
tf.__version__

In [None]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read, instead of being left open until garbage
    collection.
    """
    # NOTE(review): pickle.load can execute arbitrary code while loading; only
    # point this at artifacts from a trusted source.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Encoders whose .transform() maps raw ids to categorical ids.
bus_id_encoder     = _load_pickle("./out/bus_id_encoder.pickle")
route_id_encoder   = _load_pickle("./out/route_id_encoder.pickle")
busstop_id_encoder = _load_pickle("./out/busstop_id_encoder.pickle")

In [None]:
# Sanity check: run a couple of raw identifiers through each loaded encoder
# and print the raw ids next to the categorical ids they map to.
dummy_buses    = [325819008, 451509115]
dummy_routes   = [11100010, 11100012]
dummy_busstops = [388, 112]

for raw_label, encoded_label, encoder, raw_ids in (
    ("bus ids", "bus categorical ids", bus_id_encoder, dummy_buses),
    ("route ids", "route categorical ids", route_id_encoder, dummy_routes),
    ("busstop ids", "busstop categorical ids", busstop_id_encoder, dummy_busstops),
):
    print(raw_label, raw_ids)
    print(encoded_label, encoder.transform(raw_ids))


In [None]:
# Read the cleaned training CSV into a DataFrame. The bare name on the last
# line makes the notebook render the frame as the cell output.
cleaned_csv_path = "./out/cleaned_train.csv"
dataset_df = pd.read_csv(cleaned_csv_path)
dataset_df

In [None]:
# Positional hold-out split: the first `split_index` rows form the training
# set and every remaining row forms the test set.
# Original note on the index: "about 10%" (presumably the share of rows left
# for testing -- confirm against the dataset size).
split_index = 2900000
train_df = dataset_df.iloc[:split_index]
test_df = dataset_df.iloc[split_index:]

# Report the size of each partition.
for partition in (train_df, test_df):
    print(len(partition))

In [None]:
# Feature columns fed to the model. The order is significant: BusNet.call
# reads every feature by its position in this list.
training_columns = [
    # calendar / clock features
    'DAY_OF_WEEK',
    'HOUR_OF_DAY',
    'MINUTE_OF_HOUR',
    'SECOND_OF_MINUTE',
    'TIME',
    # categorical identifiers
    'BUS_ID',
    'BUSROUTE_ID',
    'BUSSTOP_ID',
    # values appended as raw scalars inside the network
    'ROUTE_TIME',
    'SEQ_NUM',
]

In [None]:
# Separate each partition into the feature frame and the regression target.
# The target is selected with a list so it stays a single-column DataFrame.
label_columns = ['TIMESTAMP_DIFF']

train_input, train_label = train_df[training_columns], train_df[label_columns]
test_input, test_label = test_df[training_columns], test_df[label_columns]

In [None]:
def _frames_to_dataset(features_df, labels_df):
    """Wrap a feature/label DataFrame pair as a ``tf.data.Dataset``.

    Both frames are converted to float32 tensors and sliced along the first
    axis, so each dataset element is one ``(features, label)`` row pair.
    """
    features = tf.cast(features_df.values, tf.float32)
    labels = tf.cast(labels_df.values, tf.float32)
    return tf.data.Dataset.from_tensor_slices((features, labels))


training_dataset = _frames_to_dataset(train_input, train_label)
testing_dataset = _frames_to_dataset(test_input, test_label)

In [None]:
def get_angles(pos, i, d_model):
    """Return the angle ``pos / 10000 ** (2 * (i // 2) / d_model)``.

    Args:
        pos: position value(s); broadcast against ``i``.
        i: encoding-dimension index/indices. ``i // 2`` makes each even index
            share its rate with the odd index that follows it.
        d_model: total number of encoding dimensions.

    Returns:
        ``pos`` multiplied by the per-dimension angle rate.
    """
    exponent = (2 * (i // 2)) / np.float32(d_model)
    return pos * (1 / np.power(10000, exponent))

def positional_encoding(position, d_model):
    """Build a sinusoidal positional-encoding table.

    Args:
        position: number of positions (rows) to encode.
        d_model: number of encoding dimensions (columns).

    Returns:
        A float32 tensor of shape ``(1, position, d_model)`` with sine values
        in the even columns and cosine values in the odd columns.
    """
    row_positions = np.arange(position)[:, np.newaxis]
    column_indices = np.arange(d_model)[np.newaxis, :]
    table = get_angles(row_positions, column_indices, d_model)

    # Even columns (2i) carry the sine, odd columns (2i+1) the cosine.
    table[:, 0::2] = np.sin(table[:, 0::2])
    table[:, 1::2] = np.cos(table[:, 1::2])

    # Prepend a leading axis of size 1 before converting to a tensor.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

In [None]:
# Build the positional-encoding table for every second of an 18-hour window
# and draw it as a heat map (rows = positions, columns = encoding depth).
time_dim = 151
total_seconds = 18 * 3600  # 18 hours
pos_encoding = positional_encoding(total_seconds, time_dim)
print(pos_encoding.shape)

fig, ax = plt.subplots()
mesh = ax.pcolormesh(pos_encoding[0], cmap='RdBu')
ax.set_xlabel('Depth')
ax.set_xlim((0, time_dim))
ax.set_ylabel('Position')
fig.colorbar(mesh, ax=ax)
plt.show()

In [None]:
class BusNet(tf.keras.Model):
    """Feed-forward regressor over embedded temporal and categorical features.

    Every input row holds ten values in this column order:
    DAY_OF_WEEK, HOUR_OF_DAY, MINUTE_OF_HOUR, SECOND_OF_MINUTE, TIME,
    BUS_ID, BUSROUTE_ID, BUSSTOP_ID, ROUTE_TIME, SEQ_NUM.

    The first eight columns are cast to int32 and used as embedding indices;
    the last two are appended as raw scalars after the first dense layer.
    The network emits one value per row.
    """

    def __init__(self, **kwargs):
        """Create the embedding tables and dense layers.

        Args:
            **kwargs: forwarded unchanged to ``tf.keras.Model.__init__``.
        """
        super(BusNet, self).__init__(**kwargs)

        # Index ranges of the calendar/clock features.
        self.week    = list(range(7))
        self.hours   = list(range(24))
        self.minutes = list(range(60))
        self.seconds = list(range(60))

        # 7 days + 24 hours + 60 minutes + 60 seconds = 151, so the
        # concatenated one-hot vectors can be added to the time embedding.
        self.time_dim   = 151
        self.time_count = 18 * 3600  # number of rows in the time embedding

        # Vocabulary size and embedding width of each categorical id.
        self.bus_dim       = 32
        self.bus_count     = 1077

        self.route_dim     = 32
        self.route_count   = 353

        self.busstop_dim   = 32
        self.busstop_count = 1357

        self.dropout_rate  = 0.1

        # Fixed lookup tables: identity (one-hot) matrices for the clock
        # features and a sinusoidal table for the absolute time position.
        self.days_of_week_matrix      = tf.one_hot(self.week,    len(self.week))
        self.hours_of_day_matrix      = tf.one_hot(self.hours,   len(self.hours))
        self.minutes_of_hour_matrix   = tf.one_hot(self.minutes, len(self.minutes))
        self.seconds_of_minute_matrix = tf.one_hot(self.seconds, len(self.seconds))
        self.time_position_matrix     = positional_encoding(self.time_count, self.time_dim)

        # The four clock embeddings are frozen one-hot lookups; the time
        # embedding starts from the positional encoding but stays trainable.
        # NOTE(review): the `weights=` constructor argument is relied on here
        # to seed the embedding tables -- confirm it is supported by the
        # installed Keras version.
        self.day_embedding = tf.keras.layers.Embedding(
            len(self.week), len(self.week),
            weights=[self.days_of_week_matrix], trainable=False)
        self.hour_embedding = tf.keras.layers.Embedding(
            len(self.hours), len(self.hours),
            weights=[self.hours_of_day_matrix], trainable=False)
        self.minute_embedding = tf.keras.layers.Embedding(
            len(self.minutes), len(self.minutes),
            weights=[self.minutes_of_hour_matrix], trainable=False)
        self.second_embedding = tf.keras.layers.Embedding(
            len(self.seconds), len(self.seconds),
            weights=[self.seconds_of_minute_matrix], trainable=False)
        self.time_embedding = tf.keras.layers.Embedding(
            self.time_count, self.time_dim,
            weights=[self.time_position_matrix[0]], trainable=True)

        # Learned embeddings for the categorical identifiers.
        self.bus_embedding = tf.keras.layers.Embedding(
            self.bus_count, self.bus_dim, embeddings_initializer='uniform')
        self.route_embedding = tf.keras.layers.Embedding(
            self.route_count, self.route_dim, embeddings_initializer='uniform')
        self.busstop_embedding = tf.keras.layers.Embedding(
            self.busstop_count, self.busstop_dim, embeddings_initializer='uniform')

        # Dense stack; dropout follows the second and third layers only.
        self.layer_1      = tf.keras.layers.Dense(100, activation='relu')
        self.layer_2      = tf.keras.layers.Dense(64, activation='relu')
        self.dropout_2    = tf.keras.layers.Dropout(self.dropout_rate)
        self.layer_3      = tf.keras.layers.Dense(32, activation='relu')
        self.dropout_3    = tf.keras.layers.Dropout(self.dropout_rate)
        self.output_layer = tf.keras.layers.Dense(1)

    def call(self, inputs, training=True):
        """Run the forward pass.

        Args:
            inputs: 2-D tensor whose columns follow the order listed in the
                class docstring.
            training: forwarded to the dropout layers. The signature default
                is ``True``, so pass ``training=False`` explicitly for
                inference.

        Returns:
            The output of the final ``Dense(1)`` layer, one value per row.
        """
        days_of_week      = inputs[:, 0]
        hours_of_day      = inputs[:, 1]
        minutes_of_hour   = inputs[:, 2]
        seconds_of_minute = inputs[:, 3]
        time_positions    = inputs[:, 4]
        bus_ids           = inputs[:, 5]
        route_ids         = inputs[:, 6]
        busstop_ids       = inputs[:, 7]
        route_times       = inputs[:, 8]
        seq_nums          = inputs[:, 9]

        # Embedding lookups need integer indices.
        day_vectors    = self.day_embedding(tf.cast(days_of_week, dtype=tf.int32))
        hour_vectors   = self.hour_embedding(tf.cast(hours_of_day, dtype=tf.int32))
        minute_vectors = self.minute_embedding(tf.cast(minutes_of_hour, dtype=tf.int32))
        second_vectors = self.second_embedding(tf.cast(seconds_of_minute, dtype=tf.int32))
        time_vectors   = self.time_embedding(tf.cast(time_positions, dtype=tf.int32))

        bus_vectors     = self.bus_embedding(tf.cast(bus_ids, dtype=tf.int32))
        route_vectors   = self.route_embedding(tf.cast(route_ids, dtype=tf.int32))
        busstop_vectors = self.busstop_embedding(tf.cast(busstop_ids, dtype=tf.int32))

        # Concatenated one-hot clock vectors (width 151) plus the time embedding.
        temporal_features = tf.math.add(
            tf.concat([day_vectors, hour_vectors, minute_vectors, second_vectors], 1),
            time_vectors
        )

        concatted_input = tf.concat([
            temporal_features,
            bus_vectors,
            route_vectors,
            busstop_vectors
        ], 1)

        x = self.layer_1(concatted_input)

        # Append the two raw scalar features as extra columns. Reshape with -1
        # rather than the static shape[0], which is None whenever the batch
        # dimension is unknown (symbolic inputs) and would make reshape fail.
        # The original comment called these a "scaling factor".
        x = tf.concat([
            x,
            tf.reshape(route_times, [-1, 1]),
            tf.reshape(seq_nums, [-1, 1])
        ], 1)

        x = self.layer_2(x)
        x = self.dropout_2(x, training=training)
        x = self.layer_3(x)
        x = self.dropout_3(x, training=training)

        return self.output_layer(x)

In [None]:
# Scratch check: turn two 1-D tensors into column vectors and append them as
# extra columns to a 2-D tensor.
t = tf.constant([[1, 2, 3], [2, 3, 4]])
a = tf.constant([2, 3])
b = tf.constant([4, 5])
print(t.shape, a.shape, b.shape)

a_column = tf.reshape(a, [a.shape[0], 1])
b_column = tf.reshape(b, [b.shape[0], 1])
print(a_column)

c = tf.concat([t, a_column, b_column], 1)
print(c)

In [None]:
# Create the network and push the first training row through it as a batch of
# one; the prediction is shown as the cell output.
model = BusNet()
first_row = train_input.iloc[0].values
model(first_row[np.newaxis, :])

In [None]:
# Objective and optimiser used by the custom training loop.
loss_object = tf.keras.losses.MeanSquaredError()
optimizer = tf.keras.optimizers.Adam()

# Running metrics. Despite their names, the "*_accuracy" metrics are mean
# absolute errors, not classification accuracies.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.MeanAbsoluteError(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.MeanAbsoluteError(name='test_accuracy')

# Checkpoint a step counter, the optimiser and the model; the manager keeps
# at most three checkpoints in the directory.
checkpoint_dir = './busnet_checkpoints'
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(ckpt, checkpoint_dir, max_to_keep=3)

In [None]:
@tf.function
def train_step(input_batch, label_batch):
    """Run one optimisation step on a batch and update the training metrics.

    Args:
        input_batch: feature batch passed to ``model`` with ``training=True``.
        label_batch: targets compared against the predictions.
    """
    with tf.GradientTape() as tape:
        predictions = model(input_batch, training=True)
        batch_loss = loss_object(label_batch, predictions)

    # Back-propagate the batch loss and apply the update.
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    # Accumulate the running loss and mean absolute error.
    train_loss(batch_loss)
    train_accuracy(label_batch, predictions)

In [None]:
@tf.function
def test_step(input_batch, label_batch):
    """Evaluate one batch without dropout and update the test metrics.

    Args:
        input_batch: feature batch passed to ``model`` with ``training=False``.
        label_batch: targets compared against the predictions.
    """
    predictions = model(input_batch, training=False)
    batch_loss = loss_object(label_batch, predictions)

    test_loss(batch_loss)
    test_accuracy(label_batch, predictions)

In [None]:
# Resume from the newest checkpoint known to the manager, if there is one,
# and report which path was taken.
latest_checkpoint = manager.latest_checkpoint
ckpt.restore(latest_checkpoint)
print(
    "Restored from {}".format(latest_checkpoint)
    if latest_checkpoint
    else "Initializing from scratch."
)

In [None]:
epochs     = 15
batch_size = 256

log_every        = 200   # training steps between progress prints
checkpoint_every = 400   # a checkpoint is saved when ckpt.step is a multiple of this
eval_every       = 2000  # training steps between quick test-set evaluations
eval_batches     = 41    # number of test batches scored per quick evaluation

for epoch in range(epochs):
    # Metrics are running aggregates, so start every epoch from zero.
    train_loss.reset_states()
    train_accuracy.reset_states()
    print("starting of epoch", epoch)

    for step, (t_input, t_target) in enumerate(training_dataset.batch(batch_size)):
        # The dataset already yields float32 tensors, so no cast is needed.
        train_step(t_input, t_target)

        # Advance the persisted step counter once per training step. It was
        # previously advanced only on logging steps while the save check ran
        # on every step, so once the counter hit a multiple of 400 a
        # checkpoint was written on each of the next 200 steps.
        ckpt.step.assign_add(1)

        if step % log_every == 0:
            print("epoch:", epoch, "step:", step, "training loss", float(train_loss.result()), "training accuracy", float(train_accuracy.result()))

        if int(ckpt.step) % checkpoint_every == 0:
            save_path = manager.save()
            print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))

        if step % eval_every == 0:
            print("evaluating on test data...")
            test_loss.reset_states()
            test_accuracy.reset_states()
            # Score a bounded number of shuffled test batches. Separate loop
            # variables keep the training batch names from being overwritten.
            eval_dataset = (
                testing_dataset
                .shuffle(buffer_size=250)
                .batch(batch_size)
                .take(eval_batches)
            )
            for e_input, e_target in eval_dataset:
                test_step(e_input, e_target)
            print("test data result => testing loss", float(test_loss.result()), "testing accuracy", float(test_accuracy.result()))
            print("back to training...")
    print(epoch, "is done.")