# Setup

In [None]:
import pandas as pd
import numpy as np
import time
import pickle

import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import *
from sklearn.metrics import *

In [None]:
from tensorflow.keras import backend as K
import gc

In [None]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front. The memory-growth flag has to be the same on every visible GPU, so
# it is applied to each device rather than only the first one.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Best effort: report the problem (e.g. the setting was applied too
        # late in the session) and carry on with the default allocation.
        print(e)

In [None]:
# Full dataset; the first CSV column is used as the row index.
# (Plain string literal: the path has no placeholders, so no f-string is needed.)
df = pd.read_csv('data/data_total.csv', index_col=0)

In [None]:
# Names of the form '<metric><i>' for i = 0..4, grouped by index first
# ('speed0', 'stop0', ..., 'wait0', 'speed1', ...).
ycols = [
    f'{metric}{i}'
    for i in range(5)
    for metric in ('speed', 'stop', 'timeloss', 'travel', 'wait')
]

# One column selection per metric keyword (substring match on the column name).
# The keyword order fixes the order of the target groups built from this list.
ycol = [
    df.columns[df.columns.str.contains(keyword)]
    for keyword in ('wait', 'timeloss', 'travel', 'speed', 'stop')
]

In [None]:
# Row-wise mean over each group of metric columns: one Series per entry of ycol.
y = [df[metric_columns].mean(axis=1) for metric_columns in ycol]

In [None]:
# Load the three preprocessed feature tables; the first CSV column is the index.
p, q, var = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'preprocessed/preprocessed_p.csv',
        'preprocessed/preprocessed_q.csv',
        'preprocessed/preprocessed_var.csv',
    )
)

In [None]:
# Inspect the column labels of p (expression only; nothing is assigned).
p.columns

In [None]:
# Inspect the column labels of q (expression only; nothing is assigned).
q.columns

In [None]:
# Inspect the column labels of var (expression only; nothing is assigned).
var.columns

In [None]:
# Per-row 'id' values; later matched against the held-out ids to separate
# training rows from validation rows.
strat = df['id']

In [None]:
# Feature matrices built by joining the preprocessed tables column-wise.
# `axis` is passed by keyword: recent pandas versions accept only `objs`
# positionally in pd.concat.
X = pd.concat([var, p, q], axis=1)        # all feature groups
X_wo_p = pd.concat([var, q], axis=1)      # without the p columns
X_wo_q = pd.concat([var, p], axis=1)      # without the q columns
X_wo_pq = pd.concat([var], axis=1)        # var columns only

In [None]:
# Combine the per-metric Series into one 2-D array with one column per metric.
y = np.stack(y, axis=1)

In [None]:
# Rescale the targets in place. Columns follow the order of ycol:
# 0 = wait, 1 = timeloss, 2 = travel, 3 = speed, 4 = stop.
# NOTE: the operations mutate y, so running this cell twice rescales twice.
# NOTE(review): the constants 300, 3, 6 and 3 are fixed scaling factors whose
# derivation is not shown in this notebook - confirm against the data ranges.
y[:,:3] /= 300
y[:, 3] = (y[:, 3]-3)/6
y[:, 4] /= 3

In [None]:
# Single-target version: only column 0 of y (the 'wait' group, per the order
# of ycol), kept two-dimensional by slicing instead of indexing.
y1 = y[:,:1]

# Models

In [None]:
def get_simple_MLP_y1_model():
    """Build a plain MLP with a single linear output.

    The input width is read from the module-level feature frame ``X``. Three
    swish-activated hidden layers (256, 128 and 64 units) feed one output unit
    without activation.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    inputs = Input((X.shape[1], ))

    hidden = inputs
    for units in (256, 128, 64):
        hidden = Dense(units, activation='swish')(hidden)

    outputs = Dense(1)(hidden)
    return tf.keras.models.Model(inputs, outputs)

"""
# Gora, P., & Bardoński, M. (2017, June). 
# Training neural networks to approximate traffic simulation outcomes. 
# In 2017 5th IEEE International Conference on Models and Technologies for Intelligent Transportation Systems (MT-ITS) (pp. 889-894). IEEE.
"""
def get_Gora_and_Bardonski_model(dropout_rate=0.05):
    """Build the single-output MLP named after Gora & Bardonski (2017).

    The input width is read from the module-level feature frame ``X``. Three
    ReLU hidden layers (100, 200 and 100 units) are each followed by dropout,
    and a single output unit without activation closes the network.

    Args:
        dropout_rate: Rate passed to every ``Dropout`` layer.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    inputs = Input((X.shape[1], ))

    hidden = inputs
    for units in (100, 200, 100):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = Dropout(dropout_rate)(hidden)

    outputs = Dense(1)(hidden)
    return tf.keras.models.Model(inputs, outputs)


def get_simple_MLP_y5_model():
    """Build a plain MLP that predicts five targets at once.

    The input width is read from the module-level feature frame ``X``. Three
    swish-activated hidden layers (256, 128 and 64 units) feed a five-unit
    output layer without activation.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    inputs = Input((X.shape[1], ))

    hidden = inputs
    for units in (256, 128, 64):
        hidden = Dense(units, activation='swish')(hidden)

    outputs = Dense(5)(hidden)
    return tf.keras.models.Model(inputs, outputs)


def get_shared_bottom_model(num_tasks=5, emb_dim=64):
    """Build a shared-bottom multi-task network.

    A common trunk (Dense 256 -> 128 -> ``emb_dim``, all swish) is computed
    once from the input, whose width is read from the module-level frame ``X``.
    Each task then gets its own tower, built with ``build_layer`` (one hidden
    layer of 64 units, one output unit), applied to the trunk output.

    Args:
        num_tasks: Number of task towers, i.e. number of output columns.
        emb_dim: Width of the trunk output fed to every tower.

    Returns:
        An uncompiled ``tf.keras`` functional model whose output concatenates
        the tower outputs along axis 1.
    """
    inputs = Input((X.shape[1], ))

    trunk = Dense(256, activation='swish')(inputs)
    trunk = Dense(128, activation='swish')(trunk)
    trunk = Dense(emb_dim, activation='swish')(trunk)

    # One independent tower per task, all reading the same trunk output.
    tower_outputs = [
        build_layer(emb_dim, (64, ), 1)(trunk)
        for _ in range(num_tasks)
    ]

    return tf.keras.models.Model(inputs, tf.concat(tower_outputs, 1))


def _build_moe_model(in_dim, num_experts, num_tasks, emb_dim, shared_gate=False):
    """Assemble a mixture-of-experts multi-task network.

    Shared implementation behind the public ``get_*MoE*`` builders, which
    differ only in the input width and in whether the gate is shared.

    Structure:
      * ``num_experts`` expert sub-networks (``build_layer`` with hidden widths
        256 and 128, output width ``emb_dim``) all read the same input and
        their outputs are stacked along axis 1.
      * A gate is a ``build_layer`` sub-network (hidden width 128, output width
        ``emb_dim``) followed by a bias-free softmax ``Dense`` over the experts.
        Its weights multiply the stacked expert outputs, which are then summed
        over the expert axis.
      * Each task has its own tower (``build_layer`` with hidden width 64 and a
        single output unit); tower outputs are concatenated along axis 1.

    Args:
        in_dim: Number of input features.
        num_experts: Number of expert sub-networks.
        num_tasks: Number of task towers, i.e. number of output columns.
        emb_dim: Output width of the experts and input width of the towers.
        shared_gate: If True, a single gate is built and its mixture feeds
            every tower; otherwise every task gets its own gate.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    i_ = Input((in_dim, ))

    experts = []
    for _ in range(num_experts):
        exp = build_layer(in_dim, (256, 128), emb_dim)
        experts.append(exp(i_))

    expert_concat = tf.keras.layers.Lambda(lambda x: tf.stack(x, axis=1))(experts)

    def gated_mixture():
        # Gate weights get a trailing axis so they broadcast over the
        # embedding dimension of the stacked expert outputs.
        g_layer = build_layer(in_dim, (128, ), emb_dim)
        g_in = g_layer(i_)
        g_out = tf.keras.layers.Dense(num_experts, use_bias=False, activation='softmax')(g_in)
        g_out = tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(g_out)
        return tf.keras.layers.Lambda(
            lambda x: tf.reduce_sum(x[0] * x[1], axis=1, keepdims=False)
        )([expert_concat, g_out])

    # Gates are created before any tower so that layers are constructed in the
    # same order as in the hand-written variants (keeps seeded runs comparable).
    if shared_gate:
        shared_mixture = gated_mixture()
        mixtures = [shared_mixture] * num_tasks
    else:
        mixtures = [gated_mixture() for _ in range(num_tasks)]

    task_outs = []
    for mixture in mixtures:
        out_layer = build_layer(emb_dim, (64, ), 1)
        task_outs.append(out_layer(mixture))

    task_outs = tf.concat(task_outs, 1)
    return tf.keras.models.Model(i_, task_outs)


def get_MMoE_model(num_experts=3, num_tasks=5, emb_dim=64):
    """Build the multi-gate mixture-of-experts model on the full features ``X``.

    Args:
        num_experts: Number of expert sub-networks.
        num_tasks: Number of tasks; each gets its own gate and tower.
        emb_dim: Output width of the experts.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    return _build_moe_model(X.shape[1], num_experts, num_tasks, emb_dim)


def get_OMoE_model(num_experts=3, num_tasks=5, emb_dim=64):
    """Build the one-gate mixture-of-experts model on the full features ``X``.

    A single gate mixes the experts and the same mixture feeds every tower.

    Args:
        num_experts: Number of expert sub-networks.
        num_tasks: Number of task towers.
        emb_dim: Output width of the experts.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    return _build_moe_model(
        X.shape[1], num_experts, num_tasks, emb_dim, shared_gate=True
    )


def get_MMoE_wo_q_model(num_experts=3, num_tasks=5, emb_dim=64):
    """Build the multi-gate model sized for ``X_wo_q`` (features without q).

    Args:
        num_experts: Number of expert sub-networks.
        num_tasks: Number of tasks; each gets its own gate and tower.
        emb_dim: Output width of the experts.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    return _build_moe_model(X_wo_q.shape[1], num_experts, num_tasks, emb_dim)


def get_MMoE_wo_p_model(num_experts=3, num_tasks=5, emb_dim=64):
    """Build the multi-gate model sized for ``X_wo_p`` (features without p).

    Args:
        num_experts: Number of expert sub-networks.
        num_tasks: Number of tasks; each gets its own gate and tower.
        emb_dim: Output width of the experts.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    return _build_moe_model(X_wo_p.shape[1], num_experts, num_tasks, emb_dim)


def get_MMoE_wo_pq_model(num_experts=3, num_tasks=5, emb_dim=64):
    """Build the multi-gate model sized for ``X_wo_pq`` (var features only).

    Args:
        num_experts: Number of expert sub-networks.
        num_tasks: Number of tasks; each gets its own gate and tower.
        emb_dim: Output width of the experts.

    Returns:
        An uncompiled ``tf.keras`` functional model.
    """
    return _build_moe_model(X_wo_pq.shape[1], num_experts, num_tasks, emb_dim)


def build_layer(in_dim, h_dim, out_dim):
    """Create a small feed-forward sub-network wrapped as a Keras model.

    Args:
        in_dim: Number of input features.
        h_dim: Iterable of hidden-layer widths; each hidden layer uses swish.
            May be empty, in which case the input feeds the output layer
            directly.
        out_dim: Width of the final ``Dense`` layer (no activation).

    Returns:
        A ``tf.keras`` functional model that can be called like a layer.
    """
    inputs = Input((in_dim, ))

    hidden = inputs
    for units in h_dim:
        hidden = Dense(units, activation='swish')(hidden)

    outputs = Dense(out_dim)(hidden)
    return tf.keras.models.Model(inputs, outputs)

In [None]:
# Show the installed TensorFlow version (expression only; nothing is assigned).
tf.__version__

# Train models

In [None]:
import csv

# Read the held-out ids: only the first row of the CSV file is used.
with open('data/test_networks.csv', 'r', newline='') as myfile:
    wr = csv.reader(myfile, quoting=csv.QUOTE_ALL)
    all_rows = list(wr)

test_data = all_rows[0]

In [None]:
from tqdm import tqdm

# Copy the held-out ids into a list; tqdm only wraps the iteration with a
# progress bar and does not change the values.
val_id = list(tqdm(test_data))

In [None]:
# Split by id: rows whose id is listed in val_id form the validation set and
# all remaining rows form the training set. The membership mask is computed
# once and reused for every table instead of being rebuilt for each one.
is_val = strat.isin(val_id)
is_train = ~is_val

tr_X = X[is_train]
tr_X_wo_p = X_wo_p[is_train]
tr_X_wo_q = X_wo_q[is_train]
tr_X_wo_pq = X_wo_pq[is_train]
tr_y = y[is_train]
tr_y1 = y1[is_train]

val_X = X[is_val]
val_X_wo_p = X_wo_p[is_val]
val_X_wo_q = X_wo_q[is_val]
val_X_wo_pq = X_wo_pq[is_val]
val_y = y[is_val]
val_y1 = y1[is_val]

## Single-output Regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer

In [None]:
# Degree-2 polynomial expansion (no bias column) of the listed columns only;
# every other column is passed through unchanged.
poly_transformer = (
    'transformer',
    PolynomialFeatures(degree=2, include_bias=False),
    ['p1', 'p2', 'p3', 'p4', 'total_len'],
)
poly = ColumnTransformer(transformers=[poly_transformer], remainder='passthrough')

# Fit the transformer on the training features and transform them in one step.
tr_X_poly = poly.fit_transform(tr_X)

In [None]:
# Ordinary least squares on the polynomial features, single target (tr_y1).
lin_reg = LinearRegression()

# Only the fit call itself is timed.
start_time = time.time()
lin_reg.fit(tr_X_poly, tr_y1)
end_time = time.time()

elapsed_seconds = end_time - start_time
print(f'Elapsed time is {elapsed_seconds} seconds')

In [None]:
import os

# Persist the fitted regression. The target directory is created first because
# nothing else in this notebook creates it, and open() does not create parents.
os.makedirs('model_poly_reg', exist_ok=True)
with open('model_poly_reg/model.pkl', 'wb') as f:
    pickle.dump(lin_reg, f)

## Single-output MLP

In [None]:
# Fix the random seeds so the run is repeatable.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# Early stopping (patience 15, best weights restored) plus learning-rate
# halving on plateaus with a floor of 5e-5.
es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

model = get_simple_MLP_y1_model()

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mape',
)
history = model.fit(
    x=tr_X,
    y=tr_y1,
    batch_size=1024,
    epochs=1500,
    callbacks=[es, ld],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_simple_MLP_y1/', so this relies on model.save() creating that
# directory (extension-less path) - confirm for the installed Keras version.
model.save("model_simple_MLP_y1")

# Store the per-epoch training history next to the saved model.
with open('model_simple_MLP_y1/historyDict', 'wb') as f:
        pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

## Single-output MLP (Pawel Gora and Marek Bardonski, 2017)

In [None]:
# Fix the random seeds so the run is repeatable.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# Early stopping only (patience 1000, best weights restored); no learning-rate
# schedule is used for this baseline.
es = tf.keras.callbacks.EarlyStopping(patience=1000, restore_best_weights=True)

model = get_Gora_and_Bardonski_model(dropout_rate=0.05)

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mse',
)
history = model.fit(
    x=tr_X,
    y=tr_y1,
    batch_size=10240,
    epochs=1000000,
    callbacks=[es],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_Gora_and_Bardonski/', so this relies on model.save() creating
# that directory (extension-less path) - confirm for the installed Keras version.
model.save("model_Gora_and_Bardonski")

# Store the per-epoch training history next to the saved model.
with open('model_Gora_and_Bardonski/historyDict', 'wb') as f:
    pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

## Multi-output MLP

In [None]:
# Fix the random seeds so the run is repeatable.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# Early stopping (patience 15, best weights restored) plus learning-rate
# halving on plateaus with a floor of 5e-5.
es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

model = get_simple_MLP_y5_model()

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mape',
)
history = model.fit(
    x=tr_X,
    y=tr_y,
    batch_size=1024,
    epochs=1500,
    callbacks=[es, ld],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_simple_MLP_y5/', so this relies on model.save() creating that
# directory (extension-less path) - confirm for the installed Keras version.
model.save("model_simple_MLP_y5")

# Store the per-epoch training history next to the saved model.
with open('model_simple_MLP_y5/historyDict', 'wb') as f:
    pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

## Multi-output Shared Bottom

In [None]:
# Fix the random seeds so the run is repeatable.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# Early stopping (patience 15, best weights restored) plus learning-rate
# halving on plateaus with a floor of 5e-5.
es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

model = get_shared_bottom_model()

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mape',
)
history = model.fit(
    x=tr_X,
    y=tr_y,
    batch_size=1024,
    epochs=1500,
    callbacks=[es, ld],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_shared_bottom/', so this relies on model.save() creating that
# directory (extension-less path) - confirm for the installed Keras version.
model.save("model_shared_bottom")

# Store the per-epoch training history next to the saved model.
with open('model_shared_bottom/historyDict', 'wb') as f:
    pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

## Multi-output OMoE

In [None]:
# Fix the random seeds so the run is repeatable.
# NOTE(review): this section seeds with 1 while the other training sections
# use 0 - confirm that the difference is intentional.
tf.random.set_seed(1)
tf.keras.utils.set_random_seed(1)

# Early stopping (patience 15, best weights restored) plus learning-rate
# halving on plateaus with a floor of 5e-5.
es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

model = get_OMoE_model()

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mape',
)
history = model.fit(
    x=tr_X,
    y=tr_y,
    batch_size=1024,
    epochs=1500,
    callbacks=[es, ld],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_OMoE/', so this relies on model.save() creating that
# directory (extension-less path) - confirm for the installed Keras version.
model.save("model_OMoE")

# Store the per-epoch training history next to the saved model.
with open('model_OMoE/historyDict', 'wb') as f:
    pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

## Multi-output MMoE

In [None]:
# Fix the random seeds so the run is repeatable.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# Early stopping (patience 15, best weights restored) plus learning-rate
# halving on plateaus with a floor of 5e-5.
es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

model = get_MMoE_model()

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mape',
)
history = model.fit(
    x=tr_X,
    y=tr_y,
    batch_size=1024,
    epochs=1500,
    callbacks=[es, ld],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_MMoE_original/', so this relies on model.save() creating that
# directory (extension-less path) - confirm for the installed Keras version.
model.save("model_MMoE_original")

# Store the per-epoch training history next to the saved model.
with open('model_MMoE_original/historyDict', 'wb') as f:
    pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

## Multi-output MMoE without traffic parameters

In [None]:
# Fix the random seeds so the run is repeatable.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# Early stopping (patience 15, best weights restored) plus learning-rate
# halving on plateaus with a floor of 5e-5.
es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

model = get_MMoE_wo_p_model()

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mape',
)
# Trained on the feature set that leaves out the p columns.
history = model.fit(
    x=tr_X_wo_p,
    y=tr_y,
    batch_size=1024,
    epochs=1500,
    callbacks=[es, ld],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_MMoE_wo_traffic_param/', so this relies on model.save() creating
# that directory (extension-less path) - confirm for the installed Keras version.
model.save("model_MMoE_wo_traffic_param")

# Store the per-epoch training history next to the saved model.
with open('model_MMoE_wo_traffic_param/historyDict', 'wb') as f:
    pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

## Multi-output MMoE without network parameters

In [None]:
# Fix the random seeds so the run is repeatable.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# Early stopping (patience 15, best weights restored) plus learning-rate
# halving on plateaus with a floor of 5e-5.
es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

model = get_MMoE_wo_q_model()

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mape',
)
# Trained on the feature set that leaves out the q columns.
history = model.fit(
    x=tr_X_wo_q,
    y=tr_y,
    batch_size=1024,
    epochs=1500,
    callbacks=[es, ld],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_MMoE_wo_network_param/', so this relies on model.save() creating
# that directory (extension-less path) - confirm for the installed Keras version.
model.save("model_MMoE_wo_network_param")

# Store the per-epoch training history next to the saved model.
with open('model_MMoE_wo_network_param/historyDict', 'wb') as f:
    pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()

## Multi-output MMoE without both parameters

In [None]:
# Fix the random seeds so the run is repeatable.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# Early stopping (patience 15, best weights restored) plus learning-rate
# halving on plateaus with a floor of 5e-5.
es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

model = get_MMoE_wo_pq_model()

# The timed region covers compilation and fitting.
start_time = time.time()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='mape',
)
# Trained on the feature set that leaves out both the p and the q columns.
history = model.fit(
    x=tr_X_wo_pq,
    y=tr_y,
    batch_size=1024,
    epochs=1500,
    callbacks=[es, ld],
    validation_split=0.15,
    shuffle=True,
)
end_time = time.time()

print(f'Elapsed time is {end_time - start_time} seconds')

In [None]:
# Save the trained model. NOTE(review): the history file below is written
# inside 'model_MMoE_wo_both_param/', so this relies on model.save() creating
# that directory (extension-less path) - confirm for the installed Keras version.
model.save("model_MMoE_wo_both_param")

# Store the per-epoch training history next to the saved model.
with open('model_MMoE_wo_both_param/historyDict', 'wb') as f:
    pickle.dump(history.history, f)

# Release Keras global state and the model object before the next experiment.
K.clear_session()
del model
gc.collect()