# Setup

In [None]:
import os
import pickle
import time

import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import *
from sklearn.metrics import *

In [None]:
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Memory growth has to be configured identically on every visible GPU;
    # enabling it on the first device only fails on multi-GPU machines.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # Raised when the GPUs were already initialised before this cell ran.
    print(e)

In [None]:
# Raw simulation results; the first CSV column is used as the row index.
df = pd.read_csv('data/data_total.csv', index_col=0)

In [None]:
# Names of the form 'speed0', 'stop0', ..., 'wait4'.
# NOTE(review): `ycols` is not referenced again in the visible notebook.
ycols = [
    f'{metric}{idx}'
    for idx in range(5)
    for metric in ('speed', 'stop', 'timeloss', 'travel', 'wait')
]

# One group of df columns per target, selected by substring match on the column
# name. The order here (wait, timeloss, travel, speed, stop) fixes the column
# order of the target matrix `y` built from these groups.
ycol = [
    df.columns[df.columns.str.contains(keyword)]
    for keyword in ('wait', 'timeloss', 'travel', 'speed', 'stop')
]

In [None]:
# Row-wise mean over each column group -> one Series per target, in `ycol` order.
y = [df[group].mean(axis=1) for group in ycol]

In [None]:
# Load the three preprocessed feature tables; the first CSV column of each
# file becomes the row index. They are concatenated column-wise into X below.
p = pd.read_csv('preprocessed/preprocessed_p.csv', index_col=0)
q = pd.read_csv('preprocessed/preprocessed_q.csv', index_col=0)
var = pd.read_csv('preprocessed/preprocessed_var.csv', index_col=0)

In [None]:
# Inspect the feature names loaded from preprocessed_p.csv.
p.columns

In [None]:
# Inspect the feature names loaded from preprocessed_q.csv.
q.columns

In [None]:
# Inspect the feature names loaded from preprocessed_var.csv.
var.columns

In [None]:
# Per-row 'id' column of df; used later to split rows into train/validation
# by membership in the held-out id list.
strat = df['id']

In [None]:
# Column-wise concatenation of the three feature tables (aligned on their index).
# `axis` is passed by keyword: the positional form is deprecated in pandas 1.x
# and raises a TypeError in pandas 2.x, where it is keyword-only.
X = pd.concat([var, p, q], axis=1)

In [None]:
# Turn the list of per-target Series into one (n_rows, n_targets) array.
# NOTE: this rebinds `y`, so the cell must be run exactly once after the list is built.
y = np.stack(y, axis=1)

In [None]:
# Rescale the targets in place (column order follows `ycol`:
# wait, timeloss, travel, speed, stop).
# NOTE: in-place and therefore not idempotent -- run once per freshly built `y`.
y[:, 0:3] = y[:, 0:3] / 300   # wait, timeloss, travel
y[:, 3] -= 3                  # speed: shift ...
y[:, 3] /= 6                  # ... then scale
y[:, 4] = y[:, 4] / 3         # stop

# Models

In [None]:
def get_MMoE_model(num_experts=3, num_tasks=5, emb_dim=64, in_dim=None):
    """Build a multi-gate mixture-of-experts (MMoE) model with one output per task.

    Architecture:
      * ``num_experts`` expert MLPs (hidden sizes 256 and 128) map the input to
        an ``emb_dim`` embedding each.
      * For every task, a gate MLP (hidden size 128) followed by a bias-free
        softmax layer produces ``num_experts`` mixing weights; the task
        representation is the weighted sum of the expert embeddings.
      * Every task representation goes through its own tower MLP (hidden size
        64) producing one scalar; the scalars are concatenated column-wise.

    Args:
        num_experts: Number of shared expert sub-networks.
        num_tasks: Number of gated task towers (= number of output columns).
        emb_dim: Width of the expert and gate-MLP outputs.
        in_dim: Number of input features. When ``None`` (default) it is taken
            from the notebook-level feature matrix ``X`` (``X.shape[1]``).

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``(batch, in_dim)`` inputs to
        ``(batch, num_tasks)`` outputs.
    """
    if in_dim is None:
        # Default: derive the width from the global feature matrix.
        in_dim = X.shape[1]
    i_ = Input((in_dim, ))

    experts = []
    for _ in range(num_experts):
        exp = build_layer(in_dim, (256, 128), emb_dim)
        experts.append(exp(i_))

    # (batch, num_experts, emb_dim)
    expert_concat = tf.keras.layers.Lambda(lambda x: tf.stack(x, axis=1))(experts)

    mmoe_outs = []
    for _ in range(num_tasks):
        g_layer = build_layer(in_dim, (128, ), emb_dim)
        g_in = g_layer(i_)
        # Softmax weights over the experts: (batch, num_experts) -> (batch, num_experts, 1)
        g_out = tf.keras.layers.Dense(num_experts, use_bias=False, activation='softmax')(g_in)
        g_out = tf.keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=-1))(g_out)

        # Gate-weighted sum over the expert axis -> (batch, emb_dim)
        g_mul_out = tf.keras.layers.Lambda(
            lambda x: tf.reduce_sum(x[0] * x[1], axis=1, keepdims=False)
        )([expert_concat, g_out])

        mmoe_outs.append(g_mul_out)

    task_outs = []
    for mmoe_out in mmoe_outs:
        out_layer = build_layer(emb_dim, (64, ), 1)
        task_outs.append(out_layer(mmoe_out))

    if len(task_outs) == 1:
        # A single tower needs no merge.
        merged = task_outs[0]
    else:
        # Merge with a Keras layer rather than a raw TensorFlow op so the
        # functional graph is made of Keras layers only.
        merged = tf.keras.layers.Concatenate(axis=1)(task_outs)
    return tf.keras.models.Model(i_, merged)


def build_layer(in_dim, h_dim, out_dim):
    """Create a small fully connected network wrapped as a Keras model.

    Args:
        in_dim: Number of input features.
        h_dim: Iterable of hidden-layer widths; each hidden layer uses the
            'swish' activation.
        out_dim: Width of the final linear (no activation) layer.

    Returns:
        A ``tf.keras.Model`` mapping ``(batch, in_dim)`` to ``(batch, out_dim)``.
    """
    inputs = Input((in_dim, ))

    hidden = inputs
    for units in h_dim:
        hidden = Dense(units, activation='swish')(hidden)

    outputs = Dense(out_dim)(hidden)
    return tf.keras.models.Model(inputs, outputs)

In [None]:
# Show the installed TensorFlow version for reproducibility.
tf.__version__

# Train models

In [None]:
import csv

# The held-out network ids are stored as the first row of the CSV file;
# any further rows are read but ignored.
with open('data/test_networks.csv', 'r', newline='') as myfile:
    wr = csv.reader(myfile, quoting=csv.QUOTE_ALL)
    all_rows = list(wr)
test_data = all_rows[0]

In [None]:
from tqdm import tqdm

# Copy the ids into a plain list; tqdm only wraps the iteration with a progress bar.
val_id = [network_id for network_id in tqdm(test_data)]

In [None]:
# Boolean row mask: True where the row's id belongs to a held-out network.
# NOTE(review): val_id holds strings read from CSV -- confirm df['id'] has a matching dtype.
in_val = strat.isin(val_id)

tr_X, tr_y = X[~in_val], y[~in_val]
val_X, val_y = X[in_val], y[in_val]

## Sample Efficiency of Multi-output MMoE

In [None]:
def get_incremental_balanced_sample(tr_X, tr_y, samples_per_network, num_networks=600, network_size=5000):
    """Take the first ``samples_per_network`` rows of every network.

    The data is treated as ``num_networks`` consecutive blocks of
    ``network_size`` rows each (block ``i`` covers rows
    ``[i * network_size, (i + 1) * network_size)``). Because the leading rows
    of each block are always taken, a smaller sample is a subset of any
    larger one.

    Args:
        tr_X: Feature DataFrame, indexed positionally via ``.iloc``.
        tr_y: Target array, row-aligned with ``tr_X``.
        samples_per_network: Rows to take from the start of each block; must
            satisfy ``0 <= samples_per_network <= network_size``.
        num_networks: Number of blocks to sample from.
        network_size: Number of rows per block.

    Returns:
        ``(X_sample, y_sample)``: the concatenated feature rows (with a fresh
        0..n-1 index) and the matching target rows.

    Raises:
        ValueError: If ``samples_per_network`` lies outside
            ``[0, network_size]``; the slice would otherwise spill into a
            neighbouring network and silently unbalance or duplicate rows.
    """
    if not 0 <= samples_per_network <= network_size:
        raise ValueError(
            f'samples_per_network must be between 0 and network_size '
            f'({network_size}), got {samples_per_network}'
        )

    feature_parts = []
    target_parts = []
    for network_idx in range(num_networks):
        start = network_idx * network_size
        stop = start + samples_per_network
        feature_parts.append(tr_X.iloc[start:stop])
        target_parts.append(tr_y[start:stop])

    sampled_X = pd.concat(feature_parts).reset_index(drop=True)
    sampled_y = np.concatenate(target_parts)
    return sampled_X, sampled_y

In [None]:
# Training-set sizes to sweep: rows taken from each network.
sample_numbers_per_network = [
    10, 20, 50,
    100, 200, 500,
    1000, 2000, 5000,
]

In [None]:
from tensorflow.keras import backend as K
import gc

In [None]:
# Train one MMoE model per training-set size, then persist the model, its
# training history and the wall-clock training time.
tf.random.set_seed(0)
tf.keras.utils.set_random_seed(0)

# The inference section loads models from 'models/...' and reads
# 'training_time.csv', so both artefacts are written to those locations here.
MODEL_ROOT = 'models'
os.makedirs(MODEL_ROOT, exist_ok=True)

training_times = []

for sample_number in sample_numbers_per_network:
    es = tf.keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
    ld = tf.keras.callbacks.ReduceLROnPlateau(factor=0.5, min_lr=5e-5)

    model = get_MMoE_model()

    # The timer covers compilation, sampling and fitting.
    start_time = time.time()

    model.compile(loss='mape', optimizer=tf.keras.optimizers.Adam(1e-3))

    X_k, y_k = get_incremental_balanced_sample(tr_X, tr_y, sample_number)

    # Keras takes the validation split from the last rows of X_k before shuffling.
    history = model.fit(X_k, y_k,
             epochs=1500,
             batch_size=1024,
             validation_split=0.15,
             callbacks=[es, ld],
             shuffle=True,
             )

    end_time = time.time()
    elapsed = end_time - start_time

    print(f'Elapsed time is {elapsed} seconds')
    training_times.append({'sample number': sample_number, 'training time': elapsed})

    model_path = f"{MODEL_ROOT}/model_MMoE_{sample_number}_samples_per_network"
    model.save(model_path)

    # Store the per-epoch metrics next to the saved model.
    with open(f'{model_path}/historyDict', 'wb') as f:
        pickle.dump(history.history, f)

    # Release graph state and memory before building the next model.
    K.clear_session()
    del model
    gc.collect()

# Two columns, no index column: the inference section merges on 'sample number'
# and expects exactly one additional column.
pd.DataFrame(training_times, columns=['sample number', 'training time']).to_csv(
    'training_time.csv', index=False
)

# Seaborn settings

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import gridspec

In [None]:
# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()

In [None]:
# Current seaborn colour palette.
# NOTE(review): `palette` is not referenced again in the visible notebook.
palette = sns.color_palette()

# Inference

In [None]:
# Load every trained model from disk, keyed by its samples-per-network setting.
trained_sample_numbers = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000]
MMoE_models = {
    sample_number: tf.keras.models.load_model(f'models/model_MMoE_{sample_number}_samples_per_network')
    for sample_number in trained_sample_numbers
}

In [None]:
import csv

# Re-read the held-out network ids (first CSV row) so the inference section
# does not depend on the copy loaded in the training section.
with open('data/test_networks.csv', 'r', newline='') as myfile:
    wr = csv.reader(myfile, quoting=csv.QUOTE_ALL)
    all_rows = list(wr)
test_data = all_rows[0]

In [None]:
from tqdm import tqdm

# Copy the ids into a plain list; tqdm only wraps the iteration with a progress bar.
val_id = [network_id for network_id in tqdm(test_data)]

In [None]:
# Evaluate every loaded model on the held-out networks.
# Each result row is [model key, overall MAPE, MAPE of task 0, ..., MAPE of task 4], in percent.
prediction_results = []
num_tasks = 5

for model_name, model in MMoE_models.items():
    test_X, test_y = val_X, val_y

    pred = model.predict(test_X)
    trues = test_y

    # score writer
    overall = (np.abs(pred - trues) / trues).mean() * 100
    tmp_scr = [model_name, overall]

    # NOTE(review): the factor 300 appears in both numerator and denominator, so
    # these are relative errors on the scaled targets. For the shifted 'speed'
    # target this may differ from a MAPE in original units -- confirm the intent.
    tmp_scr.extend(
        ((np.abs(pred[:, i] - trues[:, i]) * 300) / (trues[:, i] * 300)).mean() * 100
        for i in range(num_tasks)
    )

    prediction_results.append(tmp_scr)

In [None]:
# Tabulate the scores; the task columns follow the target order used to build `y`.
score_columns = ['sample number', 'total', 'wait', 'timeloss', 'travel', 'speed', 'stop']
df_prediction = pd.DataFrame(prediction_results, columns=score_columns)
df_prediction

In [None]:
# Attach the recorded training time to each score row.
df_training_time = pd.read_csv('training_time.csv')
df_merged = df_prediction.merge(df_training_time, on='sample number')

# Positional rename to display names: this relies on training_time.csv
# contributing exactly one column besides 'sample number'.
df_merged.columns = [
    'sample number',
    'average',
    'waiting time',
    'time loss',
    'travel time',
    'speed',
    'waiting count',
    'training time',
]
df_merged

In [None]:
# Re-apply the seaborn theme with larger fonts for the figures below.
sns.set(font_scale=1.5)

In [None]:
import matplotlib.pyplot as plt

# Sample-efficiency figure with a logarithmic x-axis:
# MAPE curves on the left y-axis, training time on the right y-axis.
x = df_merged['sample number']
y_metrics = ['average', 'waiting time', 'time loss', 'travel time', 'speed', 'waiting count']
training_time = df_merged['training time']

fig, ax1 = plt.subplots(figsize=(10, 6))

# log scale
ax1.set_xscale('log')

# left y-axis: one MAPE curve per metric, remembered by legend label
line_dict = {}
for col in y_metrics:
    line_dict[col] = ax1.plot(x, df_merged[col], marker='o', label=col)[0]

ax1.set_xlabel('Sample Number (log scale)')
ax1.set_ylabel('MAPE (%)')
ax1.grid(True)

# right y-axis: training time
ax2 = ax1.twinx()
line_dict['training time'] = ax2.plot(
    x, training_time, color='black', linestyle='--', marker='x', label='Training Time'
)[0]
ax2.set_ylabel('Training Time (s)')
ax2.grid(False)

# Legend entries in a fixed, hand-picked order (differs from plotting order).
desired_order = ['average', 'waiting time', 'travel time', 'time loss', 'waiting count', 'speed', 'training time']
sorted_lines = [line_dict[label] for label in desired_order]
sorted_labels = desired_order

# Combine both axes into one legend + adjust location
legend = ax1.legend(sorted_lines, sorted_labels, loc='upper center', bbox_to_anchor=(0.5, 1))
legend_frame = legend.get_frame()
legend_frame.set_facecolor('white')
legend_frame.set_alpha(1)

plt.tight_layout()
plt.show()

In [None]:
# Save whatever figure `fig` currently refers to. Run this directly after the
# log-scale plot cell: the linear-scale cell further down rebinds `fig`.
fig.savefig('sample_efficiency_log_scale.png', bbox_inches='tight')

### Linear scale graph

In [None]:
import matplotlib.pyplot as plt

# Sample-efficiency figure with a linear x-axis:
# MAPE curves on the left y-axis, training time on the right y-axis.
x = df_merged['sample number']
y_metrics = ['average', 'waiting time', 'time loss', 'travel time', 'speed', 'waiting count']
training_time = df_merged['training time']

fig, ax1 = plt.subplots(figsize=(10, 6))

# left y-axis : performance measures
lines_1 = []
labels_1 = []

for col in y_metrics:
    line, = ax1.plot(x, df_merged[col], marker='o', label=col)
    lines_1.append(line)
    labels_1.append(col)

# This axis is linear, so the label must not claim a log scale.
ax1.set_xlabel('Sample Number')
ax1.set_ylabel('MAPE (%)')
ax1.grid(True)

# right y-axis: training time
ax2 = ax1.twinx()
line2, = ax2.plot(x, training_time, color='black', linestyle='--', marker='x', label='Training Time')
lines_1.append(line2)
labels_1.append('training time')
ax2.set_ylabel('Training Time (s)')
ax2.grid(False)


# Legend entries in a fixed, hand-picked order (differs from plotting order).
desired_order = ['average', 'waiting time', 'travel time', 'time loss', 'waiting count', 'speed', 'training time']
line_dict = {label: line for line, label in zip(lines_1, labels_1)}
sorted_lines = [line_dict[label] for label in desired_order]
sorted_labels = desired_order

# Combine both axes into one legend + adjust location
legend = ax1.legend(sorted_lines, sorted_labels, loc='upper center', bbox_to_anchor=(0.3, 1))
legend.get_frame().set_facecolor('white')
legend.get_frame().set_alpha(1)

plt.tight_layout()
plt.show()