<img src="https://raw.githubusercontent.com/patel-zeel/Adhoc/master/ADAIN.PNG" alt="Drawing" style="width: 600px;"/>

In [31]:
# !pip install -qq numpy==1.19.2
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Concatenate, Lambda, Multiply, Reshape, Input, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.initializers import RandomUniform
import tensorflow.keras.backend as K
from tensorflow.random import set_seed
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error
from IPython.display import clear_output
from time import time
import matplotlib.pyplot as plt
from matplotlib import rc

rc('font', size=16)
# import logging
# tf.get_logger().setLevel(logging.ERROR)
np.__version__

'1.19.2'

### Defining the model

In [32]:
def ADAIN(time_window, met_dim):
    """
    # Vocab
    met         - meteorology
    dis         - distance
    aq          - air quality
    n_stn       - n_stations
    dim         - dimension
    """
    # Hyperparameters
    do = 0.2 # Drop out
    np.random.seed(0)
    set_seed(0)
    
    # Defining inputs
    train_dist = Input(shape=(2,None,))
#     train_dist = Lambda(lambda x: K.permute_dimensions(x, (2,0,1)))(train_dist)
    train_met_aq = Input(shape=(24,met_dim+1,None,)) # +1 for aq
#     train_met_aq = Lambda(lambda x: K.permute_dimensions(x, (3,0,1,2)))(train_met_aq)
    test_met = Input(shape=(24,met_dim,))
    
    n_stn = 29#2 if train_dist.shape[2] == None else train_dist.shape[2]
    
    # Local station                                                        ## Input                --- Output
    lcl_lstm = LSTM(300, dropout=do)(test_met)                             # (-1,time_window,met)  --- (-1,300)
    lcl_dns = Dense(200, activation='relu')(lcl_lstm)                      # (-1,300)              --- (-1,200) 
    lcl_dns = Dropout(do)(lcl_dns)
    
    # Defininig shared layers (shared among train stations)
    s_dns1 = Dense(100, activation='relu')
    s_lstm = LSTM(300, activation='relu', dropout=do)
    s_dns2 = Dense(200, activation='relu')
    
    a_dns1 = Dense(200, activation='relu')
    a_dns2 = Dense(200)
    a_dns3 = Dense(1)
    
    # Training stations
    att_list = [] # Saving station attention weights
    stn_list = [] # Saving station features
    
    # Iterating over train stations using shared layers
#     print(n_stn)
#     def forward_once(d):                                            ## Input               --- Output
    for s_i in range(n_stn):
#         pd.to_pickle(s_i,str(s_i))
        train_dist_slice = Lambda(lambda x:x[:,:,s_i])(train_dist)         # (-1,dis,n_stn)       --- (-1,dis)
        stn_dns1 = s_dns1(train_dist_slice)                                # (-1,dis)             --- (-1,100)
        stn_dns1 = Dropout(do)(stn_dns1)
        train_met_aq_slice = Lambda(lambda x:x[:,:,:,s_i])(train_met_aq)
        stn_lstm = s_lstm(train_met_aq_slice)                              # (-1,time_window,met) --- (-1,300)
        stn_cat1 = Concatenate()([stn_dns1, stn_lstm])                     # (-1,100)+(-1,300)    --- (-1,400)
        stn_dns2 = s_dns2(stn_cat1)                                        # (-1,400)             --- (-1,200)
        stn_dns2 = Dropout(do)(stn_dns2)
        stn_list.append(stn_dns2)
        ### Attention Mechanism
        att_cat = Concatenate()([stn_dns2, lcl_dns])                       # (-1,200)*2           --- (-1,400)
        att_dns1 = a_dns1(att_cat)                                         # (-1,400)             --- (-1,200)
        att_dns1 = Dropout(do)(att_dns1)
        att_dns2 = a_dns2(att_dns1)                                        # (-1,200)             --- (-1,200)
        att_dns3 = a_dns3(att_dns2)                                        # (-1,200)             --- (-1,1)
        att_list.append(att_dns3)
#         return stn_dns2, att_dns3

#     stn_lst, att_lst = tf.map_fn(forward_once, [train_dist, train_met_aq])
    ### Normalize Attention
    att_cat1 = Concatenate()(att_list)                                     # (-1,1)*n_stn         --- (-1,n_stn)
    att_cat2 = Lambda(lambda inp: inp/K.sum(inp, axis=0))(att_cat1)        # (-1,n_stn)           --- (-1,n_stn)
    
    ### Multiply Attention with station features
    stn_cat2 = Concatenate()(stn_list)                                     # (-1,200)*n_station   --- (-1,200*n_stn) 
    stn_cat2 = Lambda(lambda x: K.reshape(x, (-1,200,n_stn)))(stn_cat2)    # (-1,200*n_stn)       --- (-1,200,n_stn)      
    att_cat2 = Lambda(lambda x: K.reshape(x, (-1,1,n_stn)))(att_cat2)      # (-1,n_stn)           --- (-1,1,n_stn)
#     print(stn_cat2)
    stn_mul = Multiply()([stn_cat2, att_cat2])                             # (-1,200,n_stn)*(-1,1,n_stn)  --- (-1,200,n_stn)
    stn_add = Lambda(lambda x: K.sum(x, axis=2))(stn_mul)                  # (-1,200,n_stn)       --- (-1,200)
    
    ### Concatenate local and station features
    final_cat = Concatenate()([stn_add, lcl_dns])                          # (-1,200)*2           --- (-1,400)
    final_dns1 = Dense(200, activation='relu')(final_cat)                  # (-1,400)             --- (-1,200)
    final_dns1 = Dropout(do)(final_dns1)
    final_dns2 = Dense(200)(final_dns1)
    final_dns3 = Dense(1)(final_dns2)                                      # (-1,200)             --- (-1,1)
    
    model = Model(inputs=[train_dist, train_met_aq, test_met], outputs=final_dns3)
    model.compile(loss='mean_squared_error',
                optimizer=tf.keras.optimizers.Adam(0.001))
    return model

### Model train and test

In [None]:
np.random.seed(0)
set_seed(0)
path = '../../../data_and_results/u-air/production/pm25_beijing_best36/quadratic/'
n_folds = 6

init = time()
fold_history = []
model = ADAIN(time_window=24, met_dim=4)
model.save_weights('model.h5')
for fold in [str(i) for i in range(n_folds)]:
    clear_output(wait=True)
    print('Training fold',fold)
    
    # Load initial weights
    model.load_weights('model.h5')
    print('weights loaded')
    # Load data
    train_dst = np.load(path+'data/fold_'+fold+'/train/adain/trn_dst.npy', allow_pickle=True).astype(np.float32)
    train_met_aq = np.load(path+'data/fold_'+fold+'/train/adain/trn_metaq.npy').astype(np.float32)
    test_met = np.load(path+'data/fold_'+fold+'/train/adain/tst_met.npy').astype(np.float32)
    test_aq = np.load(path+'data/fold_'+fold+'/train/adain/tst_aqi.npy').astype(np.float32)
    yscaler = pd.read_pickle(path+'data/fold_'+fold+'/scaler/adain/yscaler.pickle')
#     print(train_dst.shape, train_met_aq.shape, test_met.shape)
    # Define and train the model
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2, restore_best_weights=True)
    history = model.fit(x=[train_dst, train_met_aq, test_met], y=test_aq, 
          batch_size=128, epochs=10, validation_split=0.1, verbose=1)#, callbacks=[es])
    fold_history.append(history)
    
    # Test the model
    train_dst = np.load(path+'data/fold_'+fold+'/test/adain/trn_dst.npy', allow_pickle=True).astype(np.float32)
    train_met_aq = np.load(path+'data/fold_'+fold+'/test/adain/trn_metaq.npy').astype(np.float32)
    test_met = np.load(path+'data/fold_'+fold+'/test/adain/tst_met.npy').astype(np.float32)
    test_aq = np.load(path+'data/fold_'+fold+'/test/adain/tst_aqi.npy').astype(np.float32)
    
    pred_y = np.nan*np.zeros((train_dst.shape[0], train_dst.shape[-1]))
    for test_id in range(train_dst.shape[-1]):
        pred_yy = model.predict(x=[train_dst[:,:,:29,test_id], train_met_aq[:,:,:,:29], test_met[:,:,:,test_id]])
        pred_y[:,test_id] = yscaler.inverse_transform(pred_yy).ravel()
    
    if not os.path.exists(path+'results/adain/fold_'+fold):
        os.makedirs(path+'results/adain/fold_'+fold)
    np.save(path+'results/adain/fold_'+fold+'/pred_y.npy', pred_y)
    np.save(path+'results/adain/fold_'+fold+'/test_y.npy', test_aq)
print('Finished in',round(time()-init)/60,'minutes')

Training fold 0
weights loaded
Epoch 1/10


In [None]:
path = '../../../data_and_results/u-air/production/pm25_beijing_best36/quadratic/'
folds = [str(i) for i in range(6)]

### Plotting training curves

In [None]:
fig, ax = plt.subplots(2, len(folds)//2, figsize=(15,6))
ax = ax.ravel()
for i in range(len(folds)):
    ax[i].plot(fold_history[i].history['loss'], label='train loss')
    ax[i].plot(fold_history[i].history['val_loss'], label='validation loss')
plt.legend();

### Checking results

In [None]:
models = ['svr', 'gp_rbf', 'gp_m32', 'gp_m12', 'gp_linear','nsgp_rbf','adain']
res = pd.DataFrame(index=models, columns=['fold_'+str(i) for i in range(len(folds))]+['avg'])
# for model in models[:-1]:
#     pred_y, test_y = load_results(path, folds, f_ids, n_test, model)
#     fold_rmse = fold_wise_rmse(pred_y, test_y, len(folds))
#     fold_rmse.append(np.mean(fold_rmse))
#     res.loc[model, :] = fold_rmse

# ADAIN
for fold in folds:
    pred_y = np.load(path+'results/adain/fold_'+fold+'/pred_y.npy')
    test_y = np.load(path+'results/adain/fold_'+fold+'/test_y.npy')
    res.loc['adain', 'fold_'+fold] = mean_squared_error(test_y.ravel(), pred_y.ravel(), squared=False)
res.loc['adain','avg'] = res.loc['adain'].mean()

res

## Plotting predictions on test

In [None]:
plt.plot(test_aq.ravel(), test_aq.ravel())
plt.scatter(test_aq.ravel(), pred_y.ravel());
plt.xlabel('PM2.5 (Ground truth)');plt.ylabel('PM2.5 (Predictions)');

In [None]:
# plt.plot(o_tests, o_tests)
plt.plot(test_aq[:,0], label='ground truth');
plt.plot(pred_y[:,0], label='predictions');
plt.xlabel('Time-stamps'); plt.ylabel('PM2.5');
plt.legend(bbox_to_anchor=(1.5,1));

### Plotting attention

In [30]:
# print(model.layers)
# print(model.layers[10].get_config(), test_met.shape)
get_layer_output = K.function(
  [model.layers[0].input, model.layers[2].input, model.layers[91].input], 
  model.layers[-8].output)

output = get_layer_output([train_dst[0:1,:,:29,0], train_met_aq[0:1,:,:], test_met[0:1,:,0]])
output.shape

ValueError: Input 2 is incompatible with layer model_3: expected shape=(None, 24, 4), found shape=(1, 24, 6)

In [29]:
model.layers

[<tensorflow.python.keras.engine.input_layer.InputLayer at 0x7f9dc9ed8400>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc5f6160>,
 <tensorflow.python.keras.engine.input_layer.InputLayer at 0x7f9dc9e905e0>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc583f70>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc587f10>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc51bd30>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc4f5490>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc4e9c40>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc4d1e50>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc457a00>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc474550>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc43af10>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc52f160>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc51b760>,
 <tensorflow.python.keras.layers.core.Lambda at 0x7f9dbc3ab340>,
 <t