In [35]:
import pandas as pd
import numpy as np
from tensorflow import keras
from keras import layers
import tensorflow as tf
import tensorflow_probability as tfp
# from tensorflow_probability.layers import DenseVariational
from tensorflow.keras.layers import Input, Dropout, Dense, BatchNormalization
import sys
sys.path.append('../..')
from modules import utils
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt

#### The dataset

In [19]:
jinja_df = pd.read_csv('../data/jinja_data.csv', parse_dates=['timestamp'])
jinja_df.head()

Unnamed: 0,site_name,latitude,longitude,city,timestamp,pm2_5_calibrated_value,pm2_5_raw_value,pm10_raw_value,pm10_calibrated_value,site_id,device_number,device_name
0,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 00:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23
1,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 01:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23
2,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 02:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23
3,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 03:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23
4,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 04:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23


In [20]:
latitudes = jinja_df['latitude'].unique()
longitudes = jinja_df['longitude'].unique()
device_ids = jinja_df['device_number'].unique()
len(latitudes), len(longitudes), len(device_ids)

(10, 10, 10)

In [21]:
final_df = pd.DataFrame()
cols = ['timestamp', 'latitude', 'longitude', 'pm2_5_calibrated_value']
for i, device_id in enumerate(device_ids):
    device_df = utils.get_device_data(jinja_df, device_id, cols)
    processed_df = utils.preprocessing(device_df)
    final_df = pd.concat([final_df, processed_df])
final_df.reset_index(drop=True, inplace=True)

final_df.head()

Unnamed: 0,time,latitude,longitude,pm2_5
0,452909.0,0.437337,33.211051,12.2844
1,452910.0,0.437337,33.211051,11.6507
2,452911.0,0.437337,33.211051,22.398
3,452912.0,0.437337,33.211051,17.4937
4,452913.0,0.437337,33.211051,25.1622


In [22]:
idx=1
device_indices = final_df[final_df.latitude==latitudes[idx]].index
test_dataset = final_df.loc[device_indices]
train_dataset = pd.concat([final_df, test_dataset]).drop_duplicates(keep=False)
len(final_df), len(train_dataset), len(test_dataset)

(13653, 12801, 852)

#### The wine dataset

In [23]:
# import tensorflow_datasets as tfds

In [24]:
# def get_train_test_splits(train_size, batch_size=1):
#     dataset = (
#         tfds.load(name='wine_quality', as_supervised=True, split='train')
#         .map(lambda x, y: (x, tf.cast(y, tf.float32)))
#         .prefetch(buffer_size=dataset_size)
#         .cache()
        
#     )
#     print(f'type of dataset: {type(dataset)}')
# #     print(f'shape of dataset: {dataset._input_dataset._input_dataset._batch_size.numpy()}')
    
#     train_dataset= (dataset.take(train_size).shuffle(buffer_size=train_size).batch(batch_size))
#     test_dataset = dataset.skip(train_size).batch(batch_size)
#     return train_dataset, test_dataset

In [25]:
# dataset_size = 4898
# batch_size = 256
# train_size = int(dataset_size*0.85)
# train_dataset, test_dataset = get_train_test_splits(train_size, batch_size)

In [26]:
# type(train_dataset)

#### Defining the model

In [27]:
hidden_units = [8, 8]
learning_rate = 0.001

In [66]:
def run_experiment(model, loss, train_dataset, test_dataset):
    X_train = train_dataset.iloc[:, 0:-1]
    y_train = train_dataset.iloc[:, -1]
    X_train, y_train = np.array(X_train), np.array(y_train).reshape(-1, 1)
    
    X_test = test_dataset.iloc[:, 0:-1]
    y_test = test_dataset.iloc[:, -1]
    X_test, y_test = np.array(X_test), np.array(y_test).reshape(-1, 1)
    
    model.compile(optimizer = keras.optimizers.RMSprop(learning_rate=learning_rate), loss=loss, 
                  metrics = [keras.metrics.RootMeanSquaredError()])
    print('Start training the model ...')
    model.fit(X_train, y_train, epochs=num_epochs, validation_split=0.2)
    print('Model training finished.')
    print('Evaluating model performance ...')
    
#     y_pred = model.predict(X_test) 
    y_pred = model.predict(X_test).flatten()
    rmse_a = sqrt(mean_squared_error(y_test, y_pred))
    _, rmse_b = model.evaluate(X_test, y_test, verbose=0)
    print(f'Test RMSEs: {round(rmse_a, 3)}, {round(rmse_b, 3)}')

In [67]:
FEATURE_NAMES = ['time', 'latitude', 'longitude']

In [68]:
# def create_model_inputs():
#     inputs = {}
#     for feature_name in FEATURE_NAMES:
#         inputs[feature_name] = Input(name=feature_name, shape=(1,), dtype=tf.float32)
#     return inputs

In [69]:
def create_baseline_model(input_size, dropout=0.1):
#     inputs = create_model_inputs()
#     input_values = [value for _,value in sorted(inputs.items())]
#     features = layers.concatenate(input_values)
#     features = BatchNormalization()(features) #to remove
    
#     for units in hidden_units:
#         features = Dense(units, activation='sigmoid')(features)
#     outputs = Dense(units=1)(features)
#     model = keras.Model(inputs=inputs, outputs=outputs)
    
    model = tf.keras.Sequential() 
    model.add(Input(shape=(input_size,), name='Input-Layer')) 
    model.add(Dropout(rate=dropout))
    model.add(Dense(128, activation='relu', name='Hidden-Layer1'))
    model.add(Dropout(rate=dropout))
    model.add(Dense(32, activation='relu', name='Hidden-Layer2'))
    model.add(Dropout(rate=dropout))
    model.add(Dense(1, activation='linear', name='Output-Layer')) 
    
    return model

In [71]:
# dataset_size = len(final_df)
# batch_size=256
# num_epochs=500
# mse_loss = keras.losses.MeanSquaredError()
# baseline_model = create_baseline_model(final_df.shape[1]-1)
# run_experiment(baseline_model, mse_loss, train_dataset, test_dataset)

#### Bayesian NN

In [None]:
def bnn_not_used_chatgpt_i_think(X_train, y_train):
    model = tf.keras.Sequential([
    tfp.layers.DenseVariational(units=128, make_prior_fn=tfp.layers.default_multivariate_normal_fn),
    tfp.layers.DenseVariational(units=64, make_prior_fn=tfp.layers.default_multivariate_normal_fn),
    tfp.layers.DenseVariational(units=1, make_prior_fn=tfp.layers.default_multivariate_normal_fn),
    ])
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss=tf.keras.losses.MeanSquaredError())
    model.fit(x_train, y_train, epochs=10, batch_size=32)
    
    return model

In [72]:
def prior(kernel_size, bias_size, dtype=None):
    n = kernel_size +bias_size
    prior_model = keras.Sequential(
        [tfp.layers.DistributionLambda(
            lambda t:tfp.distributions.MultivariateNormalDiag(
                loc=tf.zeros(n), scale_diag=tf.ones(n)
            ))
        ])
    return prior_model

In [73]:
def posterior(kernel_size, bias_size, dtype=None):
    n = kernel_size +bias_size
    posterior_model = keras.Sequential(
        [
            tfp.layers.VariableLayer(
                tfp.layers.MultivariateNormalTriL.params_size(n), dtype=dtype
            ),
         tfp.layers.MultivariateNormalTriL(n),
        ]
    )
    return posterior_model

In [74]:
def create_bnn_model(input_size, train_size, dropout=0.1): #train_size = len(X_train)
#     inputs = create_model_inputs()
#     features = keras.layers.concatenate(list(inputs.values()))
#     features = layers.BatchNormalization()(features)


    # Create hidden layers with weight uncertainty using the DenseVariational layer.
#     for units in hidden_units:
#         features = tfp.layers.DenseVariational(
#             units=units,
#             make_prior_fn=prior,
#             make_posterior_fn=posterior,
#             kl_weight=1 / train_size,
#             activation='sigmoid',
#         )(features)
# The output is deterministic: a single point estimate.
#     outputs = layers.Dense(units=1)(features)
#     model = keras.Model(inputs=inputs, outputs=outputs)
    
    model = tf.keras.Sequential() 
    model.add(Input(shape=(input_size,), name='Input-Layer')) 
    model.add(Dropout(rate=dropout))
    model.add(tfp.layers.DenseVariational(units=128, make_prior_fn=prior, make_posterior_fn=posterior, 
              activation='relu', name='Hidden-Layer-1', kl_weight=1/train_size))
    model.add(Dropout(rate=dropout))
    model.add(tfp.layers.DenseVariational(units=32, make_prior_fn=prior, make_posterior_fn=posterior, 
              activation='relu', name='Hidden-Layer-2', kl_weight=1/train_size))
    model.add(Dropout(rate=dropout))
    model.add(Dense(1))
    
    return model

In [76]:
# num_epochs = 500
# train_sample_size = 100
# small_train_dataset = train_dataset[:100]

# bnn_model_small = create_bnn_model(final_df.shape[1]-1, train_sample_size)
# run_experiment(bnn_model_small, mse_loss, small_train_dataset, test_dataset)

In [77]:
num_epochs = 500
bnn_model_full = create_bnn_model(final_df.shape[1]-1, len(train_dataset))
run_experiment(bnn_model_full, mse_loss, train_dataset, test_dataset)
# compute_predictions(bnn_model_full)

Start training the model ...
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500


Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78/500
Epoch 79/500
Epoch 80/500
Epoch 81/500
Epoch 82/500
Epoch 83/500
Epoch 84/500
Epoch 85/500


Epoch 86/500
Epoch 87/500
Epoch 88/500
Epoch 89/500
Epoch 90/500
Epoch 91/500
Epoch 92/500
Epoch 93/500
Epoch 94/500
Epoch 95/500
Epoch 96/500
Epoch 97/500
Epoch 98/500
Epoch 99/500
Epoch 100/500
Epoch 101/500
Epoch 102/500
Epoch 103/500
Epoch 104/500
Epoch 105/500
Epoch 106/500
Epoch 107/500
Epoch 108/500
Epoch 109/500
Epoch 110/500
Epoch 111/500
Epoch 112/500
Epoch 113/500
Epoch 114/500
Epoch 115/500
Epoch 116/500
Epoch 117/500
Epoch 118/500
Epoch 119/500
Epoch 120/500
Epoch 121/500
Epoch 122/500
Epoch 123/500
Epoch 124/500
Epoch 125/500
Epoch 126/500
Epoch 127/500
Epoch 128/500


Epoch 129/500
Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500


Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 201/500
Epoch 202/500
Epoch 203/500
Epoch 204/500
Epoch 205/500
Epoch 206/500
Epoch 207/500
Epoch 208/500
Epoch 209/500
Epoch 210/500
Epoch 211/500
Epoch 212/500
Epoch 213/500
Epoch 214/500


Epoch 215/500
Epoch 216/500
Epoch 217/500
Epoch 218/500
Epoch 219/500
Epoch 220/500
Epoch 221/500
Epoch 222/500
Epoch 223/500
Epoch 224/500
Epoch 225/500
Epoch 226/500
Epoch 227/500
Epoch 228/500
Epoch 229/500
Epoch 230/500
Epoch 231/500
Epoch 232/500
Epoch 233/500
Epoch 234/500
Epoch 235/500
Epoch 236/500
Epoch 237/500
Epoch 238/500
Epoch 239/500
Epoch 240/500
Epoch 241/500
Epoch 242/500
Epoch 243/500
Epoch 244/500
Epoch 245/500
Epoch 246/500
Epoch 247/500
Epoch 248/500
Epoch 249/500
Epoch 250/500
Epoch 251/500
Epoch 252/500
Epoch 253/500
Epoch 254/500
Epoch 255/500
Epoch 256/500
Epoch 257/500


Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500


Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 329/500
Epoch 330/500
Epoch 331/500
Epoch 332/500
Epoch 333/500
Epoch 334/500
Epoch 335/500
Epoch 336/500
Epoch 337/500
Epoch 338/500
Epoch 339/500
Epoch 340/500
Epoch 341/500
Epoch 342/500
Epoch 343/500


Epoch 344/500
Epoch 345/500
Epoch 346/500
Epoch 347/500
Epoch 348/500
Epoch 349/500
Epoch 350/500
Epoch 351/500
Epoch 352/500
Epoch 353/500
Epoch 354/500
Epoch 355/500
Epoch 356/500
Epoch 357/500
Epoch 358/500
Epoch 359/500
Epoch 360/500
Epoch 361/500
Epoch 362/500
Epoch 363/500
Epoch 364/500
Epoch 365/500
Epoch 366/500
Epoch 367/500
Epoch 368/500
Epoch 369/500
Epoch 370/500
Epoch 371/500
Epoch 372/500
Epoch 373/500
Epoch 374/500
Epoch 375/500
Epoch 376/500
Epoch 377/500
Epoch 378/500
Epoch 379/500
Epoch 380/500
Epoch 381/500
Epoch 382/500
Epoch 383/500
Epoch 384/500
Epoch 385/500
Epoch 386/500


Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500


Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 457/500
Epoch 458/500
Epoch 459/500
Epoch 460/500
Epoch 461/500
Epoch 462/500
Epoch 463/500
Epoch 464/500
Epoch 465/500
Epoch 466/500
Epoch 467/500
Epoch 468/500
Epoch 469/500
Epoch 470/500
Epoch 471/500
Epoch 472/500


Epoch 473/500
Epoch 474/500
Epoch 475/500
Epoch 476/500
Epoch 477/500
Epoch 478/500
Epoch 479/500
Epoch 480/500
Epoch 481/500
Epoch 482/500
Epoch 483/500
Epoch 484/500
Epoch 485/500
Epoch 486/500
Epoch 487/500
Epoch 488/500
Epoch 489/500
Epoch 490/500
Epoch 491/500
Epoch 492/500
Epoch 493/500
Epoch 494/500
Epoch 495/500
Epoch 496/500
Epoch 497/500
Epoch 498/500
Epoch 499/500
Epoch 500/500
Model training finished.
Evaluating model performance ...
Test RMSEs: 27.91, 27.91


In [65]:
def compute_predictions(model, iterations=100):
    predicted = []
    for _ in range(iterations):
        predicted.append(model(examples).numpy())
    predicted = np.concatenate(predicted, axis=1)

    prediction_mean = np.mean(predicted, axis=1).tolist()
    prediction_min = np.min(predicted, axis=1).tolist()
    prediction_max = np.max(predicted, axis=1).tolist()
    prediction_range = (np.max(predicted, axis=1) - np.min(predicted, axis=1)).tolist()

    for idx in range(sample):
        print(
            f'Predictions mean: {round(prediction_mean[idx], 2)}, '
            f'min: {round(prediction_min[idx], 2)}, '
            f'max: {round(prediction_max[idx], 2)}, '
            f'range: {round(prediction_range[idx], 2)} - '
            f'Actual: {targets[idx]}'
        )


compute_predictions(bnn_model_small)

NameError: name 'examples' is not defined

#### Define the model

In [None]:
def normal_exp(params):
    return tfd.Normal(loc=params[:, 0:1], scale=tf.math.exp(params[:, 1:2]))

In [None]:
inputs = Input(shape=(1,))
hidden = Dense(200, activation='relu')(inputs)
hidden = Dropout(0.1)(hidden, training=True)
hidden = Dense(500, activation='relu')(hidden)
hidden = Dropout(0.1)(hidden, training=True)
hidden = Dense(500, activation='relu')(hidden)
hideen = Dropout(0.1)(hidden, training=True)
hidden = Dense(500, activation='relu')(hidden)
hidden = Dropout(0.1)(hidden, training=True)
hidden = Dense(200, activation='relu')(hidden)
hidden = Dropout(0.1)(hidden, training=True)
params_mc = Dense(2)(hidden)
dist_mc = tfp.layers.DistributionLambda(normal_exp, name='normal_exp')(params_mc)

model_mc = Model(inputs=inputs, outputs=dist_mc)
model_mc.compile(Adam(learning_rate=0.0002, loss=NLL))
model_mc.summary()