In [1]:
import pandas as pd
import numpy as np
import sys
from math import sqrt
sys.path.append('../..')
from modules import utils
import gpflow
from gpflow import set_trainable
from sklearn.metrics import mean_squared_error
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
jinja_df = pd.read_csv('../data/jinja_data.csv', parse_dates=['timestamp'])
jinja_df.head()

Unnamed: 0,site_name,latitude,longitude,city,timestamp,pm2_5_calibrated_value,pm2_5_raw_value,pm10_raw_value,pm10_calibrated_value,site_id,device_number,device_name
0,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 00:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23
1,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 01:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23
2,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 02:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23
3,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 03:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23
4,"Jinja Main Street, Jinja",0.437337,33.211051,Jinja,2021-09-01 04:00:00+00:00,,,,,60d058c8048305120d2d6142,689753,aq_23


In [3]:
jinja_df.tail()

Unnamed: 0,site_name,latitude,longitude,city,timestamp,pm2_5_calibrated_value,pm2_5_raw_value,pm10_raw_value,pm10_calibrated_value,site_id,device_number,device_name
16827,"Mpumudde, Jinja",0.463428,33.206262,Jinja,2021-11-29 19:00:00+00:00,37.4454,47.906395,57.335814,47.493504,615c878d580358002ae96dc2,1379965,aq_g532
16828,"Mpumudde, Jinja",0.463428,33.206262,Jinja,2021-11-29 20:00:00+00:00,57.2599,61.122143,73.658571,61.470257,615c878d580358002ae96dc2,1379965,aq_g532
16829,"Mpumudde, Jinja",0.463428,33.206262,Jinja,2021-11-29 21:00:00+00:00,27.6905,40.096667,48.702381,38.925092,615c878d580358002ae96dc2,1379965,aq_g532
16830,"Mpumudde, Jinja",0.463428,33.206262,Jinja,2021-11-29 22:00:00+00:00,17.8444,32.145714,36.381786,30.198896,615c878d580358002ae96dc2,1379965,aq_g532
16831,"Mpumudde, Jinja",0.463428,33.206262,Jinja,2021-11-29 23:00:00+00:00,18.1647,32.135,36.75,30.314966,615c878d580358002ae96dc2,1379965,aq_g532


In [5]:
latitudes = jinja_df['latitude'].unique()
longitudes = jinja_df['longitude'].unique()
device_ids = jinja_df['device_number'].unique()
len(latitudes), len(longitudes), len(device_ids)

(10, 10, 10)

In [6]:
final_df = pd.DataFrame()
cols = ['timestamp', 'latitude', 'longitude', 'pm2_5_calibrated_value']
for i, device_id in enumerate(device_ids):
    device_df = utils.get_device_data(jinja_df, device_id, cols)
    processed_df = utils.preprocessing(device_df)
    final_df = pd.concat([final_df, processed_df])
final_df.reset_index(drop=True, inplace=True)
final_df.head()

Unnamed: 0,time,latitude,longitude,pm2_5
0,452909.0,0.437337,33.211051,12.2844
1,452910.0,0.437337,33.211051,11.6507
2,452911.0,0.437337,33.211051,22.398
3,452912.0,0.437337,33.211051,17.4937
4,452913.0,0.437337,33.211051,25.1622


#### The real work

In [8]:
lengthscales = [[0.008, 0.008, 2], None, 'train_shape']
likelihood_variances = [400, 625, None]
kernel_variances = [400, 625, None]
trainable_kernels = [False, True]
trainable_variances = [False, True]
trainable_lengthscales = [False, True]

In [12]:
results_df = pd.DataFrame()
count = 0
for lengthscale in lengthscales:
    for likelihood_variance in likelihood_variances:
        for kernel_variance in kernel_variances:
            for trainable_kernel in trainable_kernels:
                for trainable_variance in trainable_variances:
                    for trainable_lengthscale in trainable_lengthscales:
                        count+=1
                        rmse_list = []
                        print(f'EXPERIMENT {count}')
                        for i in range(len(latitudes)):
                            try:
                                rmse = cross_validation(final_df, i, kernel_variance, lengthscale,
                                                        likelihood_variance, trainable_kernel, trainable_variance,
                                                        trainable_lengthscale)
                                rmse_list.append(rmse)
                                print(f'{device_ids[i]} successful')
                            except Exception as e:
                                print(e)
                                print(f'{device_ids[i]} failed')
                            
                        mean_rmse = np.mean(rmse_list)
                        results_dict= {'lengthscale':lengthscale, 'likelihood_variance':likelihood_variance, 
                                       'kernel_variance':kernel_variance, 'trainable_kernel':trainable_kernel, 
                                       'trainable_variance':trainable_variance, 
                                       'trainable_lengthscale':trainable_lengthscale, 'avg_rmse':mean_rmse, 
                                       'rmse_list':rmse_list}
                        print(results_dict)
                        results_df = results_df.append(results_dict, ignore_index=True)
                        results_df.to_csv(f'../results/basic_results_{count}.csv', index=False)               

EXPERIMENT 1
Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': None, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': True, 'avg_rmse': 14.494394716616823, 'rmse_list': [9.197245008922383, 21.38619316947233, 12.899745971455749]}
EXPERIMENT 2
689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': None, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 15.09476851187678, 'rmse_list': [13.707817552141956, 19.360463598358283, 12.216024385130098]}
EXPERIMENT 3
689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': None, 'kernel

689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': None, 'kernel_variance': 600, 'trainable_kernel': False, 'trainable_variance': False, 'trainable_lengthscale': False, 'avg_rmse': 15.096816760237395, 'rmse_list': [13.663788059217845, 19.367773369570443, 12.2588888519239]}
EXPERIMENT 25
689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': 425, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': True, 'avg_rmse': 15.594641057689678, 'rmse_list': [9.197229061039794, 23.570095120900536, 14.016598991128703]}
EXPERIMENT 26
689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': 425, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 17.098144709477555, 'rmse_list': [13.707736998505732, 23.57009686258952, 14.016600267337418]}
EXP

689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': True, 'avg_rmse': 15.594653736797595, 'rmse_list': [9.197269458809183, 23.57009375931784, 14.01659799226576]}
EXPERIMENT 50
689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 17.098181636255514, 'rmse_list': [13.70784995555286, 23.570095606919587, 14.01659934629409]}
EXPERIMENT 51
689753 successful
1014698 successful
1014692 successful
{'lengthscale': None, 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': True, 'avg_rmse': 15.634690528069248, 'rmse_list': [9.144940900634925, 23.66868820062335, 14.090442482949474]}
EXPERIME

689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': None, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 14.858071801872782, 'rmse_list': [8.656917646810474, 22.516110008994783, 13.401187749813094]}
EXPERIMENT 75
689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': None, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': True, 'avg_rmse': 14.658187301100503, 'rmse_list': [9.476855735150055, 21.5684054853013, 12.92930068285015]}
EXPERIMENT 76
689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': None, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': False, 'avg_rmse': 14.658187301100503, 'rmse_list': [9.476855735150055, 21.5

689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': 425, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 15.594629228712806, 'rmse_list': [9.197192741522338, 23.57009560113042, 14.016599343485666]}
EXPERIMENT 99
689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': 425, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': True, 'avg_rmse': 15.627270649857358, 'rmse_list': [9.179210192257944, 23.636561617708672, 14.066040139605454]}
EXPERIMENT 100
689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': 425, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': False, 'avg_rmse': 15.627270649857358, 'rmse_list': [9.179210192257944, 23.6

689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 15.594499156385474, 'rmse_list': [9.196806929337153, 23.57009306003109, 14.016597479788182]}
EXPERIMENT 123
689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': True, 'avg_rmse': 15.634690739335504, 'rmse_list': [9.144939410498765, 23.66868938452345, 14.0904434229843]}
EXPERIMENT 124
689753 successful
1014698 successful
1014692 successful
{'lengthscale': [0.008, 0.008, 2], 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': False, 'avg_rmse': 15.634690739335504, 'rmse_list': [9.144939410498765, 23.668

689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': None, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 15.09476851187678, 'rmse_list': [13.707817552141956, 19.360463598358283, 12.216024385130098]}
EXPERIMENT 147
689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': None, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': True, 'avg_rmse': 14.543308101150123, 'rmse_list': [12.070171890395828, 19.342243268901967, 12.217509144152572]}
EXPERIMENT 148
689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': None, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': False, 'avg_rmse': 15.11122940027712, 'rmse_list': [13.67205310083204, 19.42875650

689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': 425, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 17.098144709477555, 'rmse_list': [13.707736998505732, 23.57009686258952, 14.016600267337418]}
EXPERIMENT 171
689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': 425, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': True, 'avg_rmse': 15.627270522746892, 'rmse_list': [9.179211071069615, 23.63656089857811, 14.066039598592951]}
EXPERIMENT 172
689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': 425, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': False, 'avg_rmse': 15.627270514678449, 'rmse_list': [9.179211071410053, 23.636560876396

689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': True, 'trainable_lengthscale': False, 'avg_rmse': 17.098181636255514, 'rmse_list': [13.70784995555286, 23.570095606919587, 14.01659934629409]}
EXPERIMENT 195
689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': True, 'avg_rmse': 15.634690189953597, 'rmse_list': [9.144939986504514, 23.668688121458217, 14.09044246189806]}
EXPERIMENT 196
689753 successful
1014698 successful
1014692 successful
{'lengthscale': 'train_shape', 'likelihood_variance': 600, 'kernel_variance': None, 'trainable_kernel': True, 'trainable_variance': False, 'trainable_lengthscale': False, 'avg_rmse': 15.634690484121515, 'rmse_list': [9.144940813988022, 23.6686881448888

In [13]:
results = pd.read_csv('../results/basic_results.csv')
results

FileNotFoundError: [Errno 2] No such file or directory: '../results/hyperparameter_results.csv'