In [1]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np, pandas as pd
from fbprophet import Prophet
import math
from sklearn.metrics import mean_squared_error, mean_absolute_error
from matplotlib import pyplot
from pmdarima.arima.utils import ndiffs
from statsmodels.tsa.stattools import adfuller

import warnings
warnings.filterwarnings("ignore")

  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [2]:
#DATA PREPARATION FOR UNIVARIATE PROPHET MODEL

#GATHER ATHENS DATA FROM ALL AVAILABLE YEARS
Athens_Data = pd.read_csv('Athens_data.csv')

Athens_Data.rename(columns = {Athens_Data.columns[0]:'ds'}, inplace = True)
#KEEP ONLY THE POLLUTANTS COLUMNS
cols=['pm25', 'pm10', 'no2', 'o3', 'so2', 'ds']

#MAKE VALUES NUMERIC
Athens_Data_uni = Athens_Data.loc[:, Athens_Data.columns.intersection(cols)]
Athens_Data_uni[['pm25', 'pm10', 'no2', 'o3', 'so2']] = Athens_Data_uni[['pm25', 'pm10', 'no2', 'o3', 'so2']].apply(pd.to_numeric, errors='coerce')

#FILL NAN VALUES
Athens_Data_uni = Athens_Data_uni.iloc[1: , :]

#SOME VALUES ARE EMPTY, BUT NOT RECOGNIZED AS NAN. REPLACE EMPTY VALUES WITH NAN TO FILL THEM LATER
Athens_Data_uni = Athens_Data_uni.replace(r'^\s*$', np.nan, regex=True)
Athens_Data_uni = Athens_Data_uni.fillna(method='ffill')
Athens_Data_uni = Athens_Data_uni.fillna(method='bfill')

#REMOVE DUPLICATE VALUES
Athens_Data_uni = Athens_Data_uni[~Athens_Data_uni.index.duplicated(keep='first')]

#Fb prophet model predicts univariate time series. It requires that the Date index column is named 'ds'
#and the values column is named 'y'
Athens_Data_uni['ds'] = pd.DatetimeIndex(Athens_Data_uni['ds'])

In [3]:
Athens_Data_uni

Unnamed: 0,ds,no2,o3,pm10,pm25,so2
1,2019-11-08,0.423792,0.470135,0.442623,0.352381,0.039683
2,2019-11-09,0.442379,0.454721,0.213115,0.228571,0.039683
3,2019-11-10,0.237918,0.516378,0.131148,0.152381,0.039683
4,2019-11-11,0.356877,0.375723,0.229508,0.180952,0.039683
5,2019-11-12,0.490706,0.470135,0.459016,0.485714,0.039683
...,...,...,...,...,...,...
834,2022-03-15,0.371747,0.539499,0.327869,0.419048,0.277778
835,2022-03-16,0.442379,0.547206,0.426230,0.542857,0.317460
836,2022-03-17,0.338290,0.587669,0.459016,0.504762,0.198413
837,2022-03-18,0.204461,0.633911,0.295082,0.342857,0.079365


In [4]:
#Check data for stationarity to apply the ARIMA model.

#The null hypothesis is that the time series is non-stationary.
#Using the Augmented Dickey Fuller test (adfuller()) we can check for stationarity.
#If p-value < significance level(0.05), the we reject the null hypothesis.
#Otherwise, the time series is non-stationary and needs differencing.

print("Check if Athens_Data is stationary :")
for col in ['no2', 'o3', 'so2', 'pm10', 'pm25']:
    result = adfuller(Athens_Data[col])
    print('p-value for ' + col + ' : %f' % result[1])

Check if Athens_Data is stationary :
p-value for no2 : 0.000000
p-value for o3 : 0.185124
p-value for so2 : 0.000001
p-value for pm10 : 0.000000
p-value for pm25 : 0.000000


In [5]:
Athens_Data_uni['o3'] = Athens_Data_uni['o3'].diff()

In [6]:
def fbprophet_predict(time_series, col):
    
    #CREATE MODEL
    model = Prophet()
    
    #SPLIT TO TRAIN AND TEST PORTIONS
    train_size = 0.8
    train = time_series[:int(train_size*(len(time_series)))]
    test = time_series[int(train_size*(len(time_series))):]
    
    #FIT TRAIN DATA TO MODEL
    model.fit(train)
    
    #PREDICT THE TEST PORTION. THE PARAMETER IN THE PREDICT FUNCTION IS THE TIME INTERVAL TO PREDICT
    forecast = model.predict(test[['ds']])
    
    #FORECAST PREVIEW. 'ds' IS THE DAILY INDEX, 'yhat' IS THE FORECAST, 'yhat_lower' and 'yhat_upper' ARE THE
    #LOWER AND UPPER BOUND OF THE FORECASTED VALUE RESPECTIVELY.
    print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head())
    print(test.reset_index(drop=True).head())
    
    #COMPUTE RMSE OF THE FORECASTED AND ACTUAL VALUE
    rmse = math.sqrt(mean_squared_error(test[['y']], forecast[['yhat']]))
    mse = mean_squared_error(test[['y']], forecast[['yhat']])
    mae = mean_absolute_error(test[['y']], forecast[['yhat']])
    
    print("FB Prophet RMSE for Athens[" + col + "]: " + str(rmse) + "\n")
    print("FB Prophet MSE for Athens[" + col + "]: " + str(mse) + "\n")
    print("FB Prophet MAE for Athens[" + col + "]: " + str(mae) + "\n")

    return test, forecast, rmse

In [7]:
#FB PROPHET REQUIRES THE DATAFRAME TO HAVE TWO COLUMNS. 'ds' AND 'y', 'ds' IS THE TIME INDEX AND 'y' IS THE
#VALUE OF THE POLLUTANT

#RENAME COLUMNS TO FIT TO THE PROPHET MODEL
Athens_o3 = Athens_Data_uni[['ds', 'o3']]
Athens_o3.rename(columns = {'o3':'y'}, inplace = True)

Athens_no2 = Athens_Data_uni[['ds', 'no2']]
Athens_no2.rename(columns = {'no2':'y'}, inplace = True)

Athens_so2 = Athens_Data_uni[['ds', 'so2']]
Athens_so2.rename(columns = {'so2':'y'}, inplace = True)

Athens_pm10 = Athens_Data_uni[['ds', 'pm10']]
Athens_pm10.rename(columns = {'pm10':'y'}, inplace = True)

Athens_pm25 = Athens_Data_uni[['ds', 'pm25']]
Athens_pm25.rename(columns = {'pm25':'y'}, inplace = True)

datasets = [Athens_o3, Athens_no2, Athens_so2, Athens_pm10, Athens_pm25]

In [8]:
pollutants = ['o3', 'no2', 'so2', 'pm10', 'pm25']

i=0

#EVALUATE MODEL FOR EVERY POLLUTANT
for data in datasets:
    test, forecast, error = fbprophet_predict(data, pollutants[i])
    i = i+1

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


          ds      yhat  yhat_lower  yhat_upper
0 2021-09-08  0.006752   -0.122841    0.137250
1 2021-09-09  0.006488   -0.118690    0.130057
2 2021-09-10 -0.012038   -0.142443    0.114423
3 2021-09-11  0.011291   -0.113628    0.132303
4 2021-09-12  0.020931   -0.110356    0.157100
          ds         y
0 2021-09-08 -0.055877
1 2021-09-09 -0.053950
2 2021-09-10  0.023121
3 2021-09-11  0.078998
4 2021-09-12  0.211946
FB Prophet RMSE for Athens[o3]: 0.10655574865383032

FB Prophet MSE for Athens[o3]: 0.011354127571178261

FB Prophet MAE for Athens[o3]: 0.07654105528978698



INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


          ds      yhat  yhat_lower  yhat_upper
0 2021-09-08  0.272681    0.051158    0.476967
1 2021-09-09  0.249364    0.039475    0.450819
2 2021-09-10  0.281400    0.047735    0.488435
3 2021-09-11  0.244117    0.030128    0.472036
4 2021-09-12  0.164482   -0.038346    0.377382
          ds         y
0 2021-09-08  0.118959
1 2021-09-09  0.167286
2 2021-09-10  0.286245
3 2021-09-11  0.390335
4 2021-09-12  0.442379
FB Prophet RMSE for Athens[no2]: 0.23506819389679431

FB Prophet MSE for Athens[no2]: 0.055257055781900884

FB Prophet MAE for Athens[no2]: 0.18983884663712397



INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


          ds      yhat  yhat_lower  yhat_upper
0 2021-09-08  0.299989    0.164245    0.438990
1 2021-09-09  0.295024    0.170147    0.439437
2 2021-09-10  0.302028    0.162895    0.445232
3 2021-09-11  0.288599    0.158340    0.434759
4 2021-09-12  0.277152    0.138180    0.412191
          ds         y
0 2021-09-08  0.198413
1 2021-09-09  0.238095
2 2021-09-10  0.238095
3 2021-09-11  0.119048
4 2021-09-12  0.198413
FB Prophet RMSE for Athens[so2]: 0.20969681691063943

FB Prophet MSE for Athens[so2]: 0.043972755022454235

FB Prophet MAE for Athens[so2]: 0.1716231431952845



INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


          ds      yhat  yhat_lower  yhat_upper
0 2021-09-08  0.398105    0.209365    0.608405
1 2021-09-09  0.367147    0.166712    0.575600
2 2021-09-10  0.357534    0.153905    0.556900
3 2021-09-11  0.386527    0.189500    0.583334
4 2021-09-12  0.384672    0.185754    0.570382
          ds         y
0 2021-09-08  0.344262
1 2021-09-09  0.131148
2 2021-09-10  0.229508
3 2021-09-11  0.295082
4 2021-09-12  0.426230
FB Prophet RMSE for Athens[pm10]: 0.17229507034669883

FB Prophet MSE for Athens[pm10]: 0.0296855912657739

FB Prophet MAE for Athens[pm10]: 0.14431306937199231

          ds      yhat  yhat_lower  yhat_upper
0 2021-09-08  0.388271    0.220519    0.555538
1 2021-09-09  0.364057    0.203656    0.518729
2 2021-09-10  0.357505    0.190816    0.527460
3 2021-09-11  0.387461    0.221563    0.558216
4 2021-09-12  0.397610    0.235492    0.560122
          ds         y
0 2021-09-08  0.314286
1 2021-09-09  0.123810
2 2021-09-10  0.314286
3 2021-09-11  0.342857
4 2021-09-12  0.38095