In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from pmdarima.arima import auto_arima
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima.model import ARIMA,ARIMAResults
from sklearn.metrics import mean_squared_error,mean_absolute_percentage_error
from statsmodels.tools.eval_measures import rmse

from sklearn.preprocessing import OneHotEncoder
import pickle
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima.arima.utils import nsdiffs

In [2]:
#pip install --upgrade statsmodels

In [3]:
df_weekly= pd.read_csv('weekly_AQI (1).csv',index_col='DateTime',parse_dates=True)
df_weekly.head()

FileNotFoundError: [Errno 2] No such file or directory: 'weekly_AQI (1).csv'

In [4]:
df_weekly.shape

(2314, 2)

In [5]:
df_weekly.tail()

Unnamed: 0_level_0,id,AQI
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-09-18,ID035,71.848485
2022-09-25,ID035,59.061538
2022-10-02,ID035,56.682927
2022-10-09,ID035,52.505837
2022-10-16,ID035,59.309091


In [6]:
df_filtered = df_weekly[df_weekly['id'] != 'ID023']
df_filtered

Unnamed: 0_level_0,id,AQI
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-06-06,ID001,49.448276
2021-06-13,ID001,159.000000
2021-06-20,ID001,159.000000
2021-06-27,ID001,159.000000
2021-07-04,ID001,159.000000
...,...,...
2022-09-18,ID035,71.848485
2022-09-25,ID035,59.061538
2022-10-02,ID035,56.682927
2022-10-09,ID035,52.505837


In [7]:
df_filtered.id.unique()

array(['ID001', 'ID003', 'ID004', 'ID005', 'ID006', 'ID007', 'ID008',
       'ID009', 'ID010', 'ID011', 'ID012', 'ID013', 'ID014', 'ID015',
       'ID016', 'ID017', 'ID018', 'ID020', 'ID021', 'ID022', 'ID024',
       'ID025', 'ID026', 'ID027', 'ID028', 'ID029', 'ID030', 'ID031',
       'ID032', 'ID033', 'ID034', 'ID035'], dtype=object)

In [9]:
class TSA:
    def __init__(self,df,idcol,loc):
        self.df = df
        self.idcol = idcol
        self.loc = loc
        
        
    def zone_df(self):
        self.df = self.df[self.df[self.idcol]== self.loc]
        print(self.loc)
    
    def adf_test(self,valcol):
        """
        Pass in a time series and an optional title, returns an ADF report
        """
        print('Testing')
        result = adfuller(self.df[valcol].dropna(),autolag='AIC') # .dropna() handles differenced data
        print('result')
    
        labels = ['ADF test statistic','p-value','# lags used','# observations']
        out = pd.Series(result[0:4],index=labels)

        for key,val in result[4].items():
            out[f'critical value ({key})']=val
        
        print(out.to_string())          # .to_string() removes the line "dtype: float64"
    
        if result[1] <= 0.05:
            print("Strong evidence against the null hypothesis")
            print("Reject the null hypothesis")
            print("Data has no unit root and is stationary")
        else:
            print("Weak evidence against the null hypothesis")
            print("Fail to reject the null hypothesis")
            print("Data has a unit root and is non-stationary")
    def determine_ARIMA_order(self,valcol):
        stepwise_fit = auto_arima(self.df[valcol], start_p=0, start_q=0,
                          error_action='ignore',   # we don't want to know if an order does not work
                          suppress_warnings=True,  # we don't want convergence warnings
                          stepwise=True)           # set to stepwise
        best_order = stepwise_fit.get_params().get('order')
        print('The best order is '.format(best_order))
        return best_order
    def fit_model(self,valcol):
        if len(self.df[valcol]) > 70:
            train = self.df[valcol][:len(self.df[valcol])-8]
            test = self.df[valcol][len(self.df[valcol])-8:]
        else:
            train = self.df[valcol][:len(self.df[valcol])-4]
            test = self.df[valcol][len(self.df[valcol])-4:]
        start = len(train)
        end = len(train)+len(test)-1
#             print('train : {}'.format(train))
#             print('test : {}'.format(test))
        print('start : {}'.format(start))
        print('end : {}'.format(end))
        results = ARIMA(train,order=c1.determine_ARIMA_order(valcol)).fit()
        predictions = results.predict(start=start, end=end)
        error1 = mean_squared_error(test, predictions)
        error2 = rmse(test, predictions)
        print(f'MSE Error: {error1:11.10}')
        print(f'RMSE Error: {error2:11.10}')
            
    def full_data_model(self,valcol):
        results = ARIMA(self.df[valcol],order=c1.determine_ARIMA_order(valcol)).fit()
        
        with open('TSA_AQI_{}.pkl'.format(self.loc),'wb')as f:
            pickle.dump(results,f)
            
        order=c1.determine_ARIMA_order(valcol)
        with open('Order_TSA_AQI_{}.pkl'.format(self.loc),'wb')as f:
            pickle.dump(order,f)
            
        if len(self.df[valcol]) > 70:
            fcast = results.predict(len(self.df), len(self.df)+7,typ='levels').round(2)
        else:
            fcast = results.predict(len(self.df), len(self.df)+3,typ='levels').round(2)
#         ax = self.df[valcol].plot(legend=True,figsize=(12,6))
#         fcast.plot(legend=True)
        print(order)
        print(fcast)
        DF = pd.DataFrame(self.df[valcol])
        DF['Type'] = 'Actual'
        DF_fcast = pd.DataFrame(fcast)
        DF_fcast = DF_fcast.rename(columns={'predicted_mean':'AQI'})
        DF_fcast['Type'] = 'Predicted'
        final_DF =  pd.concat([DF,DF_fcast], ignore_index=True)
        final_DF = final_DF.reset_index()
        final_DF = final_DF.rename(columns={'index':'Date'})
        print(final_DF)
        #final_DF.to_json('/Users/nithingopinath/Desktop/Bayesian Ways/AQI Deployment\{}.json'.format(item),orient='records')

id_list = list(df_filtered.id.unique())
for item in id_list:
    c1 = TSA(df_weekly,'id',item)
    c1.zone_df()
    c1.adf_test('AQI')
    c1.determine_ARIMA_order('AQI')
    c1.fit_model('AQI')
    c1.full_data_model('AQI')

ID001
Testing
result
ADF test statistic      -1.389406
p-value                  0.587344
# lags used              1.000000
# observations          70.000000
critical value (1%)     -3.527426
critical value (5%)     -2.903811
critical value (10%)    -2.589320
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and is non-stationary
The best order is 
start : 64
end : 71
The best order is 
MSE Error: 387.9975279
RMSE Error: 19.69765285


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 1)
2022-10-23    59.08
2022-10-30    59.08
2022-11-06    59.08
2022-11-13    59.08
2022-11-20    59.08
2022-11-27    59.08
2022-12-04    59.08
2022-12-11    59.08
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0   49.448276     Actual
1      1  159.000000     Actual
2      2  159.000000     Actual
3      3  159.000000     Actual
4      4  159.000000     Actual
..   ...         ...        ...
75    75   59.080000  Predicted
76    76   59.080000  Predicted
77    77   59.080000  Predicted
78    78   59.080000  Predicted
79    79   59.080000  Predicted

[80 rows x 3 columns]
ID003
Testing
result
ADF test statistic     -1.563377e+15
p-value                 0.000000e+00
# lags used             0.000000e+00
# observations          7.100000e+01
critical value (1%)    -3.526005e+00
critical value (5%)    -2.903200e+00
critical value (10%)   -2.588995e+00
Strong evidence against the null hypothesis
Reject the null hypothesis
Dat

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


The best order is 
start : 64
end : 71
The best order is 
MSE Error: 1.069921296
RMSE Error: 1.034369999


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 0, 0)
2022-10-23    169.92
2022-10-30    169.92
2022-11-06    169.92
2022-11-13    169.92
2022-11-20    169.92
2022-11-27    169.92
2022-12-04    169.92
2022-12-11    169.92
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date     AQI       Type
0      0  235.20     Actual
1      1  169.00     Actual
2      2  169.00     Actual
3      3  169.00     Actual
4      4  169.00     Actual
..   ...     ...        ...
75    75  169.92  Predicted
76    76  169.92  Predicted
77    77  169.92  Predicted
78    78  169.92  Predicted
79    79  169.92  Predicted

[80 rows x 3 columns]
ID004
Testing
result
ADF test statistic      -1.200950
p-value                  0.673187
# lags used             12.000000
# observations          59.000000
critical value (1%)     -3.546395
critical value (5%)     -2.911939
critical value (10%)    -2.593652
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and is non-stationary




The best order is 
start : 64
end : 71
The best order is 
MSE Error: 4.580436791e-05
RMSE Error: 0.006767892427


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    33.0
2022-10-30    33.0
2022-11-06    33.0
2022-11-13    33.0
2022-11-20    33.0
2022-11-27    33.0
2022-12-04    33.0
2022-12-11    33.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date    AQI       Type
0      0  159.0     Actual
1      1  159.0     Actual
2      2  159.0     Actual
3      3  228.0     Actual
4      4  366.0     Actual
..   ...    ...        ...
75    75   33.0  Predicted
76    76   33.0  Predicted
77    77   33.0  Predicted
78    78   33.0  Predicted
79    79   33.0  Predicted

[80 rows x 3 columns]
ID005
Testing
result
ADF test statistic      -2.634790
p-value                  0.086018
# lags used              1.000000
# observations          51.000000
critical value (1%)     -3.565624
critical value (5%)     -2.920142
critical value (10%)    -2.598015
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and is non-stationary




The best order is 
start : 49
end : 52
The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


MSE Error: 235.1673185
RMSE Error: 15.33516607
The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(1, 0, 3)
2022-10-23    108.78
2022-10-30    110.65
2022-11-06    109.31
2022-11-13    109.92
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  204.430380     Actual
1      1  186.338583     Actual
2      2  119.808000     Actual
3      3  169.241935     Actual
4      4  171.166667     Actual
5      5  113.215517     Actual
6      6  120.178344     Actual
7      7  157.279661     Actual
8      8  130.083333     Actual
9      9   95.254545     Actual
10    10   95.595506     Actual
11    11   99.767606     Actual
12    12   94.952381     Actual
13    13   84.000000     Actual
14    14   44.000000     Actual
15    15   89.322581     Actual
16    16   78.968750     Actual
17    17   65.000000     Actual
18    18   93.890688     Actual
19    19   43.000000     Actual
20    20   43.000000     Actual
21    21   43.000000     Actual
22    22   43.000000     Actual
23    23   43.000000     Actual
24    24   43.000000     Actual
25   



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 8.384994173
RMSE Error: 2.895685441


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    56.07
2022-10-30    56.07
2022-11-06    56.07
2022-11-13    56.07
2022-11-20    56.07
2022-11-27    56.07
2022-12-04    56.07
2022-12-11    56.07
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date     AQI       Type
0      0  135.00     Actual
1      1  135.00     Actual
2      2  135.00     Actual
3      3  135.00     Actual
4      4  135.00     Actual
..   ...     ...        ...
75    75   56.07  Predicted
76    76   56.07  Predicted
77    77   56.07  Predicted
78    78   56.07  Predicted
79    79   56.07  Predicted

[80 rows x 3 columns]
ID007
Testing
result
ADF test statistic      -1.961101
p-value                  0.303885
# lags used              3.000000
# observations          68.000000
critical value (1%)     -3.530399
critical value (5%)     -2.905087
critical value (10%)    -2.590001
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and is non-stationary




The best order is 
start : 64
end : 71
The best order is 
MSE Error:         0.0
RMSE Error:         0.0


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    31.0
2022-10-30    31.0
2022-11-06    31.0
2022-11-13    31.0
2022-11-20    31.0
2022-11-27    31.0
2022-12-04    31.0
2022-12-11    31.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  176.724138     Actual
1      1  158.029630     Actual
2      2  157.257576     Actual
3      3  157.915094     Actual
4      4  160.603774     Actual
..   ...         ...        ...
75    75   31.000000  Predicted
76    76   31.000000  Predicted
77    77   31.000000  Predicted
78    78   31.000000  Predicted
79    79   31.000000  Predicted

[80 rows x 3 columns]
ID008
Testing
result
ADF test statistic      -0.655038
p-value                  0.858055
# lags used              8.000000
# observations          63.000000
critical value (1%)     -3.538695
critical value (5%)     -2.908645
critical value (10%)    -2.591897
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and i



The best order is 
start : 64
end : 71
The best order is 
MSE Error:         0.0
RMSE Error:         0.0


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    218.0
2022-10-30    218.0
2022-11-06    218.0
2022-11-13    218.0
2022-11-20    218.0
2022-11-27    218.0
2022-12-04    218.0
2022-12-11    218.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  135.000000     Actual
1      1  135.000000     Actual
2      2  135.000000     Actual
3      3  188.925926     Actual
4      4  196.706897     Actual
..   ...         ...        ...
75    75  218.000000  Predicted
76    76  218.000000  Predicted
77    77  218.000000  Predicted
78    78  218.000000  Predicted
79    79  218.000000  Predicted

[80 rows x 3 columns]
ID009
Testing
result
ADF test statistic      -1.943009
p-value                  0.312107
# lags used              0.000000
# observations          71.000000
critical value (1%)     -3.526005
critical value (5%)     -2.903200
critical value (10%)    -2.588995
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit ro



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 294.4390204
RMSE Error: 17.15922552


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 1)
2022-10-23    132.0
2022-10-30    132.0
2022-11-06    132.0
2022-11-13    132.0
2022-11-20    132.0
2022-11-27    132.0
2022-12-04    132.0
2022-12-11    132.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  195.675676     Actual
1      1  197.861314     Actual
2      2  198.444444     Actual
3      3  200.666667     Actual
4      4  219.888889     Actual
..   ...         ...        ...
75    75  132.000000  Predicted
76    76  132.000000  Predicted
77    77  132.000000  Predicted
78    78  132.000000  Predicted
79    79  132.000000  Predicted

[80 rows x 3 columns]
ID010
Testing
result
ADF test statistic      -4.273971
p-value                  0.000493
# lags used              6.000000
# observations          65.000000
critical value (1%)     -3.535217
critical value (5%)     -2.907154
critical value (10%)    -2.591103
Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no unit root an



The best order is 
start : 64
end : 71
The best order is 
MSE Error:  31.9910251
RMSE Error: 5.656060917


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(2, 0, 0)
2022-10-23    113.91
2022-10-30    115.09
2022-11-06    116.29
2022-11-13    117.39
2022-11-20    118.37
2022-11-27    119.21
2022-12-04    119.94
2022-12-11    120.56
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date     AQI       Type
0      0  205.50     Actual
1      1  229.00     Actual
2      2  229.00     Actual
3      3  229.00     Actual
4      4  229.00     Actual
..   ...     ...        ...
75    75  117.39  Predicted
76    76  118.37  Predicted
77    77  119.21  Predicted
78    78  119.94  Predicted
79    79  120.56  Predicted

[80 rows x 3 columns]
ID011
Testing
result
ADF test statistic      -3.131976
p-value                  0.024276
# lags used              0.000000
# observations          71.000000
critical value (1%)     -3.526005
critical value (5%)     -2.903200
critical value (10%)    -2.588995
Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no unit root and is stationary




The best order is 
start : 64
end : 71
The best order is 
MSE Error: 248.2939525
RMSE Error: 15.75734599


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(1, 0, 0)
2022-10-23    80.78
2022-10-30    84.49
2022-11-06    87.37
2022-11-13    89.60
2022-11-20    91.33
2022-11-27    92.68
2022-12-04    93.72
2022-12-11    94.53
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  175.600000     Actual
1      1  128.000000     Actual
2      2  128.000000     Actual
3      3  224.250000     Actual
4      4  239.538462     Actual
..   ...         ...        ...
75    75   89.600000  Predicted
76    76   91.330000  Predicted
77    77   92.680000  Predicted
78    78   93.720000  Predicted
79    79   94.530000  Predicted

[80 rows x 3 columns]
ID012
Testing
result
ADF test statistic     -7.739548e+00
p-value                 1.071004e-11
# lags used             1.200000e+01
# observations          5.900000e+01
critical value (1%)    -3.546395e+00
critical value (5%)    -2.911939e+00
critical value (10%)   -2.593652e+00
Strong evidence against the null hypothesis
Reject the null hypothesis
Dat



The best order is 
start : 64
end : 71
The best order is 
MSE Error:         0.0
RMSE Error:         0.0


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    56.0
2022-10-30    56.0
2022-11-06    56.0
2022-11-13    56.0
2022-11-20    56.0
2022-11-27    56.0
2022-12-04    56.0
2022-12-11    56.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  141.750000     Actual
1      1  145.000000     Actual
2      2  145.000000     Actual
3      3  145.000000     Actual
4      4  217.589744     Actual
..   ...         ...        ...
75    75   56.000000  Predicted
76    76   56.000000  Predicted
77    77   56.000000  Predicted
78    78   56.000000  Predicted
79    79   56.000000  Predicted

[80 rows x 3 columns]
ID013
Testing
result
ADF test statistic      -1.858488
p-value                  0.351835
# lags used              1.000000
# observations          70.000000
critical value (1%)     -3.527426
critical value (5%)     -2.903811
critical value (10%)    -2.589320
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and i



The best order is 
start : 64
end : 71
The best order is 
MSE Error:         0.0
RMSE Error:         0.0


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    304.0
2022-10-30    304.0
2022-11-06    304.0
2022-11-13    304.0
2022-11-20    304.0
2022-11-27    304.0
2022-12-04    304.0
2022-12-11    304.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0   98.353846     Actual
1      1   13.579439     Actual
2      2   16.297521     Actual
3      3   19.234694     Actual
4      4   14.707865     Actual
..   ...         ...        ...
75    75  304.000000  Predicted
76    76  304.000000  Predicted
77    77  304.000000  Predicted
78    78  304.000000  Predicted
79    79  304.000000  Predicted

[80 rows x 3 columns]
ID014
Testing
result
ADF test statistic      -4.300984
p-value                  0.000442
# lags used             11.000000
# observations          60.000000
critical value (1%)     -3.544369
critical value (5%)     -2.911073
critical value (10%)    -2.593190
Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no unit root an



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 2.428568804e-13
RMSE Error: 4.92805114e-07


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 1)
2022-10-23    87.0
2022-10-30    87.0
2022-11-06    87.0
2022-11-13    87.0
2022-11-20    87.0
2022-11-27    87.0
2022-12-04    87.0
2022-12-11    87.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  192.532258     Actual
1      1  169.877551     Actual
2      2  170.976744     Actual
3      3  172.144144     Actual
4      4  177.379032     Actual
..   ...         ...        ...
75    75   87.000000  Predicted
76    76   87.000000  Predicted
77    77   87.000000  Predicted
78    78   87.000000  Predicted
79    79   87.000000  Predicted

[80 rows x 3 columns]
ID015
Testing
result
ADF test statistic      -1.887594
p-value                  0.337920
# lags used              2.000000
# observations          69.000000
critical value (1%)     -3.528890
critical value (5%)     -2.904440
critical value (10%)    -2.589656
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and i



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 6.572501805
RMSE Error: 2.563689101


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 2)
2022-10-23    57.38
2022-10-30    55.62
2022-11-06    55.62
2022-11-13    55.62
2022-11-20    55.62
2022-11-27    55.62
2022-12-04    55.62
2022-12-11    55.62
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  216.325000     Actual
1      1  213.092857     Actual
2      2  212.419847     Actual
3      3  216.310345     Actual
4      4  218.520833     Actual
..   ...         ...        ...
75    75   55.620000  Predicted
76    76   55.620000  Predicted
77    77   55.620000  Predicted
78    78   55.620000  Predicted
79    79   55.620000  Predicted

[80 rows x 3 columns]
ID016
Testing
result
ADF test statistic      -1.767513
p-value                  0.396706
# lags used              0.000000
# observations          71.000000
critical value (1%)     -3.526005
critical value (5%)     -2.903200
critical value (10%)    -2.588995
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit ro



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 0.5162541712
RMSE Error: 0.7185082959


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    51.0
2022-10-30    51.0
2022-11-06    51.0
2022-11-13    51.0
2022-11-20    51.0
2022-11-27    51.0
2022-12-04    51.0
2022-12-11    51.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  256.538462     Actual
1      1  500.000000     Actual
2      2  500.000000     Actual
3      3  500.000000     Actual
4      4  500.000000     Actual
..   ...         ...        ...
75    75   51.000000  Predicted
76    76   51.000000  Predicted
77    77   51.000000  Predicted
78    78   51.000000  Predicted
79    79   51.000000  Predicted

[80 rows x 3 columns]
ID017
Testing
result
ADF test statistic      -1.496549
p-value                  0.535218
# lags used              0.000000
# observations          71.000000
critical value (1%)     -3.526005
critical value (5%)     -2.903200
critical value (10%)    -2.588995
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and i



The best order is 
start : 64
end : 71
The best order is 
MSE Error:  4.42332605
RMSE Error: 2.103170476


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    17.0
2022-10-30    17.0
2022-11-06    17.0
2022-11-13    17.0
2022-11-20    17.0
2022-11-27    17.0
2022-12-04    17.0
2022-12-11    17.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date    AQI       Type
0      0  366.0     Actual
1      1  366.0     Actual
2      2  366.0     Actual
3      3  366.0     Actual
4      4  366.0     Actual
..   ...    ...        ...
75    75   17.0  Predicted
76    76   17.0  Predicted
77    77   17.0  Predicted
78    78   17.0  Predicted
79    79   17.0  Predicted

[80 rows x 3 columns]
ID018
Testing
result
ADF test statistic      -1.832963
p-value                  0.364224
# lags used              1.000000
# observations          70.000000
critical value (1%)     -3.527426
critical value (5%)     -2.903811
critical value (10%)    -2.589320
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and is non-stationary




The best order is 
start : 64
end : 71
The best order is 
MSE Error: 646.2113683
RMSE Error:  25.4206878


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(1, 0, 0)
2022-10-23    39.79
2022-10-30    44.98
2022-11-06    49.64
2022-11-13    53.82
2022-11-20    57.56
2022-11-27    60.92
2022-12-04    63.93
2022-12-11    66.63
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  162.969697     Actual
1      1  121.000000     Actual
2      2  121.000000     Actual
3      3  121.000000     Actual
4      4  209.318182     Actual
..   ...         ...        ...
75    75   53.820000  Predicted
76    76   57.560000  Predicted
77    77   60.920000  Predicted
78    78   63.930000  Predicted
79    79   66.630000  Predicted

[80 rows x 3 columns]
ID020
Testing
result
ADF test statistic      -2.251808
p-value                  0.187982
# lags used             12.000000
# observations          59.000000
critical value (1%)     -3.546395
critical value (5%)     -2.911939
critical value (10%)    -2.593652
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit ro



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 1472.219987
RMSE Error: 38.36951898


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(2, 1, 1)
2022-10-23    97.96
2022-10-30    80.90
2022-11-06    56.06
2022-11-13    72.47
2022-11-20    80.62
2022-11-27    69.55
2022-12-04    68.58
2022-12-11    74.69
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  198.292683     Actual
1      1  185.000000     Actual
2      2  185.000000     Actual
3      3  198.102564     Actual
4      4  201.775510     Actual
..   ...         ...        ...
75    75   72.470000  Predicted
76    76   80.620000  Predicted
77    77   69.550000  Predicted
78    78   68.580000  Predicted
79    79   74.690000  Predicted

[80 rows x 3 columns]
ID021
Testing
result
ADF test statistic      -0.674984
p-value                  0.853123
# lags used              3.000000
# observations          68.000000
critical value (1%)     -3.530399
critical value (5%)     -2.905087
critical value (10%)    -2.590001
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit ro



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 630.6054805
RMSE Error: 25.11185936


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 1)
2022-10-23    57.21
2022-10-30    57.21
2022-11-06    57.21
2022-11-13    57.21
2022-11-20    57.21
2022-11-27    57.21
2022-12-04    57.21
2022-12-11    57.21
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  222.586207     Actual
1      1  132.000000     Actual
2      2  132.000000     Actual
3      3  132.000000     Actual
4      4  132.000000     Actual
..   ...         ...        ...
75    75   57.210000  Predicted
76    76   57.210000  Predicted
77    77   57.210000  Predicted
78    78   57.210000  Predicted
79    79   57.210000  Predicted

[80 rows x 3 columns]
ID022
Testing
result
ADF test statistic      -2.121008
p-value                  0.236151
# lags used              0.000000
# observations          71.000000
critical value (1%)     -3.526005
critical value (5%)     -2.903200
critical value (10%)    -2.588995
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit ro



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 8.007423558
RMSE Error: 2.829739132


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    41.0
2022-10-30    41.0
2022-11-06    41.0
2022-11-13    41.0
2022-11-20    41.0
2022-11-27    41.0
2022-12-04    41.0
2022-12-11    41.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  198.000000     Actual
1      1  195.510638     Actual
2      2  197.930769     Actual
3      3  198.661017     Actual
4      4  201.233333     Actual
..   ...         ...        ...
75    75   41.000000  Predicted
76    76   41.000000  Predicted
77    77   41.000000  Predicted
78    78   41.000000  Predicted
79    79   41.000000  Predicted

[80 rows x 3 columns]
ID024
Testing
result
ADF test statistic     -1.567191e+14
p-value                 0.000000e+00
# lags used             0.000000e+00
# observations          7.100000e+01
critical value (1%)    -3.526005e+00
critical value (5%)    -2.903200e+00
critical value (10%)   -2.588995e+00
Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


The best order is 
start : 64
end : 71
The best order is 
MSE Error: 0.005624259142
RMSE Error: 0.07499506078


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 0, 0)
2022-10-23    135.07
2022-10-30    135.07
2022-11-06    135.07
2022-11-13    135.07
2022-11-20    135.07
2022-11-27    135.07
2022-12-04    135.07
2022-12-11    135.07
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date     AQI       Type
0      0  139.80     Actual
1      1  135.00     Actual
2      2  135.00     Actual
3      3  135.00     Actual
4      4  135.00     Actual
..   ...     ...        ...
75    75  135.07  Predicted
76    76  135.07  Predicted
77    77  135.07  Predicted
78    78  135.07  Predicted
79    79  135.07  Predicted

[80 rows x 3 columns]
ID025
Testing
result
ADF test statistic      -3.394535
p-value                  0.011147
# lags used              6.000000
# observations          65.000000
critical value (1%)     -3.535217
critical value (5%)     -2.907154
critical value (10%)    -2.591103
Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no unit root and is stationary




The best order is 
start : 64
end : 71
The best order is 
MSE Error: 35.47624564
RMSE Error: 5.956193888


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(1, 0, 1)
2022-10-23    128.33
2022-10-30    127.28
2022-11-06    126.33
2022-11-13    125.49
2022-11-20    124.73
2022-11-27    124.04
2022-12-04    123.43
2022-12-11    122.88
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  216.057143     Actual
1      1  217.153285     Actual
2      2  217.000000     Actual
3      3  218.045872     Actual
4      4  220.452174     Actual
..   ...         ...        ...
75    75  125.490000  Predicted
76    76  124.730000  Predicted
77    77  124.040000  Predicted
78    78  123.430000  Predicted
79    79  122.880000  Predicted

[80 rows x 3 columns]
ID026
Testing
result
ADF test statistic      -3.073235
p-value                  0.028598
# lags used             12.000000
# observations          59.000000
critical value (1%)     -3.546395
critical value (5%)     -2.911939
critical value (10%)    -2.593652
Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no unit



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 1.842582423
RMSE Error: 1.357417556


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    55.0
2022-10-30    55.0
2022-11-06    55.0
2022-11-13    55.0
2022-11-20    55.0
2022-11-27    55.0
2022-12-04    55.0
2022-12-11    55.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  227.111111     Actual
1      1  228.877863     Actual
2      2  228.833333     Actual
3      3  228.978947     Actual
4      4  231.433962     Actual
..   ...         ...        ...
75    75   55.000000  Predicted
76    76   55.000000  Predicted
77    77   55.000000  Predicted
78    78   55.000000  Predicted
79    79   55.000000  Predicted

[80 rows x 3 columns]
ID027
Testing
result
ADF test statistic     -6.246985e+14
p-value                 0.000000e+00
# lags used             0.000000e+00
# observations          7.100000e+01
critical value (1%)    -3.526005e+00
critical value (5%)    -2.903200e+00
critical value (10%)   -2.588995e+00
Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no

  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


The best order is 
start : 64
end : 71
The best order is 
MSE Error: 1.268218482
RMSE Error: 1.126152069


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 0, 0)
2022-10-23    133.0
2022-10-30    133.0
2022-11-06    133.0
2022-11-13    133.0
2022-11-20    133.0
2022-11-27    133.0
2022-12-04    133.0
2022-12-11    133.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  204.074074     Actual
1      1  132.000000     Actual
2      2  132.000000     Actual
3      3  132.000000     Actual
4      4  132.000000     Actual
..   ...         ...        ...
75    75  133.000000  Predicted
76    76  133.000000  Predicted
77    77  133.000000  Predicted
78    78  133.000000  Predicted
79    79  133.000000  Predicted

[80 rows x 3 columns]
ID028
Testing
result
ADF test statistic      -2.003011
p-value                  0.285254
# lags used              0.000000
# observations          28.000000
critical value (1%)     -3.688926
critical value (5%)     -2.971989
critical value (10%)    -2.625296
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit ro



The best order is 
start : 25
end : 28
The best order is 
MSE Error: 8353.915598
RMSE Error:  91.3997571


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(1, 0, 0)
2022-10-23    209.60
2022-10-30    235.83
2022-11-06    254.13
2022-11-13    266.90
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  277.844037     Actual
1      1  164.562500     Actual
2      2  147.000000     Actual
3      3  474.450000     Actual
4      4  500.000000     Actual
5      5  500.000000     Actual
6      6  500.000000     Actual
7      7  500.000000     Actual
8      8  500.000000     Actual
9      9  500.000000     Actual
10    10  500.000000     Actual
11    11  500.000000     Actual
12    12  171.463087     Actual
13    13  168.116197     Actual
14    14  169.146119     Actual
15    15  168.632184     Actual
16    16  177.000000     Actual
17    17  500.000000     Actual
18    18  500.000000     Actual
19    19  449.235507     Actual
20    20  173.771186     Actual
21    21  172.653846     Actual
22    22  172.716578     Actual
23    23  175.098266     Actual
24    24  173.522124     Actual
25   



The best order is 
start : 64
end : 71
The best order is 
MSE Error:         0.0
RMSE Error:         0.0


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    35.0
2022-10-30    35.0
2022-11-06    35.0
2022-11-13    35.0
2022-11-20    35.0
2022-11-27    35.0
2022-12-04    35.0
2022-12-11    35.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  433.800000     Actual
1      1  500.000000     Actual
2      2  500.000000     Actual
3      3  500.000000     Actual
4      4  490.153846     Actual
..   ...         ...        ...
75    75   35.000000  Predicted
76    76   35.000000  Predicted
77    77   35.000000  Predicted
78    78   35.000000  Predicted
79    79   35.000000  Predicted

[80 rows x 3 columns]
ID030
Testing
result
ADF test statistic      -1.679049
p-value                  0.441938
# lags used              0.000000
# observations          71.000000
critical value (1%)     -3.526005
critical value (5%)     -2.903200
critical value (10%)    -2.588995
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and i



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 779.0309798
RMSE Error: 27.91112645


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(2, 0, 0)
2022-10-23    62.05
2022-10-30    67.38
2022-11-06    72.16
2022-11-13    76.28
2022-11-20    79.82
2022-11-27    82.85
2022-12-04    85.44
2022-12-11    87.66
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date     AQI       Type
0      0  111.00     Actual
1      1  111.00     Actual
2      2  111.00     Actual
3      3  111.00     Actual
4      4  111.00     Actual
..   ...     ...        ...
75    75   76.28  Predicted
76    76   79.82  Predicted
77    77   82.85  Predicted
78    78   85.44  Predicted
79    79   87.66  Predicted

[80 rows x 3 columns]
ID031
Testing
result
ADF test statistic      -1.047350
p-value                  0.735605
# lags used              2.000000
# observations          69.000000
critical value (1%)     -3.528890
critical value (5%)     -2.904440
critical value (10%)    -2.589656
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and is non-stationary




The best order is 
start : 64
end : 71
The best order is 
MSE Error:         0.0
RMSE Error:         0.0


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(3, 1, 0)
2022-10-23    14.0
2022-10-30    14.0
2022-11-06    14.0
2022-11-13    14.0
2022-11-20    14.0
2022-11-27    14.0
2022-12-04    14.0
2022-12-11    14.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  159.107143     Actual
1      1  159.107143     Actual
2      2  159.107143     Actual
3      3  159.107143     Actual
4      4  159.107143     Actual
..   ...         ...        ...
75    75   14.000000  Predicted
76    76   14.000000  Predicted
77    77   14.000000  Predicted
78    78   14.000000  Predicted
79    79   14.000000  Predicted

[80 rows x 3 columns]
ID032
Testing
result
ADF test statistic      -2.329062
p-value                  0.162766
# lags used              0.000000
# observations          71.000000
critical value (1%)     -3.526005
critical value (5%)     -2.903200
critical value (10%)    -2.588995
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and i



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 497.2761352
RMSE Error: 22.29968913


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    45.78
2022-10-30    45.78
2022-11-06    45.78
2022-11-13    45.78
2022-11-20    45.78
2022-11-27    45.78
2022-12-04    45.78
2022-12-11    45.78
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  211.322581     Actual
1      1  204.616541     Actual
2      2  208.730435     Actual
3      3  326.875000     Actual
4      4  314.878788     Actual
..   ...         ...        ...
75    75   45.780000  Predicted
76    76   45.780000  Predicted
77    77   45.780000  Predicted
78    78   45.780000  Predicted
79    79   45.780000  Predicted

[80 rows x 3 columns]
ID033
Testing
result
ADF test statistic      -1.616336
p-value                  0.474651
# lags used             11.000000
# observations          60.000000
critical value (1%)     -3.544369
critical value (5%)     -2.911073
critical value (10%)    -2.593190
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit ro



The best order is 
start : 64
end : 71
The best order is 
MSE Error:         0.0
RMSE Error:         0.0


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(3, 1, 0)
2022-10-23    30.0
2022-10-30    30.0
2022-11-06    30.0
2022-11-13    30.0
2022-11-20    30.0
2022-11-27    30.0
2022-12-04    30.0
2022-12-11    30.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  223.944444     Actual
1      1  219.626866     Actual
2      2  219.385246     Actual
3      3  220.190476     Actual
4      4  222.778846     Actual
..   ...         ...        ...
75    75   30.000000  Predicted
76    76   30.000000  Predicted
77    77   30.000000  Predicted
78    78   30.000000  Predicted
79    79   30.000000  Predicted

[80 rows x 3 columns]
ID034
Testing
result
ADF test statistic      -2.741582
p-value                  0.067115
# lags used             12.000000
# observations          59.000000
critical value (1%)     -3.546395
critical value (5%)     -2.911939
critical value (10%)    -2.593652
Weak evidence against the null hypothesis
Fail to reject the null hypothesis
Data has a unit root and i



The best order is 
start : 64
end : 71
The best order is 
MSE Error:         0.0
RMSE Error:         0.0


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    133.0
2022-10-30    133.0
2022-11-06    133.0
2022-11-13    133.0
2022-11-20    133.0
2022-11-27    133.0
2022-12-04    133.0
2022-12-11    133.0
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  201.285714     Actual
1      1  201.285714     Actual
2      2  201.285714     Actual
3      3  201.285714     Actual
4      4  454.000000     Actual
..   ...         ...        ...
75    75  133.000000  Predicted
76    76  133.000000  Predicted
77    77  133.000000  Predicted
78    78  133.000000  Predicted
79    79  133.000000  Predicted

[80 rows x 3 columns]
ID035
Testing
result
ADF test statistic      -3.002933
p-value                  0.034621
# lags used              1.000000
# observations          70.000000
critical value (1%)     -3.527426
critical value (5%)     -2.903811
critical value (10%)    -2.589320
Strong evidence against the null hypothesis
Reject the null hypothesis
Data has no unit root an



The best order is 
start : 64
end : 71
The best order is 
MSE Error: 157.9075409
RMSE Error: 12.56612673


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best order is 
(0, 1, 0)
2022-10-23    59.31
2022-10-30    59.31
2022-11-06    59.31
2022-11-13    59.31
2022-11-20    59.31
2022-11-27    59.31
2022-12-04    59.31
2022-12-11    59.31
Freq: W-SUN, Name: predicted_mean, dtype: float64
    Date         AQI       Type
0      0  200.148148     Actual
1      1  214.000000     Actual
2      2  214.000000     Actual
3      3  214.000000     Actual
4      4  210.756757     Actual
..   ...         ...        ...
75    75   59.310000  Predicted
76    76   59.310000  Predicted
77    77   59.310000  Predicted
78    78   59.310000  Predicted
79    79   59.310000  Predicted

[80 rows x 3 columns]




In [10]:

#pickle_out = open("model.pkl","wb")
#pickle.dump(results, pickle_out)
#pickle_out.close()

In [11]:
#Arima_model = ARIMA(random_state=0).fit()
#Arima_model = ARIMA(train,order=c1.determine_ARIMA_order(valcol)).fit()
#pickle.dump(Arima_model, open("/Users/nithingopinath/Desktop/Bayesian Ways/AQI Deployment using fastapi/ArimaModel.pkl","wb"))

# SARIMA Model

In [12]:
class TSA_seasonal:
    def __init__(self, df,idcol,loc):
        self.df = df
        self.idcol = idcol
        self.loc = loc
    
    def zone_df(self):
        self.df = self.df[self.df[self.idcol]== self.loc]
        print(self.loc)
        
#     def find_D(self,valcol):
#          # estimate number of seasonal differences using a Canova-Hansen test
#         D = nsdiffs(self.df[valcol],m=12,
#             test='ch')  # -> 0
#         return D

    def adf_test(self, valcol):
        """
        Pass in a time series and an optional title, returns an ADF report
        """
        result = adfuller(self.df[valcol].dropna(), autolag='AIC')  # .dropna() handles differenced data

        labels = ['ADF test statistic', 'p-value', '# lags used', '# observations']
        out = pd.Series(result[0:4], index=labels)

        for key, val in result[4].items():
            out[f'critical value ({key})'] = val

        if result[1] <= 0.05:
            state = "Stationary"
        else:
            state = "Non-stationary"
        return state

    def determine_SARIMA_order(self, valcol):
#         D = self.find_D(valcol)
        stepwise_fit = auto_arima(self.df[valcol], seasonal=True, m=12,
#                               start_p=0, start_q=0,
#                               start_P=0, start_Q=0,
#                                   D=D,
                                  error_action='ignore',  # we don't want to know if an order does not work
                                  suppress_warnings=True,  # we don't want convergence warnings
                                  stepwise=True)  # set to stepwise
        best_order = stepwise_fit.get_params().get('order')
        best_seasonal_order = stepwise_fit.get_params().get('seasonal_order')
        print('The best seasonal order is {}'.format(best_seasonal_order))
        print('The best order is {}'.format(best_order))
        return best_order, best_seasonal_order

    def fit_model(self, valcol):
    # Split the data into train, test, and validation sets
        train = self.df[valcol][:len(self.df[valcol]) - 12]
        test = self.df[valcol][len(self.df[valcol]) - 12:len(self.df[valcol]) - 8]
        val = self.df[valcol][len(self.df[valcol]) - 8:]

        # Determine the best SARIMA order
        best_order, best_seasonal_order = self.determine_SARIMA_order(valcol)

        # Fit the SARIMA model on the training data
        model = SARIMAX(train, order=best_order, seasonal_order=best_seasonal_order)
        results = model.fit()

        # Generate predictions for the test set
        predictions = results.predict(start=len(train), end=len(train) + len(test) - 1)

        # Generate predictions for the validation set
        predictions_val = results.predict(start=len(train) + len(test), end=len(train) + len(test) + len(val) - 1)

        # Calculate error metrics for the test set
        error1 = mean_squared_error(test, predictions)
        error2 = mean_squared_error(test, predictions, squared=False)  # RMSE
        error3 = mean_absolute_percentage_error(test, predictions)
        accuracy = (1 - error3) * 100
        print(f'MSE Error: {error1:11.10}')
        print(f'RMSE Error: {error2:11.10}')
        print(f'MAPE Error: {error3:11.10}')
        print(f'Accuracy: {accuracy:11.10}')
        return predictions_val

    def full_data_model(self, valcol):
        best_order, best_seasonal_order = self.determine_SARIMA_order(valcol)
        model = SARIMAX(self.df[valcol], order=best_order, seasonal_order=best_seasonal_order)
        results = model.fit()
        fcast = results.forecast(steps=8).round(2)  # Forecast 4 steps ahead
#         print(fcast)
#         fcast_index = pd.date_range(start=self.df.index[-1], periods=4 + 1, freq='M')[1:]  # Assuming monthly data
        DF_fcast = pd.DataFrame({valcol: fcast})
        DF_fcast['Type'] = 'Predicted'
        print(DF_fcast)
        DF = pd.DataFrame(self.df[valcol])
        DF['Type'] = 'Actual'
        # Concatenate original data and forecast data
        combined_DF = pd.concat([DF, DF_fcast])
        combined_DF = combined_DF.reset_index().rename(columns={'index':'Date'})
        print(combined_DF)
        DF_val = pd.DataFrame(c2.fit_model(valcol))
        DF_val = DF_val.reset_index()
        DF_val = DF_val.rename(columns={'index':'Date','predicted_mean':'Validation'})
#         combined_DF = combined_DF.rename(columns={'index':'Date'})
        print(DF_val)
        print(combined_DF)
#         final_DF =  final_DF.merge(DF_val, on='DateTime',how='outer')
        final_DF =  combined_DF.merge(DF_val, on='Date',how='outer')
        final_DF['Date'] = final_DF['Date'].astype('str')
#         print(combined_df)
        return final_DF

id_list = list(df_filtered.id.unique())
for item in id_list:
    c2 = TSA_seasonal(df_weekly,'id',item)
    c2.zone_df()
#     c2.find_D('AQI')
    c2.adf_test('AQI')
    c2.determine_SARIMA_order('AQI')
    c2.fit_model('AQI')
    c2.full_data_model('AQI')
    


ID001
The best seasonal order is (0, 0, 0, 12)
The best order is (0, 1, 1)
The best seasonal order is (0, 0, 0, 12)
The best order is (0, 1, 1)
MSE Error: 95.45083029
RMSE Error: 9.769894078
MAPE Error: 0.2496423892
Accuracy: 75.03576108


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best seasonal order is (0, 0, 0, 12)
The best order is (0, 1, 1)
              AQI       Type
2022-10-23  59.08  Predicted
2022-10-30  59.08  Predicted
2022-11-06  59.08  Predicted
2022-11-13  59.08  Predicted
2022-11-20  59.08  Predicted
2022-11-27  59.08  Predicted
2022-12-04  59.08  Predicted
2022-12-11  59.08  Predicted
         Date         AQI       Type
0  2021-06-06   49.448276     Actual
1  2021-06-13  159.000000     Actual
2  2021-06-20  159.000000     Actual
3  2021-06-27  159.000000     Actual
4  2021-07-04  159.000000     Actual
..        ...         ...        ...
75 2022-11-13   59.080000  Predicted
76 2022-11-20   59.080000  Predicted
77 2022-11-27   59.080000  Predicted
78 2022-12-04   59.080000  Predicted
79 2022-12-11   59.080000  Predicted

[80 rows x 3 columns]


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


The best seasonal order is (0, 0, 0, 12)
The best order is (0, 1, 1)
MSE Error: 95.45083029
RMSE Error: 9.769894078
MAPE Error: 0.2496423892
Accuracy: 75.03576108
        Date  Validation
0 2022-08-28   29.991575
1 2022-09-04   29.991575
2 2022-09-11   29.991575
3 2022-09-18   29.991575
4 2022-09-25   29.991575
5 2022-10-02   29.991575
6 2022-10-09   29.991575
7 2022-10-16   29.991575
         Date         AQI       Type
0  2021-06-06   49.448276     Actual
1  2021-06-13  159.000000     Actual
2  2021-06-20  159.000000     Actual
3  2021-06-27  159.000000     Actual
4  2021-07-04  159.000000     Actual
..        ...         ...        ...
75 2022-11-13   59.080000  Predicted
76 2022-11-20   59.080000  Predicted
77 2022-11-27   59.080000  Predicted
78 2022-12-04   59.080000  Predicted
79 2022-12-11   59.080000  Predicted

[80 rows x 3 columns]
ID003


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2


ValueError: All lag values up to 'maxlag' produced singular matrices. Consider using a longer series, a different lag term or a different test.