In [55]:

import torch 
import neuralprophet
from neuralprophet import NeuralProphet, set_log_level
from neuralprophet import set_random_seed 
# Configure NeuralProphet logging before any model is created.
# NOTE(review): the second positional argument is the string "INFO"; confirm
# against the set_log_level signature that a string is what it expects there.
set_log_level("ERROR", "INFO")

import numpy as np
# Fix NeuralProphet's random seed to a constant, presumably so that repeated
# runs of the notebook are comparable.
set_random_seed(0)
import pandas as pd
from sklearn.preprocessing import StandardScaler


def MSE(g_truth, measured):
    """Return the mean squared error between ground truth and predictions.

    Parameters
    ----------
    g_truth : array-like
        Observed values (NumPy array, list, tuple, ...).
    measured : array-like
        Predicted values; compared with ``g_truth`` element by element, by
        position.

    Returns
    -------
    float
        Sum of the squared differences divided by ``len(g_truth)``.
    """
    # Coerce both inputs so plain Python sequences work too; subtracting two
    # lists directly raises TypeError.
    truth = np.asarray(g_truth)
    predicted = np.asarray(measured)
    residual = truth - predicted
    return np.sum(np.square(residual)) / len(truth)
    
    

from sklearn.metrics import mean_absolute_percentage_error
def MAPE(g_truth, measured):
    """Mean absolute percentage error of ``measured`` against ``g_truth``.

    Thin wrapper: both arguments are forwarded, in this order, to
    ``mean_absolute_percentage_error`` and its result is returned unchanged.
    """
    score = mean_absolute_percentage_error(g_truth, measured)
    return score

def wMAPE(g_truth, measured):
    """Return the weighted mean absolute percentage error.

    Computed as ``sum(|g_truth - measured|) / sum(|g_truth|)``.

    Parameters
    ----------
    g_truth : array-like
        Observed values (NumPy array, list, tuple, ...).
    measured : array-like
        Predicted values; compared with ``g_truth`` element by element, by
        position.

    Returns
    -------
    float
        The ratio described above. When ``g_truth`` sums to zero in absolute
        value the division is still performed, so the result is ``nan`` or
        ``inf`` as produced by NumPy.
    """
    # Coerce both inputs so plain Python sequences work too; subtracting two
    # lists directly raises TypeError.
    truth = np.asarray(g_truth)
    predicted = np.asarray(measured)
    total_abs_error = np.sum(np.absolute(truth - predicted))
    total_abs_truth = np.sum(np.absolute(truth))
    return total_abs_error / total_abs_truth


def getPercentagedataset(N,n=96):
    """Return ``n`` as a fraction of ``N``.

    Both values are echoed to stdout before the ratio is computed.

    Parameters
    ----------
    N : number
        Denominator (the caller passes a dataset length).
    n : number, default 96
        Numerator.

    Returns
    -------
    float
        ``n / N``.
    """
    print ("N, n", N,n)
    fraction = n / N
    return fraction

def readDataset(csvFilename=""):
    """Load a CSV file, report its shape on stdout, and return the frame.

    Parameters
    ----------
    csvFilename : str, default ""
        Location handed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    frame = pd.read_csv(csvFilename)
    print ("shape of dataset: ", frame.shape)
    return frame

def ScaleDataset(df, date, numericalCols ):
    """Standardise the numeric columns and put the date column back in front.

    Parameters
    ----------
    df : pandas.DataFrame
        The columns to scale; all of them are passed to a freshly fitted
        ``StandardScaler``.
    date : array-like
        One entry per row of ``df``. It is attached by position as the first
        column of the result.
    numericalCols : list
        Names for the scaled columns, in the same order as the columns of
        ``df``. The list is read but never modified.

    Returns
    -------
    pandas.DataFrame
        A frame on a default integer index whose columns are
        ``["date", *numericalCols]``.
    """
    # Fit and apply the scaler in one step; the result is a bare ndarray, so
    # the rebuilt frame starts out with integer column labels.
    scaled_values = StandardScaler().fit_transform(df)
    scaled = pd.DataFrame(scaled_values)

    # Build the final header on a copy. Inserting "date" into the caller's
    # own list would make a second call with the same list produce a
    # duplicated "date" entry.
    column_names = ["date"] + list(numericalCols)
    print ("numericalCols: ",column_names)

    # Attach dates by position. Inserting a Series aligns on index labels,
    # which yields NaN dates whenever the caller's frame does not carry a
    # default 0..n-1 index.
    scaled.insert(0, 'date', np.asarray(date))
    print ("columns: " , scaled.columns)

    # Plain attribute assignment works on every pandas version, whereas
    # set_axis(..., inplace=False) fails once the `inplace` keyword is gone.
    scaled.columns = column_names

    return scaled



def prepareDataFrame(df):
    """Reshape a wide table into the long ``ds`` / ``y`` / ``ID`` layout.

    Every column after the first is treated as one series. For each of them
    the ``"date"`` column and the series values are stacked under the names
    ``ds`` and ``y``, with the original column name recorded in ``ID``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a ``"date"`` column followed by one column per series.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        All series stacked vertically with columns ``ds``, ``y``, ``ID``; each
        series keeps the row index it had in ``df``. An empty frame is
        returned when ``df`` has no series columns.
    """
    series_names = list(df)[1:]

    # Collect the per-series pieces and concatenate once at the end; calling
    # pd.concat inside the loop re-copies everything gathered so far on every
    # iteration.
    long_parts = []
    for col in series_names:
        print (col)
        part = (
            df[["date", col]]
            .rename(columns={"date": "ds", col: "y"})
            .assign(ID=col)
        )
        long_parts.append(part)

    if not long_parts:
        # pd.concat raises on an empty list, so return an empty frame here.
        return pd.DataFrame()

    return pd.concat(long_parts)

def train_test_split(df_global, nhorizion=96, valid_p_=-1):
    """Split the long-format frame into train and test parts.

    Parameters
    ----------
    df_global : pandas.DataFrame
        Long-format data, handed to ``NeuralProphet.split_df``.
    nhorizion : int, default 96
        Hold-out size used when ``valid_p_`` is left at its ``-1`` default.
    valid_p_ : int or float, default -1
        Forwarded to ``split_df`` as ``valid_p``. ``-1`` means "not
        supplied", in which case ``nhorizion`` is used instead.

    Returns
    -------
    tuple
        ``(df_train, df_test)`` as returned by ``split_df``.
    """
    # -1 is not a meaningful hold-out size, so treat it as "not supplied" and
    # fall back to the otherwise unused horizon argument.
    if valid_p_ == -1:
        valid_p_ = nhorizion
    print ("percentage test: ", valid_p_)

    # A throw-away model is created only to reach split_df; its n_lags
    # matches the value the training step uses for the real model.
    splitter = NeuralProphet(n_lags=336)
    df_train, df_test = splitter.split_df(df_global, valid_p=valid_p_, local_split=True)
    print ("train, test shape: ",df_train.shape, df_test.shape)
    return (df_train, df_test)

def training(df_train, df_test, col=""):
    """Fit a NeuralProphet model and score its predictions for one series.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Long-format training data passed to ``NeuralProphet.fit``.
    df_test : pandas.DataFrame
        Long-format hold-out data used to build the prediction frame.
    col : str, default ""
        ``ID`` of the series to evaluate. An empty value selects ``"OT"``.

    Returns
    -------
    tuple
        ``((mse, mape, wmape), scored_rows, all_predictions)`` where
        ``scored_rows`` holds the selected series' prediction rows that
        contain no missing values and ``all_predictions`` is the full output
        of ``predict``.

    Notes
    -----
    The ``trend_global_local`` / ``season_global_local`` keyword arguments
    raised ``TypeError`` in a recorded run of this notebook where the
    installed neuralprophet reported version 0.4.1; a release that accepts
    them is required.
    """
    # Honour the caller's choice of series; "OT" stays the default target.
    target_id = col if col else "OT"

    print ("initialising  neuralprophet model with n_lag=336, global trend and seasonal effect")
    m = NeuralProphet(trend_global_local="global", season_global_local="global", n_lags=336)
    m.fit(df_train, freq='auto')

    future = m.make_future_dataframe(df_test, n_historic_predictions=True)
    # Named `predictions` so the module-level forecast() is not shadowed.
    predictions = m.predict(future)

    # Keep only the target series and drop rows with any missing value, so
    # the metrics see complete (y, yhat1) pairs only.
    forecast_ = predictions[predictions.ID == target_id].dropna()
    actual = forecast_.y.to_numpy()
    predicted = forecast_.yhat1.to_numpy()

    mse_ = (MSE(actual, predicted), MAPE(actual, predicted), wMAPE(actual, predicted))
    return (mse_, forecast_, predictions)

def forecast(dataset_="ETTh1.csv", nhorizion=96):
    """Run the full pipeline on one CSV: load, scale, reshape, split, train.

    Parameters
    ----------
    dataset_ : str, default "ETTh1.csv"
        Path of the CSV file to load. The file needs a ``date`` column;
        every column after the first is treated as a numeric series.
    nhorizion : int, default 96
        Passed to ``train_test_split`` as ``valid_p_``.

    Returns
    -------
    tuple
        Whatever ``training`` returns for the resulting train/test split.
    """
    raw = readDataset(dataset_)

    # Everything after the first column is scaled; the dates go along
    # separately so they can be re-attached untouched.
    numericalCols = list(raw)[1:]
    scaled = ScaleDataset(raw[numericalCols], raw.date, numericalCols)

    long_df = prepareDataFrame(scaled)

    # valid_p_ is given as an integer row count, not as a fraction. Per the
    # recorded runs, each test series then holds the look-back window plus
    # the horizon (336 + 96 rows with the defaults).
    df_train, df_test = train_test_split(long_df, valid_p_=nhorizion)

    return training(df_train, df_test)




In [56]:
forecast(dataset_="dataset/ETTh1.csv",nhorizion=96)

shape of dataset:  (17420, 8)
numericalCols:  ['date', 'HUFL', 'HULL', 'MUFL', 'MULL', 'LUFL', 'LULL', 'OT']
columns:  Index(['date', 0, 1, 2, 3, 4, 5, 6], dtype='object')
HUFL
HULL
MUFL
MULL
LUFL
LULL
OT
percentage test:  96
train, test shape:  (121268, 3) (3024, 3)
initialising  neuralprophet model with n_lag=336, global trend and seasonal effect


TypeError: __init__() got an unexpected keyword argument 'trend_global_local'

In [13]:
df=readDataset("ETTh1.csv")
numericalCols = list(df)[1:] 
df = ScaleDataset(df[numericalCols],df.date, numericalCols)


shape of dataset:  (17420, 8)
['HUFL', 'HULL', 'MUFL', 'MULL', 'LUFL', 'LULL', 'OT', 'date']


In [58]:
import neuralprophet  as nph
nph.__version__


'0.4.1'

In [18]:
#!pip install scikit-learn

In [15]:
list(df)[0:-1]

['HUFL', 'HULL', 'MUFL', 'MULL', 'LUFL', 'LULL', 'OT']

In [16]:
df

Unnamed: 0,HUFL,HULL,MUFL,MULL,LUFL,LULL,OT,date
0,-0.219049,-0.114207,-0.395683,-0.231903,0.976355,0.805738,2.008513,2016-07-01 00:00:00
1,-0.238009,-0.081400,-0.411356,-0.251800,0.923970,0.857445,1.688203,2016-07-01 01:00:00
2,-0.313849,-0.245432,-0.442557,-0.291043,0.610524,0.602247,1.688203,2016-07-01 02:00:00
3,-0.323329,-0.147013,-0.442557,-0.271146,0.636286,0.703993,1.368010,2016-07-01 03:00:00
4,-0.285409,-0.147013,-0.411356,-0.231903,0.688671,0.703993,1.006610,2016-07-01 04:00:00
...,...,...,...,...,...,...,...,...
17415,-1.280380,0.640341,-1.452403,0.691136,0.348602,1.110975,-0.282568,2018-06-26 15:00:00
17416,-1.820597,1.001211,-1.967580,0.769622,0.400987,1.364505,-0.266225,2018-06-26 16:00:00
17417,-0.645506,0.771566,-0.749583,0.671791,0.558139,1.110975,-0.356458,2018-06-26 17:00:00
17418,0.264287,0.771566,0.171642,0.671791,0.505755,0.959190,-0.414007,2018-06-26 18:00:00


  df2 = df.set_axis(numericalCols, axis=1, inplace=False)



In [240]:
df2.HULL

0       -0.114207
1       -0.081400
2       -0.245432
3       -0.147013
4       -0.147013
           ...   
17415    0.640341
17416    1.001211
17417    0.771566
17418    0.771566
17419    0.640341
Name: HULL, Length: 17420, dtype: float64

In [19]:
mse_etth1 =  forecast(dataset_="dataset/ETTh1.csv",nhorizion=96)

shape of dataset:  (17420, 8)
['HUFL', 'HULL', 'MUFL', 'MULL', 'LUFL', 'LULL', 'OT', 'date']
HULL
MUFL
MULL
LUFL
LULL
OT
date


ValueError: Plan shapes are not aligned

In [127]:
mse_etth2 = forecast(dataset_="dataset/ETTh2.csv")

shape of dataset:  (17420, 8)
HUFL
HULL
MUFL
MULL
LUFL
LULL
OT
percentage test:  96
train, test shape:  (121268, 3) (3024, 3)
initialising  neuralprophet model with n_lag=336, global trend and seasonal effect


Finding best initial lr:   0%|          | 0/276 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 930it [00:00, ?it/s]

Predicting: 930it [00:00, ?it/s]

Predicting: 930it [00:00, ?it/s]

Predicting: 930it [00:00, ?it/s]

Predicting: 930it [00:00, ?it/s]

Predicting: 930it [00:00, ?it/s]

Predicting: 930it [00:00, ?it/s]

In [100]:
#mse_electricity = forecast(dataset_="dataset/electricity.csv")

In [128]:
mse_nillness = forecast(dataset_="dataset/national_illness.csv")

shape of dataset:  (966, 8)
% WEIGHTED ILI
%UNWEIGHTED ILI
AGE 0-4
AGE 5-24
ILITOTAL
NUM. OF PROVIDERS
OT
percentage test:  96
train, test shape:  (6090, 3) (3024, 3)
initialising  neuralprophet model with n_lag=336, global trend and seasonal effect


Finding best initial lr:   0%|          | 0/239 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 117it [00:00, ?it/s]

Predicting: 117it [00:00, ?it/s]

Predicting: 117it [00:00, ?it/s]

Predicting: 117it [00:00, ?it/s]

Predicting: 117it [00:00, ?it/s]

Predicting: 117it [00:00, ?it/s]

Predicting: 117it [00:00, ?it/s]

In [129]:
mse_nillness[0]

(7694508293.371501, 0.0541546324048271, 0.05158129635692672)

In [131]:
mse_nillness[2].dropna()

Unnamed: 0,ds,y,ID,yhat1,ar1,trend,season_yearly
336,2018-09-04,1.216970e+00,% WEIGHTED ILI,1.111358e+00,5.043338e-01,0.456522,0.150503
337,2018-09-11,1.112660e+00,% WEIGHTED ILI,1.107359e+00,5.561739e-01,0.456794,0.094391
338,2018-09-18,9.918880e-01,% WEIGHTED ILI,8.349651e-01,3.555984e-01,0.457067,0.022300
339,2018-09-25,9.464010e-01,% WEIGHTED ILI,6.290653e-01,2.063827e-01,0.457339,-0.034657
340,2018-10-02,8.746950e-01,% WEIGHTED ILI,8.906633e-01,4.838840e-01,0.457612,-0.050833
...,...,...,...,...,...,...,...
3025,2020-06-02,1.525058e+06,OT,1.490205e+06,1.452211e+06,94142.640625,-56148.046875
3026,2020-06-09,1.538038e+06,OT,1.403703e+06,1.364514e+06,94204.632812,-55015.355469
3027,2020-06-16,1.528103e+06,OT,1.442493e+06,1.388488e+06,94266.632812,-40261.847656
3028,2020-06-23,1.542813e+06,OT,1.408602e+06,1.332760e+06,94328.625000,-18486.371094


In [103]:
df=readDataset("dataset/traffic.csv")
df = prepareDataFrame(df)
#df_train,df_test = train_test_split(df)
#mse_ = training(df_train,df_test)
df

shape of dataset:  (17544, 863)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
26

Unnamed: 0,ds,y,ID
0,2016-07-01 02:00:00,0.0048,0
1,2016-07-01 03:00:00,0.0072,0
2,2016-07-01 04:00:00,0.0040,0
3,2016-07-01 05:00:00,0.0039,0
4,2016-07-01 06:00:00,0.0042,0
...,...,...,...
17539,2018-07-01 21:00:00,0.0429,OT
17540,2018-07-01 22:00:00,0.0363,OT
17541,2018-07-01 23:00:00,0.0325,OT
17542,2018-07-02 00:00:00,0.0279,OT


In [None]:
df_train,df_test = train_test_split(df)


In [None]:
mse_ = training(df_train,df_test)
mse_

In [63]:
mse_traffic = forecast(dataset_="dataset/traffic.csv")

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

KeyboardInterrupt: 

In [None]:
#mse_weather = forecast(dataset_="dataset/weather.csv")

In [114]:
mse_exchange = forecast(dataset_="dataset/exchange_rate.csv")

shape of dataset:  (7588, 9)
N, n 7588 96
0
1
2
3
4
5
6
OT
percentage test:  0.012651555086979441
train, test shape:  (59976, 3) (3416, 3)
initialising  neuralprophet model with n_lag=336, global trend and seasonal effect


Finding best initial lr:   0%|          | 0/268 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

Predicting: 896it [00:00, ?it/s]

Predicting: 896it [00:00, ?it/s]

Predicting: 896it [00:00, ?it/s]

Predicting: 896it [00:00, ?it/s]

Predicting: 896it [00:00, ?it/s]

Predicting: 896it [00:00, ?it/s]

Predicting: 896it [00:00, ?it/s]

Predicting: 896it [00:00, ?it/s]

In [None]:
#mse_etth1[0], mse_etth2[0], mse_electricity[0], mse_nillness[0], mse_traffic[0], mse_weather[0], mse_exchange[0]

In [105]:
mse_etth1[0], mse_etth2[0], mse_exchange[0], mse_nillness[0]

((0.19139924184288307, 0.039036368771985276, 0.037714267825433787),
 (0.7489584372863051, 0.016698239520735465, 0.0172925895662325),
 2.6234128379888493e-05,
 (9424765047.663118, 0.0602314506398637, 0.053110932630107854))

In [111]:
mse_exchange

(2.6234128379888493e-05, 0.006606978528943875, 0.006604926697787127)

In [81]:
mse_etth1[1]

Unnamed: 0,ds,y,ID,yhat1,ar1,trend,season_weekly,season_daily
2436,2018-06-26 07:00:00,9.638,OT,9.662354,13.499414,-3.911371,-0.066544,0.140854
2437,2018-06-26 08:00:00,9.426,OT,9.763968,13.798329,-3.911364,-0.071024,-0.051975
2438,2018-06-26 09:00:00,9.075,OT,9.48516,13.651322,-3.911356,-0.075206,-0.1796
2439,2018-06-26 10:00:00,8.934,OT,9.380656,13.507432,-3.911348,-0.079056,-0.136371
2440,2018-06-26 11:00:00,9.215,OT,9.072696,13.13217,-3.91134,-0.082542,-0.065592
2441,2018-06-26 12:00:00,9.215,OT,9.172856,13.239254,-3.911333,-0.085634,-0.069431
2442,2018-06-26 13:00:00,9.426,OT,9.34729,13.395054,-3.911325,-0.088308,-0.04813
2443,2018-06-26 14:00:00,10.2,OT,9.776398,13.726209,-3.911317,-0.09054,0.052046
2444,2018-06-26 15:00:00,10.904,OT,10.521626,14.425433,-3.91131,-0.092312,0.099815
2445,2018-06-26 16:00:00,11.044,OT,10.942122,14.920029,-3.911302,-0.093609,0.027004


In [88]:
mse_etth1[1].shape

(13, 8)

104