In [None]:
# Import required packages
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
import os
import warnings
warnings.filterwarnings("ignore")
from openpyxl.utils import datetime

In [5]:
# split a univariate time series into patterns
def get_Patterns(TSeries, n_inputs,h):
    X,z = pd.DataFrame(np.zeros((len(TSeries)-n_inputs-h+1,n_inputs))), pd.DataFrame()
    y_list = [] 
    for i in range(len(TSeries)):
        # find the end of this pattern
        end_ix = i + n_inputs + h - 1
        # check if we are beyond the time series
        if end_ix > len(TSeries)-1:
            break
        # gather input and output parts of the pattern
        for j in range(n_inputs):
            X.loc[i,j]=TSeries.iloc[i+j,0]
        i=i+n_inputs
        # y=pd.concat([TSeries.iloc[end_ix],y])
        y_list.append(TSeries.iloc[end_ix])
        y = pd.concat(y_list, axis=1).T.reset_index(drop=True)
    return X,y

In [6]:
# originalData should be a Column Vectored DataFrame
def minmaxNorm(originalData, lenTrainValidation):
    # Maximum Value
    max2norm=max(originalData.iloc[0:lenTrainValidation,0])
    # Minimum Value
    min2norm=min(originalData.iloc[0:lenTrainValidation,0])
    lenOriginal=len(originalData)
    normalizedData=np.zeros(lenOriginal)   
    normalizedData = []
    #Normalize using Min-Max Normalization
    for i in range (lenOriginal):
        normalizedData.append((originalData.iloc[i]-min2norm)/(max2norm-min2norm))    
    return pd.DataFrame(normalizedData)

In [7]:
# originalData and forecastedData should be Column Vectored DataFrames
def minmaxDeNorm( originalData, forecastedData, lenTrainValidation):
    # Maximum Value
    max2norm=max(originalData.iloc[0:lenTrainValidation,0])
    # Minimum Value
    min2norm=min(originalData.iloc[0:lenTrainValidation,0])
    lenOriginal=len(originalData)
    denormalizedData=[]   
    #De-Normalize using Min-Max Normalization
    for i in range (lenOriginal):
        denormalizedData.append((forecastedData.iloc[i]*(max2norm-min2norm))+min2norm)  
    return pd.DataFrame(denormalizedData)

In [8]:
# Timeseries_Data and forecasted_value should be Column Vectored DataFrames
def findRMSE( Timeseries_Data, forecasted_value,lenTrainValidation):
    forGraph = {
        'Actual': [],
        'Predicted': [],
        'Error': []
    }

    l=Timeseries_Data.shape[0]
    lenTest=l-lenTrainValidation
    # RMSE on Train & Validation Set
    trainRMSE=0;
    for i in range (lenTrainValidation):
        forGraph['Actual'].append(Timeseries_Data.iloc[i,0])
        forGraph['Predicted'].append(forecasted_value.iloc[i,0])
        forGraph['Error'].append(forecasted_value.iloc[i,0]-Timeseries_Data.iloc[i,0])
        trainRMSE=trainRMSE+np.power((forecasted_value.iloc[i,0]-Timeseries_Data.iloc[i,0]),2)
    trainRMSE=np.sqrt(trainRMSE/lenTrainValidation)
    # RMSE on Test Set
    testRMSE=0;
    for i in range (lenTrainValidation,l,1):
        testRMSE=testRMSE+np.power((forecasted_value.iloc[i,0]-Timeseries_Data.iloc[i,0]),2)
    testRMSE=np.sqrt(testRMSE/lenTest)
    return trainRMSE, testRMSE, forGraph

In [9]:
# Timeseries_Data and forecasted_value should be Column Vectored DataFrames
def findMAE(Timeseries_Data, forecasted_value,lenTrainValidation):
    l=Timeseries_Data.shape[0]
    lenTest=l-lenTrainValidation
    # MAE on Train & Validation Set
    trainMAE=0;
    for i in range (lenTrainValidation):
        trainMAE=trainMAE+np.abs(forecasted_value.iloc[i,0]-Timeseries_Data.iloc[i,0]) 
    trainMAE=(trainMAE/(lenTrainValidation));
    # MAE on Test Set
    testMAE=0;
    for i in range (lenTrainValidation,l,1):
        testMAE=testMAE+np.abs(forecasted_value.iloc[i,0]-Timeseries_Data.iloc[i,0])
    testMAE=(testMAE/lenTest);
    return trainMAE, testMAE

In [10]:
def Find_Fitness(x,y,lenValid,lenTest,model):
    NOP=y.shape[0]
    lenTrain=NOP-lenValid-lenTest
    xTrain=x.iloc[0:lenTrain,:]
    xValid=x.iloc[lenTrain:(lenTrain+lenValid),:]
    xTest=x.iloc[(lenTrain+lenValid):NOP,:]
    yTrain=y.iloc[0:lenTrain,0]
    yValid=y.iloc[lenTrain:(lenTrain+lenValid),0]
    yTest=y.iloc[(lenTrain+lenValid):NOP,0]
    model.fit(xTrain, yTrain)
    yhatNorm=model.predict(x).flatten().reshape(x.shape[0],1)
    return pd.DataFrame(yhatNorm)

In [12]:
#Read the Time Series Dataset
forGraph = {}
Timeseries_Data=pd.read_csv('AQI.csv',header=None)
LagLength=24
h=1
lt=Timeseries_Data.shape[0]
lenTrain=int(round(lt*0.7))
lenValidation=int(round(lt*0.15))
lenTest=int(lt-lenTrain-lenValidation)
# NORMALIZE THE DATA
normalizedData=minmaxNorm(Timeseries_Data,lenTrain+lenValidation);
# Transform the Time Series into Patterns Using Sliding Window
X, y = get_Patterns(normalizedData, LagLength, h)
model=MLPRegressor(hidden_layer_sizes=(100))
name='MLP'
file1='./'+str(name)+"_Accuracy.xlsx"
file2='./'+str(name)+"_Forecasts.xlsx"
Forecasts=pd.DataFrame()
Accuracy=pd.DataFrame()
ynorm1=Find_Fitness(X,y,lenValidation,lenTest,model)
ynorm=pd.DataFrame(normalizedData.iloc[0:(LagLength+h-1),0])
# ynorm=ynorm.append(ynorm1,ignore_index = True)
ynorm = pd.concat([ynorm,ynorm1])
yhat=minmaxDeNorm(Timeseries_Data, ynorm, lenTrain+lenValidation)
Accuracy.loc[1,0],Accuracy.loc[1,1],forGraph=findRMSE( Timeseries_Data,yhat,lenTrain+lenValidation)
Accuracy.loc[1,2],Accuracy.loc[1,3]=findMAE( Timeseries_Data,yhat,lenTrain+lenValidation)
Accuracy.to_excel(file1,sheet_name='Accuracy',index=False)
yhat.to_excel(file2,sheet_name='Forecasts',index=False)
print(Accuracy)

           0          1          2         3
1  22.675426  12.730907  12.254384  8.640218


In [69]:
!pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
     ---------------------------------------- 0.0/250.0 kB ? eta -:--:--
     - -------------------------------------- 10.2/250.0 kB ? eta -:--:--
     - -------------------------------------- 10.2/250.0 kB ? eta -:--:--
     ---- -------------------------------- 30.7/250.0 kB 217.9 kB/s eta 0:00:02
     ---- -------------------------------- 30.7/250.0 kB 217.9 kB/s eta 0:00:02
     ------------- ----------------------- 92.2/250.0 kB 403.5 kB/s eta 0:00:01
     ------------- ----------------------- 92.2/250.0 kB 403.5 kB/s eta 0:00:01
     ----------------------- ------------ 163.8/250.0 kB 490.7 kB/s eta 0:00:01
     ------------------------- ---------- 174.1/250.0 kB 499.5 kB/s eta 0:00:01
     ---------------------------- ------- 194.6/250.0 kB 471.4 kB/s eta 0:00:01
     ---------------------------- ------- 194.6/250.0 kB 471.4 kB/s eta 0:00:01
     ------------------------------------ 250.0/250.0 kB 494

In [44]:
import pickle 
pickle.dump(model,open('tseriesmodel.pkl','wb'))

In [82]:
import random

In [32]:
j = []
for i in range(0,24):
      j.append(random.randrange(9))
      
j = [j]
     
print(model.predict(j)[0])

NameError: name 'random' is not defined

In [18]:
%store LagLength

Stored 'LagLength' (int)


In [19]:
%store 

Stored variables and their in-db values:
LagLength             -> 22


In [45]:
import json
lagvar = json.dumps(LagLength)
with open("LagLength.json", "w") as f:
    f.write(lagvar)

In [46]:
import pandas as pd
tet = {'':[33,55,77,99,11,45,345,23423,2323,24234,23465,6786,789,67867,5658,56856,6796,68568,3346,336,33456,5459,7474,52625]}
test = pd.DataFrame(tet)
test.to_csv('tester.csv',index=False,encoding='UTF-8')

In [13]:
forGraph

{'Actual': [653,
  645,
  532,
  561,
  567,
  506,
  488,
  435,
  430,
  398,
  413,
  407,
  353,
  330,
  387,
  456,
  450,
  446,
  475,
  480,
  484,
  488,
  494,
  501,
  548,
  554,
  558,
  562,
  550,
  540,
  529,
  513,
  449,
  435,
  422,
  409,
  403,
  404,
  402,
  402,
  400,
  398,
  393,
  387,
  382,
  376,
  370,
  364,
  309,
  271,
  208,
  142,
  103,
  101,
  101,
  101,
  101,
  101,
  98,
  90,
  92,
  95,
  96,
  97,
  99,
  107,
  122,
  135,
  149,
  162,
  181,
  205,
  233,
  256,
  277,
  293,
  324,
  325,
  326,
  354,
  354,
  354,
  355,
  356,
  356,
  357,
  358,
  339,
  316,
  316,
  318,
  319,
  322,
  324,
  325,
  324,
  321,
  320,
  320,
  320,
  320,
  322,
  322,
  324,
  324,
  326,
  328,
  330,
  331,
  332,
  332,
  332,
  332,
  330,
  328,
  326,
  324,
  322,
  320,
  320,
  320,
  320,
  320,
  320,
  319,
  320,
  320,
  320,
  320,
  320,
  319,
  319,
  319,
  314,
  314,
  313,
  312,
  315,
  316,
  318,
  319,
  320,
  3

In [14]:
toGraph = pd.DataFrame(forGraph)
toGraph

Unnamed: 0,Actual,Predicted,Error
0,653,653.000000,0.000000
1,645,645.000000,0.000000
2,532,532.000000,0.000000
3,561,561.000000,0.000000
4,567,567.000000,0.000000
...,...,...,...
420,367,366.813200,-0.186800
421,366,367.068672,1.068672
422,368,366.561451,-1.438549
423,368,365.499083,-2.500917


In [15]:
toGraph.to_csv('ConfidenceInt.csv',index=False,encoding='UTF-8')