<a href="https://colab.research.google.com/github/shohan007/DIS-soft-sensing-benchmarking-with-SVR-ANN/blob/main/SVM_and_ANN_one_response.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [58]:
from pandas import DataFrame
from pandas import concat
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn import preprocessing

#One-Step Univariate Forecasting function
 
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
	"""
	Frame a time series as a supervised learning dataset.
	Arguments:
		data: Sequence of observations as a list or NumPy array.
		n_in: Number of lag observations as input (X).
		n_out: Number of observations as output (y).
		dropnan: Boolean whether or not to drop rows with NaN values.
	Returns:
		Pandas DataFrame of series framed for supervised learning.
	"""
	n_vars = 1 if type(data) is list else data.shape[1]
	df = DataFrame(data)
	cols, names = list(), list()
	# input sequence (t-n, ... t-1)
	for i in range(n_in, 0, -1):
		cols.append(df.shift(i))
		names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
	# forecast sequence (t, t+1, ... t+n)
	for i in range(0, n_out):
		cols.append(df.shift(-i))
		if i == 0:
			names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
		else:
			names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
	# put it all together
	agg = concat(cols, axis=1)
	agg.columns = names
	# drop rows with NaN values
	if dropnan:
		agg.dropna(inplace=True)
	return agg

In [106]:
#Data preprocessing
dataset=pd.read_excel('/content/vetch for svm_5data.xlsx')
values=dataset.iloc[:,16]
values=pd.DataFrame(values)
#values.head(10)

# data splitting (last 10 row (2 days) test data)

data = series_to_supervised(values,5)   # changing the numeric value changes the time lag difference (2-5 should be allowable as we are using maximum of 5 data from each day)
#print(data)
#scale= preprocessing.MinMaxScaler()
#data[data.columns] = scale.fit_transform(data)
#data

X=data.iloc[:,1:-1]  
y=pd.DataFrame(data.iloc[:,-1]) #Last row as Y value

#Training Test split

X_train=X.iloc[1:-10,:].values
y_train=y.iloc[1:-10,:].values             #last 10 row test and prediction data

X_test=X.iloc[X_train.shape[0]:,:].values
y_test=y.iloc[y_train.shape[0]:,:].values  

y_train_df=y.iloc[1:-10,:] #making dataframe for using in graph
y_test_df=y.iloc[y_train.shape[0]:,:] #making dataframe for using in graph


#data.head()

# Create the SVR regressor
regressor = SVR(C=1, epsilon=.1, kernel = 'linear')
sor = regressor.fit(X_train, y_train) #single output regression

# Generate predictions for testing data
y_pred = sor.predict(X_test)

#prediction of test sata
predictions_y=pd.DataFrame(y_pred)
predictions_y.set_index(y_test_df.index, inplace=True)
predictions_y.rename(columns={0: 'DeltaP'}, inplace=True)
#y_test_df.head()  

#predictions_y
predictions_y.head()

#predictions of training data
predictions_x=pd.DataFrame(sor.predict(X_train))
#predictions_x.head()

# Calculate the root mean square error (RMSE), normalized root mean square error (nRMSE) for test and training data
N = len(y_test)
rmse_test = np.sqrt(np.sum((np.array(y_test).flatten() - np.array(predictions_y).flatten())**2)/N)
nRMSE_test=rmse_test/(y_test_df.max().values-y_test_df.min().values)
print("nRMSE TEST: ", nRMSE_test)    

N = len(y_train)
rmse_train = np.sqrt(np.sum((np.array(y_train).flatten() - np.array(predictions_x).flatten())**2)/N)
nRMSE_train=rmse_test/(y_train_df.max().values-y_train_df.min().values)
print("nRMSE train: ", nRMSE_train) 

print("RMSE TEST: ", rmse_test) 
print("RMSE train: ", rmse_train)

nRMSE TEST:  [0.25657078]
nRMSE train:  [0.14562569]
RMSE TEST:  0.2273217090096336
RMSE train:  0.30514969727556746


  y = column_or_1d(y, warn=True)


In [109]:
#With cross validation

from numpy import absolute
from numpy import mean
from numpy import std
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold

# define the evaluation procedure
cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=1)

# evaluate the model and collect the scores
n_scores = cross_val_score(sor, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)

# force the scores to be positive
n_scores = absolute(n_scores)
# summarize performance
print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

MAE: 0.231 (0.041)


##ANN

In [112]:
from numpy import asarray
from sklearn.datasets import make_regression
from keras.models import Sequential
from keras.layers import Dense

# data splitting (last 20 row test data)

data = series_to_supervised(values,5)
#print(data)
#scale= preprocessing.MinMaxScaler()
#data[data.columns] = scale.fit_transform(data)
#data

X=data.iloc[:,1:-1]  
y=pd.DataFrame(data.iloc[:,-1]) #Last 10 column Y value

#Training Test split

X_train=X.iloc[1:-10,:].values
X_test=X.iloc[X_train.shape[0]:,:].values

y_train=y.iloc[1:-10,:].values      #last 10 row test and prediction data
y_test=y.iloc[y_train.shape[0]:,:].values

y_train_df=y.iloc[1:-10,:]
y_test_df=y.iloc[y_train.shape[0]:,:]

data.head()

Unnamed: 0,var1(t-5),var1(t-4),var1(t-3),var1(t-2),var1(t-1),var1(t)
5,1.798,1.842,1.681,1.659,1.597,2.684
6,1.842,1.681,1.659,1.597,2.684,2.764
7,1.681,1.659,1.597,2.684,2.764,2.461
8,1.659,1.597,2.684,2.764,2.461,2.669
9,1.597,2.684,2.764,2.461,2.669,2.486


In [113]:
# use mlp for prediction on multi-output regression

# get the model
def get_model(n_inputs, n_outputs):
	model = Sequential()
	model.add(Dense(8, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
	model.add(Dense(12, kernel_initializer='he_uniform', activation='relu')) 
	model.add(Dense(n_outputs, kernel_initializer='he_uniform'))
	model.compile(loss='mae', optimizer='adam')
	return model
 
# load dataset
n_inputs, n_outputs = X.shape[1], y.shape[1]

# get model
model = get_model(n_inputs, n_outputs)

# fit the model on all data
model.fit(X_train, y_train, verbose=0, epochs=5000)

# make a prediction for new data
yhat = model.predict(X_test)
#print('Predicted: %s' % yhat[0])

#prediction of test data
predictions_y=pd.DataFrame(yhat)
predictions_y.set_index(y_test_df.index, inplace=True)
predictions_y.rename(columns={0: 'DeltaP'}, inplace=True)
#predictions_y
#y_test_df.head()  

#predictions of training data
predictions_x=pd.DataFrame(model.predict(X_train))

# Calculate the root mean square error (RMSE) for test and training data
N = len(y_test)
rmse_test = np.sqrt(np.sum((np.array(y_test).flatten() - np.array(predictions_y).flatten())**2)/N)
nRMSE_test=rmse_test/(y_test_df.max().values-y_test_df.min().values)
print("nRMSE TEST: ", nRMSE_test)    

N = len(y_train)
rmse_train = np.sqrt(np.sum((np.array(y_train).flatten() - np.array(predictions_x).flatten())**2)/N)
nRMSE_train=rmse_train/(y_train_df.max().values-y_train_df.min().values)
print("nRMSE train: ", nRMSE_train)

print("RMSE TEST: ", rmse_test) 
print("RMSE train: ", rmse_train)


nRMSE TEST:  [0.23661637]
nRMSE train:  [0.18292488]
RMSE TEST:  0.2096421029778815
RMSE train:  0.2855457327950463


In [114]:
# Calculate the root mean square error (RMSE) for test and training data

#prediction of test sata
predictions_y=pd.DataFrame(yhat)
predictions_y.set_index(y_test_df.index, inplace=True)
predictions_y.rename(columns={0: 'DeltaP'}, inplace=True)
#predictions_y
#y_test_df.head()  

#predictions of training data
predictions_x=pd.DataFrame(model.predict(X_train))

# Calculate the root mean square error (RMSE) for test and training data
N = len(y_test)
rmse_test = np.sqrt(np.sum((np.array(y_test).flatten() - np.array(predictions_y).flatten())**2)/N)
nRMSE_test=rmse_test/(y_test_df.max().values-y_test_df.min().values)
print("nRMSE TEST: ", nRMSE_test)    

N = len(y_train)
rmse_train = np.sqrt(np.sum((np.array(y_train).flatten() - np.array(predictions_x).flatten())**2)/N)
nRMSE_train=rmse_train/(y_train_df.max().values-y_train_df.min().values)
print("nRMSE train: ", nRMSE_train)

print("RMSE TEST: ", rmse_test) 
print("RMSE train: ", rmse_train)

nRMSE TEST:  [0.23661637]
nRMSE train:  [0.18292488]
RMSE TEST:  0.2096421029778815
RMSE train:  0.2855457327950463
