### linear Regression models

### Use advertising data to build a model to predict sales based on  predictor variables

In [1]:
import pandas                       as     pd
import numpy                        as     np
import scipy.stats                  as     stats

import seaborn                      as     sns
import matplotlib.pyplot            as     plt
import matplotlib

matplotlib.rcParams.update({'font.size': 12})

import astropy.table                as     Table
import statsmodels.api              as     sm
import statsmodels.stats.api        as     sms
from   statsmodels.compat           import lzip

from   sklearn.cross_validation     import train_test_split
from   sklearn                      import model_selection

from   sklearn.linear_model         import LinearRegression
from   sklearn.linear_model         import Ridge
from   sklearn.linear_model         import Lasso
from   sklearn.tree                 import DecisionTreeRegressor

from   sklearn.neighbors            import KNeighborsRegressor
from   sklearn.svm                  import SVR

from   sklearn.model_selection      import GridSearchCV
from   sklearn.model_selection      import cross_val_score




from   sklearn.metrics              import mean_squared_error, mean_absolute_error
from   statsmodels.compat           import lzip
from   statsmodels.stats            import diagnostic as diag

from  statsmodels.stats.outliers_influence import variance_inflation_factor



In [2]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [3]:
carseats_df       = pd.read_csv('D:/RRD/data/Carseats.csv', \
                                usecols = ['CompPrice', 'Income', 'Advertising', 'Population', 'Price',\
                                           'Age', 'Education','Sales'])
print(carseats_df.shape)
print(carseats_df.head().T)

(400, 8)
                 0       1       2      3       4
Sales          9.5   11.22   10.06    7.4    4.15
CompPrice    138.0  111.00  113.00  117.0  141.00
Income        73.0   48.00   35.00  100.0   64.00
Advertising   11.0   16.00   10.00    4.0    3.00
Population   276.0  260.00  269.00  466.0  340.00
Price        120.0   83.00   80.00   97.0  128.00
Age           42.0   65.00   59.00   55.0   38.00
Education     17.0   10.00   12.00   14.0   13.00


In [None]:
https://raw.githubusercontent.com/LearnDataSci/article-resources/master/Housing%20Price%20Index%20Regression

### Check for missing values

In [4]:
carseats_df .info() 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 8 columns):
Sales          400 non-null float64
CompPrice      400 non-null int64
Income         400 non-null int64
Advertising    400 non-null int64
Population     400 non-null int64
Price          400 non-null int64
Age            400 non-null int64
Education      400 non-null int64
dtypes: float64(1), int64(7)
memory usage: 25.1 KB


In [5]:
carseats_df.isnull().sum()

Sales          0
CompPrice      0
Income         0
Advertising    0
Population     0
Price          0
Age            0
Education      0
dtype: int64

__            = carseats_df.apply(lambda x: x.replace(to_replace = 0, value = np.NaN))
carseats_df_  =  pd.DataFrame(__.dropna())

In [7]:
print(type(carseats_df))

<class 'pandas.core.frame.DataFrame'>


In [8]:
carseats_df.shape

(400, 8)

### Identify and remove variables of near zero variance

In [9]:
carseats_df.var()

Sales              7.975626
CompPrice        235.147243
Income           783.218239
Advertising       44.227343
Population     21719.813935
Price            560.584436
Age              262.449618
Education          6.867168
dtype: float64

In [10]:
carseats_df.columns

Index(['Sales', 'CompPrice', 'Income', 'Advertising', 'Population', 'Price',
       'Age', 'Education'],
      dtype='object')

### Remove Education which is having 7.05 as the variance

## There are no missing values

## Split the data set into dependent and independent variables, X and y

In [12]:
X          =   carseats_df[['CompPrice', 'Income', 'Advertising', 'Population', 'Price','Age', 'Education']]
y          =   carseats_df['Sales']
print(X.shape)                            
print(y.shape) 

(400, 7)
(400,)


## Build the model using statsmodel using the entire data to check assumptions

In [13]:
import statsmodels.formula.api as smf
model         = smf.ols('Sales ~ CompPrice + Income + Advertising + Population + Price + Age + Education', data = carseats_df)
results       = model.fit() ## OLS(output, input)

In [14]:
results .summary()

0,1,2,3
Dep. Variable:,Sales,R-squared:,0.542
Model:,OLS,Adj. R-squared:,0.533
Method:,Least Squares,F-statistic:,66.18
Date:,"Sat, 01 Jun 2019",Prob (F-statistic):,1.41e-62
Time:,08:03:52,Log-Likelihood:,-826.32
No. Observations:,400,AIC:,1669.0
Df Residuals:,392,BIC:,1701.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,7.7077,1.118,6.896,0.000,5.510,9.905
CompPrice,0.0939,0.008,11.980,0.000,0.079,0.109
Income,0.0129,0.003,3.703,0.000,0.006,0.020
Advertising,0.1309,0.015,8.654,0.000,0.101,0.161
Population,-0.0001,0.001,-0.180,0.857,-0.001,0.001
Price,-0.0925,0.005,-18.314,0.000,-0.102,-0.083
Age,-0.0450,0.006,-7.485,0.000,-0.057,-0.033
Education,-0.0400,0.037,-1.077,0.282,-0.113,0.033

0,1,2,3
Omnibus:,8.263,Durbin-Watson:,1.969
Prob(Omnibus):,0.016,Jarque-Bera (JB):,7.705
Skew:,0.288,Prob(JB):,0.0212
Kurtosis:,2.639,Cond. No.,4050.0


In [22]:
prediction = model.predict(params = 0.05, exog=X) 
prediction

array([[6.9 , 3.65, 0.55, ..., 6.  , 2.1 , 0.85],
       [5.55, 2.4 , 0.8 , ..., 4.15, 3.25, 0.5 ],
       [5.65, 1.75, 0.5 , ..., 4.  , 2.95, 0.6 ],
       ...,
       [8.1 , 1.3 , 0.6 , ..., 7.95, 2.  , 0.9 ],
       [5.  , 3.95, 0.35, ..., 4.75, 2.5 , 0.6 ],
       [6.7 , 1.85, 0.  , ..., 6.  , 2.45, 0.8 ]])

### 1) No outliers

Firstly we try to get the studentized residuals using get_influence( ). 

In [None]:
X_.boxplot(rot = 90)
plt.show()

In [None]:
influence     = model.get_influence()  
resid_student = influence.resid_studentized_external

In [None]:
all(x > 3 for x in resid_student) 

In [None]:
resid = pd.concat([X_, pd.Series(resid_student,name = "Studentized Residuals")],axis = 1)
resid.head()

If the absolute value of studentized residuals is more than 3 then that observation is considered as an outlier and hence should be removed.

In [None]:
print(X_[np.absolute(resid['Studentized Residuals'] > 3)]) 

### There are no outliers

### 2) No multi-collinearity

https://www.listendata.com/2018/01/linear-regression-in-python.html

Multi-collinearity increases the estimate of standard error of regression coefficients which makes some variables statistically insignificant when they should be significant.

We can detect multi-collinearity by:
+ By plotting scatter plots between predictor variables to have a visual description of their relationship.
+ By calculating the correlation coefficients between the variables we learn the extent of multi-collinearity in the data.
+ By calculating the Variable Inflation Factor (VIF) for each variable. 
VIF measures how much the variance of an estimated regression coefficients increases if your predictors are correlated.  The higher the value of VIF for the regressor, the more it is highly correlated to other variables.

VIF for a predictor variable is given by $\frac{1}{1 - R^2}$.
Here we take one of the explanatory variables as the target variable and all others as independent variables. So we run a regression between one of those independent variables with remaining independent variables. 

####  Detecting and Removing Multicollinearity 

##### We use the statsmodels library to calculate VIF

In [None]:
def calculate_vif(x):
    thresh = 5.0
    output = pd.DataFrame()
    k = x.shape[1]
    vif = [variance_inflation_factor(x.values, j) for j in range(x.shape[1])]
    for i in range(1,k):
        print("Iteration no.")
        print(i)
        print(vif)
        a = np.argmax(vif)
        print("Max VIF is for variable no.:")
        print(a)
        if vif[a] <= thresh :
            break
        if i == 1 :          
            output = x.drop(x.columns[a], axis = 1)
            vif = [variance_inflation_factor(output.values, j) for j in range(output.shape[1])]
        elif i > 1 :
            output = output.drop(output.columns[a],axis = 1)
            vif = [variance_inflation_factor(output.values, j) for j in range(output.shape[1])]
    return(output)


In [None]:
X_pure = calculate_vif(X) 

In [None]:
X_pure.shape 

In [None]:
X_pure.head()

#### There is no multi-collinearity as their value is below 5

### 3) Constant variance

Checking heteroscedasticity Using Goldfeld Quandt we test for heteroscedasticity.
Null Hypothesis: Error terms are homoscedastic
Alternative Hypothesis: Error terms are heteroscedastic.

In [None]:
name = ['F statistic', 'p-value']
test = sms.het_goldfeldquandt(model.resid, model.model.exog)
lzip(name, test)

The p-value is 0.2993 hence we can say that the residuals have constant variance. 

### 4) No autocorrelation

#### Checking for autocorrelation To ensure the absence of autocorrelation we use Ljungbox test.

####  Null Hypothesis: Autocorrelation is absent.
#### Alternative Hypothesis: Autocorrelation is present.

In [None]:
diag.acorr_ljungbox(model.resid, lags = 1) 

Since p-value is 0.8539 thus we can accept the null hypothesis and can say that autocorrelation is absent.

### 5) Normality of the residuals

#### We use Jarque-Bera test  from scipy library to check the normality of residuals.

#### Null Hypothesis: The residuals are normally distributed.

####  Alternative Hypothesis: The residuals are not normally distributed.

In [None]:
jb_stat, jb_pval = stats.jarque_bera(model.resid)
print('Jarque-Bera test P value is %1.4f' % jb_pval)

In [None]:
fig     = plt.figure()
ax1     = fig.add_subplot(211)
prob    = stats.probplot(model.resid, dist = stats.norm, plot = ax1)
ax1.set_xlabel('')
ax1.set_title('Probplot against normal distribution')
plt.show()

In [None]:
sns.kdeplot(model.resid, shade=True);
plt.show()

## The residuals are normally distributed since the p-value (0.0503) is >  0.05

https://dataunderthehood.com/2018/01/15/box-cox-transformation-with-python/

https://www.listendata.com/2018/01/linear-regression-in-python.html

### 6) Linearity

The residual vs fitted values plot is used to check for constant variance and linearity, and to identify potential outliers in the data.

In [None]:
residuals  =  model.resid
# Plot the residuals after fitting a linear model
ax         = sns.residplot(y, residuals, lowess = True, color = "g")

ax.set(xlabel='Fitted Value', ylabel='Residuals', title = 'Residual Vs Fitted values PLOT \n')
plt.show()

The residual plot indicates that the model’s residuals are restricting to mean of zero to a great extent exhibiting linearity.

### Split the data into train and test datasets

* Use the train data to build a model.
* Use the test data to evaluate the model performance.
* Slit the data into 80:20 ratio to create train and test data
* Set a random seed to ensure repeatability of the results

In [None]:
x_train, x_test,  y_train, y_test = train_test_split(X_pure, y, test_size = 0.30, random_state = 123)

In [None]:
x_train.var()

In [None]:
df_names      = ['x_train shape', 'x_test shape', 'y_train shape', 'y_test shape']
shapes        = (x_train.shape, x_test.shape,  y_train.shape, y_test.shape)
types         = (type(x_train), type(x_test), type(y_train),type(y_test))
lzip(df_names,shapes, types)

In [None]:
x_train_            = sm.add_constant(x_train)
lm                  = sm.OLS(y_train, x_train_, hasconst = False).fit()
print(lm.summary())

The regression equation is given by:

Sales = 3.86577 + 0.09269 * TV + 0.03462 * Radio + 0.01597 * Newspaper -0.000466 * $TV^2$ + 0.0015105 * TV X Radio - 0.0002586 * TV X Newspaper - 9.39196 * $Radio^2$ - 0.0007483 X Radio X Newspaper + 0.0002933 * $Newspaper^2$  + 0.000000081533 * $TV^3$ - 0.0000016885 * $TV^2$ X Radio - 0.00000100052 * $TV^2$ X Newspaper - 0.0000023998 * $Radio^2$ X TV - 0.00000198309 * $TV$ * $Radio$ * $Newspaper$ - 0.000000332817 * $Newspaper^2$ * TV - 0.00000907886 * $Radio^3$ + 0.0000097386 * $Radio^2$ * Newspaper + 0.0000052069 * $Newspaper^2$ X Radio - 0.000003027 * $Newspaper^3$ 

### Model evaluation

We will evaluate our model using RMSE, MAPE and R2-score.

### Prediction Accuracy

Prediction error or residuals is the difference between the predicted target variable values and the actual target variable vaues.

Most popular measure to evaluate the model performance is Root Mean Square Error (RMSE) which is the arithmatic mean of the sum of the residuals.

The model with low RMSE is the best model among many other models.

In [None]:
# model evaluation for training set

y_train_predict       = lm.predict(x_train_)

rmse_train            = np.sqrt(mean_squared_error(y_train, y_train_predict))
mape_train            = mean_absolute_percentage_error(y_train, y_train_predict)

print("The model performance for training set")

print("--------------------------------------")

print('RMSE is {}'.format(rmse_train))

print('MAPE is {}'.format(mape_train))
print('R square is %1.3f' % lm.rsquared_adj)



In [None]:
print("\n")

print("The model performance for testing set")

print("--------------------------------------")

# model evaluation for testing set
x_test_                  =   sm.add_constant(x_test) 
y_test_predict           =   lm.predict(x_test_)
rmse_test                =   (np.sqrt(mean_squared_error(y_test, y_test_predict)))
mape_test                =   mean_absolute_percentage_error(y_test, y_test_predict)

print('RMSE is {}'.format(rmse_test))
print('MAPE is {}'.format(mape_test))

Let us see if ridge or lasso regression is better.

### Ridge Regression

For ridge regression, we introduce GridSearchCV, which allow us to automatically perform 5-fold cross-validation with a range of different regularization parameters in order to find the optimal value of alpha.
Ref: https://towardsdatascience.com/how-to-perform-lasso-and-ridge-regression-in-python-3b3b75541ad8

In [None]:
ridge    =  Ridge()

In [None]:
param_dict      =  {'alpha' : [1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 20, 30]}
ridge_regressor =  GridSearchCV(ridge, param_dict, scoring = 'neg_mean_squared_error', cv = 5)
ridge_regressor.fit(x_train, y_train)

In [None]:
print(ridge_regressor.best_params_)
print(ridge_regressor.best_score_)

In [None]:
clf = Ridge(alpha = 30,  fit_intercept = False, random_state = 12345)
clf.fit(x_train, y_train)

## Get the coefficients of the Ridge model

In [None]:
coef_dict_ridge = {}
for coef, feat in zip(clf.coef_, x_train.columns):
    coef_dict_ridge[feat] = coef
print(coef_dict_ridge)

In [None]:
rmse_train_rr    = (np.sqrt(mean_squared_error(y_true = y_train, y_pred = clf.predict(x_train))))
mape_train_rr    = mean_absolute_percentage_error(y_train,y_pred = clf.predict(x_train))

print("\n")
print("The model performance for training set")
print("--------------------------------------")
print('RMSE    is {}'.format(rmse_train_rr))
print('MAPE    is {}'.format(mape_train_rr))
print('Rsquare is {}'.format(clf.score(x_train, y_train)))

In [None]:
rmse_test_rr    = (np.sqrt(mean_squared_error(y_true = y_test, y_pred = clf.predict(x_test))))
mape_test_rr    = mean_absolute_percentage_error(y_test, y_pred = clf.predict(x_test))

print("\n")
print("The model performance for testing set")
print("--------------------------------------")

print('RMSE is {}'.format(rmse_test_rr ))
print('MAPE is {}'.format(mape_test_rr ))

In [None]:
lasso                      =  Lasso()

In [None]:
param_dict       =  {'alpha' : [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 20]}
lasso_regressor  =  GridSearchCV(lasso, param_dict, scoring = 'neg_mean_squared_error', cv = 5)
lasso_regressor.fit(x_train, y_train)

In [None]:
print(lasso_regressor.best_params_)
print(lasso_regressor.best_score_)

In [None]:
clf1 = Lasso(alpha = 0.01)
clf1.fit(x_train, y_train)
print('Rsquare is {}'.format(clf1.score(x_train, y_train)))

### Get the coefficients of the Lasso model

In [None]:
coef_dict_lasso = {}
for coef, feat in zip(clf1.coef_, x_train.columns):
    coef_dict_lasso[feat] = coef
print(coef_dict_lasso)

In [None]:
rmse_train_lasso    = (np.sqrt(mean_squared_error(y_true = y_train, y_pred = clf1.predict(x_train))))
mape_train_lasso    = mean_absolute_percentage_error(y_train,y_pred = clf1.predict(x_train))

print("\n")
print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse_train_lasso ))
print('MAPE is {}'.format(mape_train_lasso ))

In [None]:
rmse_test_lasso    = (np.sqrt(mean_squared_error(y_true = y_test, y_pred = clf1.predict(x_test))))
mape_test_lasso    = mean_absolute_percentage_error(y_test, y_pred = clf1.predict(x_test))

print("\n")
print("The model performance for testing set")
print("--------------------------------------")

print('RMSE is {}'.format(rmse_test_lasso ))
print('MAPE is {}'.format(mape_test_lasso ))

### Decision Trees (CART)

Decision trees or the Classification and Regression Trees (CART as they are known) use the training data to select the best points to split the data in order to minimize a cost metric. The default cost metric for regression decision trees is the mean squared error, specified in the criterion parameter.

You can create a CART model for regression using the DecisionTreeRegressor class.

For more details, refer https://machinelearningmastery.com/spot-check-regression-machine-learning-algorithms-python-scikit-learn/

In [None]:
X                      =   x_train
Y                      =   y_train
seed                   =   12345

## Create and fit the model

kfold                  = model_selection.KFold(n_splits = 10, random_state = seed)
model                  = DecisionTreeRegressor()
model.fit(x_train, y_train)  

In [None]:
### Predict from the model for training data

y_pred      =  model.predict(x_train)

print('Type of y_pred is %s' % type(y_pred))

print('\n Compare the difference between the actual and predicted values.')

df          =  pd.DataFrame({'Actual':y_train, 'Predicted':y_pred})  
print(df.head(5).T)

In [None]:
rmse_train_CART    = (np.sqrt(mean_squared_error(y_true = y_train, y_pred = model.predict(x_train))))
mape_train_CART    = mean_absolute_percentage_error(y_train,y_pred = model.predict(x_train))
score              = model.score(x_train, y_train)


print("\n")
print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse_train_CART))
print('MAPE is {}'.format(mape_train_CART))
print('R Square is {}'.format(score))


In [None]:
coef_dict_CART = {}
for coef, feat in zip(model.coef_, x_train.columns):
    coef_dict_CART[feat] = coef
print(coef_dict_CART)

In [None]:
### Predict from the model for testing data

y_pred      =  model.predict(x_test)

print('Type of y_pred is %s' % type(y_pred))

print('\n Compare the difference between the actual and predicted values.')

df1          =  pd.DataFrame({'Actual':y_test, 'Predicted':y_pred})  
print(df1.head(5).T)

In [None]:
rmse_test_CART    = (np.sqrt(mean_squared_error(y_true = y_test, y_pred = model.predict(x_test))))
mape_test_CART    = mean_absolute_percentage_error(y_test, y_pred = model.predict(x_test))

print("\n")
print("The model performance for test set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse_test_CART))
print('MAPE is {}'.format(mape_test_CART))

http://www.science.smith.edu/~jcrouser/SDS293/labs/lab10-py.html
https://theprofessionalspoint.blogspot.com/2019/02/implement-decision-tree-algorithm-in_22.html

### K-Nearest Neighbors (or KNN)

K-Nearest Neighbors (or KNN) locates the K most similar instances in the training dataset for a new data instance. From the K neighbors, a mean or median output variable is taken as the prediction. Of note is the distance metric used (the metric argument). The Minkowski distance is used by default, which is a generalization of both the Euclidean distance (used when all inputs have the same scale) and Manhattan distance (for when the scales of the input variables differ).
You can construct a KNN model for regression using the KNeighborsRegressor class.

In [None]:
seed                   =  12345
kfold                  =  model_selection.KFold(n_splits = 10, random_state = seed)
model                  =  KNeighborsRegressor()
scoring                =  'neg_mean_squared_error'
model.fit(x_train, y_train)  

In [None]:
### Predict from the model for training data

y_pred      =  model.predict(x_train)

print('Type of y_pred is %s' % type(y_pred))

print('\n Compare the difference between the actual and predicted values.')

df          =  pd.DataFrame({'Actual':y_train, 'Predicted':y_pred})  
print(df.head(5).T)

In [None]:
rmse_train_KNN     = (np.sqrt(mean_squared_error(y_true = y_train, y_pred = model.predict(x_train))))
mape_train_KNN     = mean_absolute_percentage_error(y_train,y_pred = model.predict(x_train))
score              = model.score(x_train, y_train)


print("\n")
print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse_train_KNN))
print('MAPE is {}'.format(mape_train_KNN))
print('R Square is {}'.format(score))

In [None]:
### Predict from the model for testing data

y_pred      =  model.predict(x_test)

print('Type of y_pred is %s' % type(y_pred))

print('\n Compare the difference between the actual and predicted values.')

df1          =  pd.DataFrame({'Actual':y_test, 'Predicted':y_pred})  
print(df1.head(5).T)

In [None]:
rmse_test_KNN    = (np.sqrt(mean_squared_error(y_true = y_test, y_pred = model.predict(x_test))))
mape_test_KNN    = mean_absolute_percentage_error(y_test, y_pred = model.predict(x_test))

print("\n")
print("The model performance for test set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse_test_KNN ))
print('MAPE is {}'.format(mape_test_KNN))

### Support Vector Machines - Regression

Support Vector Machines (SVM) were developed for binary classification. The technique has been extended for the prediction real-valued problems called Support Vector Regression (SVR). Like the classification example, SVR is built upon the LIBSVM library.
You can create an SVM model for regression using the SVR class.

In [None]:
seed                =  12345
kfold               =  model_selection.KFold(n_splits=10, random_state=seed)
model               =  SVR()
scoring             =  'neg_mean_squared_error'

model_selection.cross_val_score(model, X, Y, cv = kfold, scoring = scoring)
model.fit(x_train, y_train)  

In [None]:
### Predict from the model for training data

y_pred      =  model.predict(x_train)

print('Type of y_pred is %s' % type(y_pred))

print('\n Compare the difference between the actual and predicted values.')

df          =  pd.DataFrame({'Actual':y_train, 'Predicted':y_pred})  
print(df.head(5).T)

In [None]:
rmse_train_SVR     = (np.sqrt(mean_squared_error(y_true = y_train, y_pred = model.predict(x_train))))
mape_train_SVR     = mean_absolute_percentage_error(y_train,y_pred = model.predict(x_train))
score              = model.score(x_train, y_train)

print("\n")
print("The model performance for training set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse_train_SVR ))
print('MAPE is {}'.format(mape_train_SVR))
print('R Square is {}'.format(score))

In [None]:
### Predict from the model for testing data

y_pred      =  model.predict(x_test)

print('Type of y_pred is %s' % type(y_pred))

print('\n Compare the difference between the actual and predicted values.')

df1          =  pd.DataFrame({'Actual':y_test, 'Predicted':y_pred})  
print(df1.head(5).T)

In [None]:
rmse_test_SVR    = (np.sqrt(mean_squared_error(y_true = y_test, y_pred = model.predict(x_test))))
mape_test_SVR    = mean_absolute_percentage_error(y_test, y_pred = model.predict(x_test))

print("\n")
print("The model performance for test set")
print("--------------------------------------")
print('RMSE is {}'.format(rmse_test_SVR ))
print('MAPE is {}'.format(mape_test_SVR))