## **<실습예제 1> Abalone Dataset을 활용하여 회귀분석을 수행하자.**

## **해당 데이터를 불러와 회귀분석을 수행하고, 각 변수의 회귀계수를 산출하여 보자.**
**Abalone(전복 나이 예측) Dataset** : https://archive.ics.uci.edu/ml/datasets/abalone



The age of abalone is determined by cutting the shell through the cone,  
staining it, and counting the number of rings through a microscope -- a boring and time-consuming task.  
Other measurements, which are easier to obtain, are used to predict the age. Further information,  
such as weather patterns and location (hence food availability) may be required to solve the problem.  

	Name		Data Type	Meas.	Description
	----		---------	-----	-----------
	Sex		nominal			M, F, and I (infant)
	Length		continuous	mm	Longest shell measurement
	Diameter	continuous	mm	perpendicular to length
	Height		continuous	mm	with meat in shell
	Whole weight	continuous	grams	whole abalone
	Shucked weight	continuous	grams	weight of meat
	Viscera weight	continuous	grams	gut weight (after bleeding)
	Shell weight	continuous	grams	after being dried
	Rings		integer			+1.5 gives the age in years


In [2]:
import os
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
import scipy.stats as stats
import itertools
import time
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import pylab

## **데이터불러오기**

In [3]:
Data = pd.read_csv('./data/regression_abalone.csv')

In [4]:
##### EDA
# Data format
print("number of observation in data : {}".format(Data.shape[0]))
print("number of Variables in Data  : {}".format(Data.shape[1]))

number of observation in data : 4177
number of Variables in Data  : 8


## **학습데이터와 검증데이터 나누기** (8:2)

In [6]:
Data.head()

Unnamed: 0,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Y
0,0.37,0.27,0.09,0.1855,0.07,0.0425,0.065,7
1,0.58,0.475,0.155,0.974,0.4305,0.23,0.285,10
2,0.605,0.465,0.165,1.056,0.4215,0.2475,0.34,13
3,0.215,0.15,0.03,0.0385,0.0115,0.005,0.01,5
4,0.38,0.29,0.105,0.257,0.099,0.051,0.085,10


In [9]:
X = Data.drop(labels='Y', axis=1)   # Independent Variables
Y = Data['Y']   # Target

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X,Y , test_size=0.2)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(3341, 7) (836, 7) (3341,) (836,)


## **StatModels 라이브러리를 활용하여 선형 회귀 분석 모형을 적합**

In [12]:
# Training the Multivariate Linear Regression Model
lm = sm.OLS(y_train, X_train)

In [13]:
lm_trained = lm.fit()

### **적합된 모델의 각 회귀 계수를 알아보자**

In [17]:
################## Multiple Linear Regression without Logarithm
lm_trained.summary()

# or
lm_trained.params

Length             6.669515
Diameter          13.152903
Height            12.852340
Whole_weight       7.929765
Shucked_weight   -19.874822
Viscera_weight   -11.910712
Shell_weight       8.187825
dtype: float64

### **Training 데이터와 Test 데이터에 각각 MSE를 계산하여 모델의 정확도를 확인해보자.**

In [24]:
# prediction y_train
y_pred_train = lm_trained.predict(X_train)


# prediction y_test
y_pred_test = lm_trained.predict(X_test)


In [25]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [26]:
# MSE: Mean of Squared error
mse_train = mean_squared_error(y_train, y_pred_train)
mse_test = mean_squared_error(y_test, y_pred_test)
print("mse_train: %.2f, mse_test: %.2f"
      %(mse_train,mse_test))

mse_train: 5.18, mse_test: 4.57


## **<실습예제 2> mortgate Dataset을 활용하여 회귀분석을 수행하자.**

## **해당 데이터를 불러와 회귀분석을 수행하고, 변수선택법을 통해서 주요한 변수만의 회귀모델을 수행해보자.**
**Mortgage Dataset** : https://www.fhfa.gov/DataTools/Downloads/Pages/Public-Use-Databases.aspx


## **데이터불러오기**

In [27]:
Data = pd.read_csv('./data/mortgage.csv')

In [28]:
##### EDA
# Data format
print("number of observation in data : {}".format(Data.shape[0]))
print("number of Variables in Data  : {}".format(Data.shape[1]))

number of observation in data : 1049
number of Variables in Data  : 16


In [30]:
Data.head()

Unnamed: 0,OneMonthCDRate,OneY.CMaturityRate,ThreeM.Rate.AuctionAverage,ThreeM.Rate.SecondaryMarket,ThreeY.CMaturityRate,FiveY.CMaturityRate,BankCredit,Currency,DemandDeposits,FederalFunds,MoneyStock,CheckableDeposits,LoansLeases,SavingsDeposits,TradeCurrencies,ThirtyY.CMortgageRate
0,8.72,90.729,9.69,7.62,7.6,7.72,7.69,2605.8,223.4,279.6,8.52,794.4,564.8,2020.2,894.7,7.66
1,13.85,109.392,17.19,12.06,12.47,13.94,13.82,1347.4,124.4,230.8,14.35,443.0,314.8,1033.8,343.9,13.73
2,6.59,87.979,9.94,5.74,5.67,7.42,7.73,2280.2,198.9,287.2,6.77,755.9,551.0,1743.9,936.5,6.65
3,17.43,96.064,15.07,15.2,15.01,13.13,12.89,1237.4,115.5,241.9,18.12,410.3,291.3,933.1,377.5,14.24
4,3.16,85.121,8.21,2.97,2.94,4.93,5.85,2937.9,288.7,336.4,2.96,1012.5,715.6,2110.2,1179.5,3.5


## **학습데이터와 검증데이터 나누기** (8:2)

In [62]:
# bias 추가
Data = sm.add_constant(Data, has_constant='add')
Data.head()

Unnamed: 0,const,OneMonthCDRate,OneY.CMaturityRate,ThreeM.Rate.AuctionAverage,ThreeM.Rate.SecondaryMarket,ThreeY.CMaturityRate,FiveY.CMaturityRate,BankCredit,Currency,DemandDeposits,FederalFunds,MoneyStock,CheckableDeposits,LoansLeases,SavingsDeposits,TradeCurrencies,ThirtyY.CMortgageRate
0,1.0,8.72,90.729,9.69,7.62,7.6,7.72,7.69,2605.8,223.4,279.6,8.52,794.4,564.8,2020.2,894.7,7.66
1,1.0,13.85,109.392,17.19,12.06,12.47,13.94,13.82,1347.4,124.4,230.8,14.35,443.0,314.8,1033.8,343.9,13.73
2,1.0,6.59,87.979,9.94,5.74,5.67,7.42,7.73,2280.2,198.9,287.2,6.77,755.9,551.0,1743.9,936.5,6.65
3,1.0,17.43,96.064,15.07,15.2,15.01,13.13,12.89,1237.4,115.5,241.9,18.12,410.3,291.3,933.1,377.5,14.24
4,1.0,3.16,85.121,8.21,2.97,2.94,4.93,5.85,2937.9,288.7,336.4,2.96,1012.5,715.6,2110.2,1179.5,3.5


In [63]:
X = Data.drop('ThirtyY.CMortgageRate',axis=1)    # Independent Variables
Y = Data['ThirtyY.CMortgageRate']   # Target
feature_columns = list(Data.columns.difference(['ThirtyY.CMortgageRate']))

In [64]:
train_x, test_x, train_y, test_y = train_test_split(X,Y , test_size=0.2)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(839, 15) (210, 15) (839,) (210,)


# 선형회귀모형 모델링 y = f(x)

In [65]:
# Train the MLR
full_model = sm.OLS(train_y, train_x)
fitted_full_model = full_model.fit()

## **변수선택법을 통해 최적의 모델을 찾아보자.**

### processSubset: 해당 feature_set에 대한 모델생성 및 AIC 계산

In [66]:
def processSubset(X,y, feature_set):
            model = sm.OLS(y,X[list(feature_set)]) # Modeling
            regr = model.fit() # 모델 학습
            AIC = regr.aic # 모델의 AIC
            return {"model":regr, "AIC":AIC}
        
print(processSubset(X=train_x, y=train_y, feature_set = feature_columns[0:5]))

{'model': <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x0000021F5AEE4278>, 'AIC': 1847.824755629826}


### getBest: 가장 높은 AIC를 가지는 모델 선택 및 저장

In [67]:
def getBest(X,y,k):
    tic = time.time() # 시작시간
    results = [] # 결과 저장공간
    for combo in itertools.combinations(X.columns.difference(['const']), k): # 각 변수조합을 고려한 경우의 수
        results.append(processSubset(X,y,feature_set=combo))  # 모델링된 것들을 저장
    models = pd.DataFrame(results) # 데이터 프레임으로 변환
    # 가장 낮은 AIC를 가지는 모델 선택 및 저장
    best_model = models.loc[models['AIC'].argmin()] # index
    toc = time.time() # 종료시간
    print("Processed ", models.shape[0], "models on", k, "predictors in", (toc - tic),
          "seconds.")
    return best_model

print(getBest(X=train_x, y=train_y,k=2))

Processed  105 models on 2 predictors in 0.09374713897705078 seconds.
AIC                                                46.6395
model    <statsmodels.regression.linear_model.Regressio...
Name: 67, dtype: object


will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  


### 변수조합에 따른 모델링 시간

In [68]:
# 변수 선택에 따른 학습시간과 저장
models = pd.DataFrame(columns=["AIC", "model"])
tic = time.time()
for i in range(1,4):
    models.loc[i] = getBest(X=train_x,y=train_y,k=i)
toc = time.time()
print("Total elapsed time:", (toc-tic), "seconds.")

will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  


Processed  15 models on 1 predictors in 0.012966156005859375 seconds.
Processed  105 models on 2 predictors in 0.09175467491149902 seconds.
Processed  455 models on 3 predictors in 0.43084073066711426 seconds.
Total elapsed time: 0.5465307235717773 seconds.


In [69]:
models

Unnamed: 0,AIC,model
1,1313.785751,<statsmodels.regression.linear_model.Regressio...
2,46.639508,<statsmodels.regression.linear_model.Regressio...
3,-858.545098,<statsmodels.regression.linear_model.Regressio...


## 모델평가 지표

In [70]:
models.loc[3, "model"].summary()

0,1,2,3
Dep. Variable:,ThirtyY.CMortgageRate,R-squared:,1.0
Model:,OLS,Adj. R-squared:,1.0
Method:,Least Squares,F-statistic:,898000.0
Date:,"Mon, 08 Jul 2019",Prob (F-statistic):,0.0
Time:,18:44:08,Log-Likelihood:,432.27
No. Observations:,839,AIC:,-858.5
Df Residuals:,836,BIC:,-844.3
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
BankCredit,-0.9185,0.023,-40.359,0.000,-0.963,-0.874
FiveY.CMaturityRate,1.5495,0.028,54.447,0.000,1.494,1.605
ThreeY.CMaturityRate,0.3884,0.008,50.564,0.000,0.373,0.403

0,1,2,3
Omnibus:,325.611,Durbin-Watson:,1.97
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1886.744
Skew:,1.661,Prob(JB):,0.0
Kurtosis:,9.552,Cond. No.,106.0


In [71]:
# 모든 변수들 모델링 한것보다 나은 결과를 가져옴
print("selected model Rsquared: ","{:.5f}".format(models.loc[3, "model"].rsquared))
print("selected model AIC: ","{:.5f}".format(models.loc[3, "model"].aic))
print("selected model MSE: ","{:.5f}".format(models.loc[3, "model"].mse_total))

selected model Rsquared:  0.99969
selected model AIC:  -858.54510
selected model MSE:  67.35228


## **전진선택법**

### forward: 전진선택법 (step=1)

In [72]:
def forward(X, y, predictors):
    # 데이터 변수들이 미리정의된 predictors에 있는지 없는지 확인 및 분류
    remaining_predictors = [p for p in X.columns.difference(['const']) if p not in predictors]
    tic = time.time()
    results = []
    for p in remaining_predictors:
        results.append(processSubset(X=X, y= y, feature_set=predictors+[p]+['const']))
    # 데이터프레임으로 변환
    models = pd.DataFrame(results)

    # AIC가 가장 낮은 것을 선택
    best_model = models.loc[models['AIC'].argmin()] # index
    toc = time.time()
    print("Processed ", models.shape[0], "models on", len(predictors)+1, "predictors in", (toc-tic))
    print('Selected predictors:',best_model['model'].model.exog_names,' AIC:',best_model[0] )
    return best_model

### forward_model: 전진선택법 모델

In [73]:
def forward_model(X,y):
    Fmodels = pd.DataFrame(columns=["AIC", "model"])
    tic = time.time()
    # 미리 정의된 데이터 변수
    predictors = []
    # 변수 1~10개 : 0~9 -> 1~10
    for i in range(1, len(X.columns.difference(['const'])) + 1):
        Forward_result = forward(X=X,y=y,predictors=predictors)
        if i > 1:
            if Forward_result['AIC'] > Fmodel_before:
                break
        Fmodels.loc[i] = Forward_result
        predictors = Fmodels.loc[i]["model"].model.exog_names
        Fmodel_before = Fmodels.loc[i]["AIC"]
        predictors = [ k for k in predictors if k != 'const']
    toc = time.time()
    print("Total elapsed time:", (toc - tic), "seconds.")

    return(Fmodels['model'][len(Fmodels['model'])])

In [74]:
Forward_best_model = forward_model(X=train_x, y= train_y)

Processed  15 models on 1 predictors in 0.014958381652832031
Selected predictors: ['ThreeY.CMaturityRate', 'const']  AIC: 1219.6985684603846
Processed  14 models on 2 predictors in 0.014959335327148438
Selected predictors: ['ThreeY.CMaturityRate', 'FiveY.CMaturityRate', 'const']  AIC: -295.4338662995615
Processed  13 models on 3 predictors in 0.013962507247924805
Selected predictors: ['ThreeY.CMaturityRate', 'FiveY.CMaturityRate', 'BankCredit', 'const']  AIC: -923.4600541175428
Processed  12 models on 4 predictors in 0.014957666397094727
Selected predictors: ['ThreeY.CMaturityRate', 'FiveY.CMaturityRate', 'BankCredit', 'OneY.CMaturityRate', 'const']  AIC: -1026.9259143822574
Processed  11 models on 5 predictors in 0.01196742057800293
Selected predictors: ['ThreeY.CMaturityRate', 'FiveY.CMaturityRate', 'BankCredit', 'OneY.CMaturityRate', 'FederalFunds', 'const']  AIC: -1081.8132124604335
Processed  10 models on 6 predictors in 0.01196742057800293
Selected predictors: ['ThreeY.CMaturityR

will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  if sys.path[0] == '':


In [75]:
Forward_best_model.aic

-1192.9239799600182

## **후진소거법**

### backward: 후진소거법(step=1)

In [76]:
def backward(X,y,predictors):
    tic = time.time()
    results = []
    # 데이터 변수들이 미리정의된 predictors 조합 확인
    for combo in itertools.combinations(predictors, len(predictors) - 1):
        results.append(processSubset(X=X, y= y,feature_set=list(combo)+['const']))
    models = pd.DataFrame(results)
    # 가장 낮은 AIC를 가진 모델을 선택
    best_model = models.loc[models['AIC'].argmin()]
    toc = time.time()
    print("Processed ", models.shape[0], "models on", len(predictors) - 1, "predictors in",
          (toc - tic))
    print('Selected predictors:',best_model['model'].model.exog_names,' AIC:',best_model[0] )
    return best_model

### backward: 후진소거법모델

In [77]:
def backward_model(X, y):
    Bmodels = pd.DataFrame(columns=["AIC", "model"], index = range(1,len(X.columns)))
    tic = time.time()
    predictors = X.columns.difference(['const'])
    Bmodel_before = processSubset(X,y,predictors)['AIC']
    while (len(predictors) > 1):
        Backward_result = backward(X=train_x, y= train_y, predictors = predictors)
        if Backward_result['AIC'] > Bmodel_before:
            break
        Bmodels.loc[len(predictors) - 1] = Backward_result
        predictors = Bmodels.loc[len(predictors) - 1]["model"].model.exog_names
        Bmodel_before = Backward_result['AIC']
        predictors = [ k for k in predictors if k != 'const']

    toc = time.time()
    print("Total elapsed time:", (toc - tic), "seconds.")
    return (Bmodels['model'].dropna().iloc[0])

In [78]:
Backward_best_model = backward_model(X=train_x,y=train_y)

Processed  15 models on 14 predictors in 0.016953706741333008
Selected predictors: ['BankCredit', 'CheckableDeposits', 'Currency', 'DemandDeposits', 'FederalFunds', 'FiveY.CMaturityRate', 'LoansLeases', 'OneMonthCDRate', 'OneY.CMaturityRate', 'SavingsDeposits', 'ThreeM.Rate.AuctionAverage', 'ThreeM.Rate.SecondaryMarket', 'ThreeY.CMaturityRate', 'TradeCurrencies', 'const']  AIC: -1194.8622684117868
Processed  14 models on 13 predictors in 0.018948793411254883
Selected predictors: ['BankCredit', 'CheckableDeposits', 'Currency', 'FederalFunds', 'FiveY.CMaturityRate', 'LoansLeases', 'OneMonthCDRate', 'OneY.CMaturityRate', 'SavingsDeposits', 'ThreeM.Rate.AuctionAverage', 'ThreeM.Rate.SecondaryMarket', 'ThreeY.CMaturityRate', 'TradeCurrencies', 'const']  AIC: -1195.5278282829395
Processed  13 models on 12 predictors in 0.016953706741333008
Selected predictors: ['BankCredit', 'CheckableDeposits', 'Currency', 'FederalFunds', 'FiveY.CMaturityRate', 'OneMonthCDRate', 'OneY.CMaturityRate', 'Savin

will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  if __name__ == '__main__':


In [79]:
Backward_best_model.aic

-1195.5278282829395

## **단계적 선택법**

### stepwise_model: 단계적 선택법 모델

In [80]:
def Stepwise_model(X,y):
    Stepmodels = pd.DataFrame(columns=["AIC", "model"])
    tic = time.time()
    predictors = []
    Smodel_before = processSubset(X,y,predictors+['const'])['AIC']
    # 변수 1~10개 : 0~9 -> 1~10
    for i in range(1, len(X.columns.difference(['const'])) + 1):
        Forward_result = forward(X=X, y=y, predictors=predictors) # constant added
        print('forward')
        Stepmodels.loc[i] = Forward_result
        predictors = Stepmodels.loc[i]["model"].model.exog_names
        predictors = [ k for k in predictors if k != 'const']
        Backward_result = backward(X=X, y=y, predictors=predictors)
        if Backward_result['AIC']< Forward_result['AIC']:
            Stepmodels.loc[i] = Backward_result
            predictors = Stepmodels.loc[i]["model"].model.exog_names
            Smodel_before = Stepmodels.loc[i]["AIC"]
            predictors = [ k for k in predictors if k != 'const']
            print('backward')
        if Stepmodels.loc[i]['AIC']> Smodel_before:
            break
        else:
            Smodel_before = Stepmodels.loc[i]["AIC"]
    toc = time.time()
    print("Total elapsed time:", (toc - tic), "seconds.")
    return (Stepmodels['model'][len(Stepmodels['model'])])

In [81]:
Stepwise_best_model=Stepwise_model(X=train_x,y=train_y)

will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  if sys.path[0] == '':
will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.
  if __name__ == '__main__':


Processed  15 models on 1 predictors in 0.012965679168701172
Selected predictors: ['ThreeY.CMaturityRate', 'const']  AIC: 1219.6985684603846
forward
Processed  1 models on 0 predictors in 0.0019965171813964844
Selected predictors: ['const']  AIC: 4279.780084100832
Processed  14 models on 2 predictors in 0.015954971313476562
Selected predictors: ['ThreeY.CMaturityRate', 'FiveY.CMaturityRate', 'const']  AIC: -295.4338662995615
forward
Processed  2 models on 1 predictors in 0.002992868423461914
Selected predictors: ['ThreeY.CMaturityRate', 'const']  AIC: 1219.6985684603846
Processed  13 models on 3 predictors in 0.013961076736450195
Selected predictors: ['ThreeY.CMaturityRate', 'FiveY.CMaturityRate', 'BankCredit', 'const']  AIC: -923.4600541175428
forward
Processed  3 models on 2 predictors in 0.003989219665527344
Selected predictors: ['ThreeY.CMaturityRate', 'FiveY.CMaturityRate', 'const']  AIC: -295.4338662995615
Processed  12 models on 4 predictors in 0.012966156005859375
Selected pred

In [82]:
Stepwise_best_model.aic



-1195.5278282829377

# **성능평가**

In [99]:
# the number of params
print('Full model parameter : {}'.format(len(fitted_full_model.params)))

Full model parameter : 16
(16,) (14,) (14,)


In [100]:
# the number of params
print('Number of Full model parameter : {}'.format(len(fitted_full_model.params)))
print('Number of Forward model parameter : {}'.format(len(Forward_best_model.params)))
print('Number of Backward model parameter : {}'.format(len(Backward_best_model.params)))
print('Number of Stepwise model parameter : {}'.format(len(Stepwise_best_model.params)))



Number of Full model parameter : 16
Number of Forward model parameter : 16
Number of Backward model parameter : 14
Number of Stepwise model parameter : 14


In [101]:
# 모델에 의해 예측된/추정된 값 <->  test_y
pred_y_full = fitted_full_model.predict(test_x)
pred_y_forward = Forward_best_model.predict(test_x[Forward_best_model.model.exog_names])
pred_y_backward = Backward_best_model.predict(test_x[Backward_best_model.model.exog_names])
pred_y_stepwise = Stepwise_best_model.predict(test_x[Stepwise_best_model.model.exog_names])

In [102]:
perf_mat = pd.DataFrame(columns=["ALL", "FORWARD", "BACKWARD", "STEPWISE"],
                        index =['MSE', 'RMSE','MAE', 'MAPE'])

In [103]:
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [104]:
# 성능지표
perf_mat.loc['MSE']['ALL'] = metrics.mean_squared_error(test_y,pred_y_full)
perf_mat.loc['MSE']['FORWARD'] = metrics.mean_squared_error(test_y,pred_y_forward)
perf_mat.loc['MSE']['BACKWARD'] = metrics.mean_squared_error(test_y,pred_y_backward)
perf_mat.loc['MSE']['STEPWISE'] = metrics.mean_squared_error(test_y,pred_y_stepwise)

perf_mat.loc['RMSE']['ALL'] = np.sqrt(metrics.mean_squared_error(test_y, pred_y_full))
perf_mat.loc['RMSE']['FORWARD'] = np.sqrt(metrics.mean_squared_error(test_y, pred_y_forward))
perf_mat.loc['RMSE']['BACKWARD'] = np.sqrt(metrics.mean_squared_error(test_y, pred_y_backward))
perf_mat.loc['RMSE']['STEPWISE'] = np.sqrt(metrics.mean_squared_error(test_y, pred_y_stepwise))

perf_mat.loc['MAE']['ALL'] = metrics.mean_absolute_error(test_y, pred_y_full)
perf_mat.loc['MAE']['FORWARD'] = metrics.mean_absolute_error(test_y, pred_y_forward)
perf_mat.loc['MAE']['BACKWARD'] = metrics.mean_absolute_error(test_y, pred_y_backward)
perf_mat.loc['MAE']['STEPWISE'] = metrics.mean_absolute_error(test_y, pred_y_stepwise)

perf_mat.loc['MAPE']['ALL'] = mean_absolute_percentage_error(test_y, pred_y_full)
perf_mat.loc['MAPE']['FORWARD'] = mean_absolute_percentage_error(test_y, pred_y_forward)
perf_mat.loc['MAPE']['BACKWARD'] = mean_absolute_percentage_error(test_y, pred_y_backward)
perf_mat.loc['MAPE']['STEPWISE'] = mean_absolute_percentage_error(test_y, pred_y_stepwise)

print(perf_mat)

            ALL    FORWARD   BACKWARD   STEPWISE
MSE    0.014309   0.014309  0.0144343  0.0144343
RMSE    0.11962    0.11962   0.120143   0.120143
MAE   0.0821647  0.0821647  0.0823837  0.0823837
MAPE    1.15203    1.15203    1.15555    1.15555


In [105]:
perf_mat

Unnamed: 0,ALL,FORWARD,BACKWARD,STEPWISE
MSE,0.014309,0.014309,0.0144343,0.0144343
RMSE,0.11962,0.11962,0.120143,0.120143
MAE,0.0821647,0.0821647,0.0823837,0.0823837
MAPE,1.15203,1.15203,1.15555,1.15555
