# 회귀모형 예측성능 비교

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load copdcat.csv; row 0 of the file supplies the column names.
df = pd.read_csv("copdcat.csv", header=0)

# CATScore is the regression target; every other column is used as a predictor.
y = df["CATScore"]
X = df.drop(columns=["CATScore"])

## 데이터 구분: 학습 데이터와 검증 데이터

In [None]:
from sklearn.model_selection import train_test_split

# Hold out half of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)

### 선형 회귀분석

In [None]:
from sklearn.linear_model import LinearRegression

# Linear regression with an intercept term, fitted on the training half only.
r_linear = LinearRegression(fit_intercept=True).fit(X_train, y_train)

# Predictions for the held-out half.
linear_pred = r_linear.predict(X_test)

### Test 데이터 실제 y값과 예측 y값

In [None]:
# Held-out targets on the x-axis against the linear-model predictions on the y-axis.
sns.scatterplot(x=y_test, y=linear_pred, alpha=0.5)

# Identity line: a point lying on it is predicted exactly.
# NOTE(review): the 0-30 span is hard-coded; confirm it covers the CATScore range.
plt.plot([0, 30], [0, 30], color='red')

# Labels match what is plotted: x carries y_test (actual), y carries the predictions.
plt.xlabel("Actual y values")
plt.ylabel("Predicted y values")
plt.show()

### ccp_alpha=0.0, min_impurity_decrease=0.007, min_samples_split=2 트리

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Regression tree whose growth is limited by a minimum impurity decrease per split;
# cost-complexity pruning is left off (ccp_alpha=0.0).
r_tree = DecisionTreeRegressor(
    ccp_alpha=0.0,
    min_impurity_decrease=0.007,
    min_samples_split=2,
    random_state=0,
).fit(X_train, y_train)

# Predictions for the held-out half.
tree_pred = r_tree.predict(X_test)

### Test 데이터 실제 y값과 예측 y값

In [None]:
# Held-out targets on the x-axis against the regression-tree predictions on the y-axis.
sns.scatterplot(x=y_test, y=tree_pred, alpha=0.5)

# Identity line: a point lying on it is predicted exactly.
# NOTE(review): the 0-30 span is hard-coded; confirm it covers the CATScore range.
plt.plot([0, 30], [0, 30], color='red')

# Labels match what is plotted: x carries y_test (actual), y carries the predictions.
plt.xlabel("Actual y values")
plt.ylabel("Predicted y values")
plt.show()

## 예측 $R^2$

In [None]:
from sklearn.metrics import r2_score

# Test-set R^2 of each model, rounded to three decimals.
for _label, _pred in (
    ('linear regression :', linear_pred),
    ('regression tree :', tree_pred),
):
    print(_label, np.round(r2_score(y_test, _pred), 3))

## MAE

In [None]:
from sklearn.metrics import mean_absolute_error

# Test-set mean absolute error of each model, rounded to three decimals.
for _label, _pred in (
    ('linear regression :', linear_pred),
    ('regression tree :', tree_pred),
):
    print(_label, np.round(mean_absolute_error(y_test, _pred), 3))

## MAPE

In [None]:
from sklearn.metrics import mean_absolute_percentage_error

# Test-set mean absolute percentage error of each model, rounded to three decimals.
# scikit-learn reports MAPE as a fraction (1.0 == 100%), not as a percentage.
# NOTE(review): MAPE is relative to the actual value; if CATScore can be 0 the
# result may become extremely large - verify against the data.
for _label, _pred in (
    ('linear regression :', linear_pred),
    ('regression tree :', tree_pred),
):
    print(_label, np.round(mean_absolute_percentage_error(y_test, _pred), 3))

## MSE

In [None]:
from sklearn.metrics import mean_squared_error

# Test-set mean squared error of each model, rounded to three decimals.
for _label, _pred in (
    ('linear regression :', linear_pred),
    ('regression tree :', tree_pred),
):
    print(_label, np.round(mean_squared_error(y_test, _pred), 3))

# 모든 회귀모형 예측력 비교

### 신경망분석

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler

# Standardize the features with statistics learned from the training half only,
# then apply that same transform to the test half.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# A single hidden layer of 5 units. Written as a one-element tuple because
# `(5)` is just the int 5, not a tuple.
r_nn = MLPRegressor(hidden_layer_sizes=(5,), random_state=0, max_iter=1000)
r_nn.fit(X_train_scaled, y_train)

### SVR

In [None]:
from sklearn.svm import SVR, SVC

# Support vector regression with an RBF kernel, trained on the standardized features.
r_svr = SVR(kernel="rbf")
r_svr.fit(X_train_scaled, y_train)

### 랜덤포레스트

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest trained on the unscaled features; the seed fixes the result.
r_rf = RandomForestRegressor(random_state=0)
r_rf.fit(X_train, y_train)

## 예측 $R^2$

In [None]:
from sklearn.metrics import r2_score as r2

# Test-set R^2 of every fitted model, rounded to two decimals.
# Each model is paired with the test features it was trained for: the neural
# network and the SVR were fitted on standardized inputs, the others on raw inputs.
for _label, _model, _features in (
    ('Linear Regression:', r_linear, X_test),
    ('Pruned Tree      :', r_tree, X_test),
    ('Neural Network   :', r_nn, X_test_scaled),
    ('SV Regression    :', r_svr, X_test_scaled),
    ('Random Forest    :', r_rf, X_test),
):
    print(_label, np.round(r2(y_test, _model.predict(_features)), 2))

## MAE

In [None]:
from sklearn.metrics import mean_absolute_error as mae

# Test-set mean absolute error of every fitted model, rounded to two decimals.
# Each model is paired with the test features it was trained for: the neural
# network and the SVR were fitted on standardized inputs, the others on raw inputs.
for _label, _model, _features in (
    ('Linear Regression:', r_linear, X_test),
    ('Pruned Tree      :', r_tree, X_test),
    ('Neural Network   :', r_nn, X_test_scaled),
    ('SV Regression    :', r_svr, X_test_scaled),
    ('Random Forest    :', r_rf, X_test),
):
    print(_label, np.round(mae(y_test, _model.predict(_features)), 2))

## MAPE

In [None]:
from sklearn.metrics import mean_absolute_percentage_error as mape

# Test-set mean absolute percentage error of every fitted model, rounded to two
# decimals. scikit-learn reports MAPE as a fraction (1.0 == 100%).
# NOTE(review): MAPE is relative to the actual value; if CATScore can be 0 the
# result may become extremely large - verify against the data.
# Each model is paired with the test features it was trained for: the neural
# network and the SVR were fitted on standardized inputs, the others on raw inputs.
for _label, _model, _features in (
    ('Linear Regression:', r_linear, X_test),
    ('Pruned Tree      :', r_tree, X_test),
    ('Neural Network   :', r_nn, X_test_scaled),
    ('SV Regression    :', r_svr, X_test_scaled),
    ('Random Forest    :', r_rf, X_test),
):
    print(_label, np.round(mape(y_test, _model.predict(_features)), 2))

## MSE

In [None]:
from sklearn.metrics import mean_squared_error as mse

# Test-set mean squared error of every fitted model, rounded to two decimals.
# Each model is paired with the test features it was trained for: the neural
# network and the SVR were fitted on standardized inputs, the others on raw inputs.
for _label, _model, _features in (
    ('Linear Regression:', r_linear, X_test),
    ('Pruned Tree      :', r_tree, X_test),
    ('Neural Network   :', r_nn, X_test_scaled),
    ('SV Regression    :', r_svr, X_test_scaled),
    ('Random Forest    :', r_rf, X_test),
):
    print(_label, np.round(mse(y_test, _model.predict(_features)), 2))