# Iris
## Regression
### 사용 라이브러리

In [17]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import koreanize_matplotlib
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor, plot_importance
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

### Data Load

In [3]:
iris = load_iris()
iris_data = iris.data # feature만으로 된 데이터
iris_label = iris.target # label 데이터

df_iris = pd.DataFrame(data=iris_data, columns=iris.feature_names)
df_iris["label"] = iris.target
df_iris.sample(5)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),label
75,6.6,3.0,4.4,1.4,1
147,6.5,3.0,5.2,2.0,2
33,5.5,4.2,1.4,0.2,0
101,5.8,2.7,5.1,1.9,2
56,6.3,3.3,4.7,1.6,1


원목표는 분류지만, 회귀를 이용해 꽃잎과 꽃 받침의 길이나 폭을 구해볼 예정

In [16]:
def reg_score(y_true, y_pred):
    MSE = mean_squared_error(y_true, y_pred)
    RMSE = np.sqrt(mean_squared_error(y_true,y_pred))
    MAE = np.mean( np.abs((y_true - y_pred) / y_true) )
    NMAE =mean_absolute_error(y_true, y_pred)/ np.mean( np.abs(y_true) )
    MAPE = np.mean( np.abs((y_true - y_pred) / y_true) ) *100
    R2 = r2_score(y_true, y_pred)
  
    return {'MSE' : np.round(MSE, 3), 
            'RMSE' : np.round(RMSE, 3),
            'MAE' : np.round(MAE, 3),
            'NMAE' : np.round(NMAE, 3),
            'MAPE' : np.round(MAPE, 3),
            'R2' : np.round(R2, 3)}
        

### 꽃잎의 길이 (petal length (cm))

In [4]:
label = "petal length (cm)"
feature_list = df_iris.columns.tolist()
feature_list.remove(label)

In [10]:
X_train, X_test, y_train, y_test = train_test_split(df_iris[feature_list], df_iris[label], test_size=0.1, shuffle=True)

print(f"X_train: {X_train.shape}\ny_train: {y_train.shape}\nX_test: {X_test.shape}\ny_test: {y_test.shape}")

X_train: (135, 4)
y_train: (135,)
X_test: (15, 4)
y_test: (15,)


#### Decision Tree

In [12]:
reg_dt = DecisionTreeRegressor()

reg_dt.fit(X_train, y_train)

pred_dt = reg_dt.predict(X_test)

In [18]:
reg_score(pred_dt, y_test)

{'MSE': 0.061,
 'RMSE': 0.246,
 'MAE': 0.056,
 'NMAE': 0.046,
 'MAPE': 5.608,
 'R2': 0.976}

#### Random Forest

In [19]:
reg_rf = RandomForestRegressor()

reg_rf.fit(X_train, y_train)

pred_rf = reg_rf.predict(X_test)

In [23]:
reg_score(pred_rf, y_test)

{'MSE': 0.057,
 'RMSE': 0.239,
 'MAE': 0.058,
 'NMAE': 0.05,
 'MAPE': 5.755,
 'R2': 0.975}

#### LightGBM

In [21]:
reg_lgbm = LGBMRegressor()

reg_lgbm.fit(X_train, y_train)

pred_lgbm = reg_lgbm.predict(X_test)

In [22]:
reg_score(pred_lgbm, y_test)

{'MSE': 0.121,
 'RMSE': 0.348,
 'MAE': 0.064,
 'NMAE': 0.063,
 'MAPE': 6.406,
 'R2': 0.944}

### 꽃잎의 넓이 (petal width (cm))

In [24]:
label = "petal width (cm)"
feature_list = df_iris.columns.tolist()
feature_list.remove(label)

X_train, X_test, y_train, y_test = train_test_split(df_iris[feature_list], df_iris[label], test_size=0.1, shuffle=True)

print(f"X_train: {X_train.shape}\ny_train: {y_train.shape}\nX_test: {X_test.shape}\ny_test: {y_test.shape}")

X_train: (135, 4)
y_train: (135,)
X_test: (15, 4)
y_test: (15,)


#### Decision Tree

In [25]:
reg_dt = DecisionTreeRegressor()

reg_dt.fit(X_train, y_train)

pred_dt = reg_dt.predict(X_test)

reg_score(pred_dt, y_test)

{'MSE': 0.028,
 'RMSE': 0.167,
 'MAE': 0.153,
 'NMAE': 0.097,
 'MAPE': 15.261,
 'R2': 0.936}

#### Random Forest

In [26]:
reg_rf = RandomForestRegressor()

reg_rf.fit(X_train, y_train)

pred_rf = reg_rf.predict(X_test)

reg_score(pred_rf, y_test)

{'MSE': 0.019,
 'RMSE': 0.137,
 'MAE': 0.139,
 'NMAE': 0.091,
 'MAPE': 13.864,
 'R2': 0.955}

#### LightGBM

In [27]:
reg_lgbm = LGBMRegressor()

reg_lgbm.fit(X_train, y_train)

pred_lgbm = reg_lgbm.predict(X_test)

reg_score(pred_lgbm, y_test)

{'MSE': 0.013,
 'RMSE': 0.115,
 'MAE': 0.115,
 'NMAE': 0.08,
 'MAPE': 11.548,
 'R2': 0.969}

### 꽃 받침 길이 (sepal length (cm))

In [28]:
label = "sepal length (cm)"
feature_list = df_iris.columns.tolist()
feature_list.remove(label)

X_train, X_test, y_train, y_test = train_test_split(df_iris[feature_list], df_iris[label], test_size=0.1, shuffle=True)

print(f"X_train: {X_train.shape}\ny_train: {y_train.shape}\nX_test: {X_test.shape}\ny_test: {y_test.shape}")

X_train: (135, 4)
y_train: (135,)
X_test: (15, 4)
y_test: (15,)


#### Decision Tree

In [29]:
reg_dt = DecisionTreeRegressor()

reg_dt.fit(X_train, y_train)

pred_dt = reg_dt.predict(X_test)

reg_score(pred_dt, y_test)

{'MSE': 0.097,
 'RMSE': 0.312,
 'MAE': 0.048,
 'NMAE': 0.047,
 'MAPE': 4.821,
 'R2': 0.887}

#### Random Tree

In [30]:
reg_rf = RandomForestRegressor()

reg_rf.fit(X_train, y_train)

pred_rf = reg_rf.predict(X_test)

reg_score(pred_rf, y_test)

{'MSE': 0.053,
 'RMSE': 0.229,
 'MAE': 0.034,
 'NMAE': 0.032,
 'MAPE': 3.35,
 'R2': 0.935}

#### LightGBM

In [31]:
reg_lgbm = LGBMRegressor()

reg_lgbm.fit(X_train, y_train)

pred_lgbm = reg_lgbm.predict(X_test)

reg_score(pred_lgbm, y_test)

{'MSE': 0.121,
 'RMSE': 0.348,
 'MAE': 0.052,
 'NMAE': 0.053,
 'MAPE': 5.226,
 'R2': 0.768}

#### 꽃 받침의 넓이 (sepal width (cm))

In [32]:
label = "sepal width (cm)"
feature_list = df_iris.columns.tolist()
feature_list.remove(label)

X_train, X_test, y_train, y_test = train_test_split(df_iris[feature_list], df_iris[label], test_size=0.1, shuffle=True)

print(f"X_train: {X_train.shape}\ny_train: {y_train.shape}\nX_test: {X_test.shape}\ny_test: {y_test.shape}")

X_train: (135, 4)
y_train: (135,)
X_test: (15, 4)
y_test: (15,)


#### Decision Tree

In [33]:
reg_dt = DecisionTreeRegressor()

reg_dt.fit(X_train, y_train)

pred_dt = reg_dt.predict(X_test)

reg_score(pred_dt, y_test)

{'MSE': 0.123,
 'RMSE': 0.351,
 'MAE': 0.096,
 'NMAE': 0.094,
 'MAPE': 9.598,
 'R2': 0.407}

#### Random Forest

In [34]:
reg_rf = RandomForestRegressor()

reg_rf.fit(X_train, y_train)

pred_rf = reg_rf.predict(X_test)

reg_score(pred_rf, y_test)

{'MSE': 0.058,
 'RMSE': 0.24,
 'MAE': 0.06,
 'NMAE': 0.06,
 'MAPE': 5.963,
 'R2': 0.333}

#### LightGBM

In [35]:
reg_lgbm = LGBMRegressor()

reg_lgbm.fit(X_train, y_train)

pred_lgbm = reg_lgbm.predict(X_test)

reg_score(pred_lgbm, y_test)

{'MSE': 0.038,
 'RMSE': 0.194,
 'MAE': 0.055,
 'NMAE': 0.055,
 'MAPE': 5.486,
 'R2': 0.608}