# 선형 회귀식의 계수를 찾는 법 - OLS vs. SGD
- 보스턴 집값 데이터 활용 (RM vs. Price)

### 필요한 모듈 import

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# 필요한 라이브러리 import 
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# 1. LinearRegression 모델을 사용한 경우

In [2]:
# Load the room-count / price table; RM is the only feature and price is the target.
boston = pd.read_csv('./data/boston_room_price.csv')
# Estimators take a 2-D feature matrix, so the RM column is copied out and
# reshaped into a single-column array.
X = boston['RM'].to_numpy(copy=True).reshape(-1, 1)
y = boston['price']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

# fit() hands back the estimator itself, so `lr` and `reg` name the same fitted model.
lr = LinearRegression()
lr.fit(X_train, y_train)
reg = lr

# Print the fitted line (slope for the room feature, then the intercept).
print(f'y = {reg.coef_[0]}X(room) + {reg.intercept_}')

# Score on the held-out rows; the recorded output lists MSE, RMSE and R2.
from score import evaluate_score
evaluate_score(reg, X_test, y_test)

y = 8.461091637115892X(room) + -30.571032410898336
MSE: 36.517214730838624
RMSE: 6.042947520112898
R2: 0.6015774471545623


# 2. SGDRegressor with default hyperparameters (no scaling)

In [3]:
from sklearn.linear_model import SGDRegressor

# SGD baseline: only the seed is set, and the model is trained on the raw
# (unscaled) RM values. fit() returns the estimator, so construction and
# training are chained into one expression.
reg = SGDRegressor(random_state=42).fit(X_train, y_train)

# coef_ and intercept_ are interpolated as whole arrays here, hence the
# bracketed numbers in the recorded output.
print(f'y = {reg.coef_}X + {reg.intercept_}')

# Same held-out evaluation as the other cells.
evaluate_score(reg, X_test, y_test)

y = [4.17723973]X + [-3.58747787]
MSE: 55.12954251421951
RMSE: 7.424927104976823
R2: 0.39850688976103077


# 3. SGDRegressor with scaling

In [4]:
# Standardise by hand (target: mean 0, variance 1). The statistics come from
# the training split only and are reused unchanged for the test split.
train_mean = X_train.mean(axis=0)
train_std = X_train.std(axis=0)
X_train_scaled = (X_train - train_mean) / train_std
X_test_scaled = (X_test - train_mean) / train_std

from sklearn.linear_model import SGDRegressor

# Same SGD configuration as before, now trained on the standardised feature.
reg = SGDRegressor(random_state=42).fit(X_train_scaled, y_train)

# The printed coefficients are in standardised-feature units.
print(f'y = {reg.coef_}X + {reg.intercept_}')

# Evaluate on the test split scaled with the training statistics.
evaluate_score(reg, X_test_scaled, y_test)

y = [5.84750366]X + [22.31897879]
MSE: 36.52260149848331
RMSE: 6.043393210646094
R2: 0.6015186746076305


# 4. SGDRegressor with StandardScaler()

In [5]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that one fitted
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaler = scaler.transform(X_train)
X_test_scaler = scaler.transform(X_test)

from sklearn.linear_model import SGDRegressor

# SGD with only the seed set, trained on the StandardScaler output.
reg = SGDRegressor(random_state=42).fit(X_train_scaler, y_train)

# The printed coefficients are in standardised-feature units.
print(f'y = {reg.coef_}X + {reg.intercept_}')

# Evaluate on the test split transformed by the same fitted scaler.
evaluate_score(reg, X_test_scaler, y_test)

y = [5.84750366]X + [22.31897879]
MSE: 36.52260149848331
RMSE: 6.043393210646094
R2: 0.6015186746076305


In [6]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that one fitted
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaler = scaler.transform(X_train)
X_test_scaler = scaler.transform(X_test)

from sklearn.linear_model import SGDRegressor

# Tuned run: max_iter is set to 10000 (the original note gives 1000 as the
# default) and eta0 is set explicitly.
reg = SGDRegressor(
    max_iter=10000,
    eta0=0.079625418225,
    random_state=42,
).fit(X_train_scaler, y_train)

# The printed coefficients are in standardised-feature units.
print(f'y = {reg.coef_}X + {reg.intercept_}')
# Evaluate on the test split transformed by the same fitted scaler.
evaluate_score(reg, X_test_scaler, y_test)

y = [6.510171]X + [22.2162479]
MSE: 34.942559162513675
RMSE: 5.911223152826636
R2: 0.618757790617463


In [8]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that one fitted
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaler = scaler.transform(X_train)
X_test_scaler = scaler.transform(X_test)

from sklearn.linear_model import SGDRegressor

# Tuned run: max_iter is set to 10000 (the original note gives 1000 as the
# default). eta0 here ends in ...226; the recorded metrics change only
# around the eleventh decimal place relative to the ...225 run.
reg = SGDRegressor(
    max_iter=10000,
    eta0=0.079625418226,
    random_state=42,
).fit(X_train_scaler, y_train)

# The printed coefficients are in standardised-feature units.
print(f'y = {reg.coef_}X + {reg.intercept_}')

# Evaluate on the test split transformed by the same fitted scaler.
evaluate_score(reg, X_test_scaler, y_test)

y = [6.510171]X + [22.2162479]
MSE: 34.942559162497226
RMSE: 5.911223152825245
R2: 0.6187577906176426
