# 직선 학습하기

In [22]:
from sklearn.linear_model import LinearRegression
import pandas as pd

df = pd.read_csv('./datasets/BostonHousing.csv')
pd.DataFrame(df.nunique())

Unnamed: 0,0
crim,504
zn,26
indus,76
chas,2
nox,81
rm,446
age,356
dis,412
rad,9
tax,66


In [23]:
X = df[['crim', 'rm', 'dis']]
y = df['medv']

In [24]:
model = LinearRegression()

In [25]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

model.fit(X_train, y_train)

In [26]:
model.intercept_

-29.63133844317301

In [27]:
model.coef_

array([-0.26366886,  8.3100005 ,  0.20133572])

In [28]:
model.predict(X_test)[0]*1000

8445.072284765256

In [29]:
y_test.values[0]*1000

13800.0

In [30]:
model.coef_[0]*1000

-263.668864625959

In [31]:
from sklearn.metrics import mean_squared_error
import numpy as np

y_pred = model.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

5.623736137891624

In [50]:
from sklearn.model_selection import cross_val_score

model = LinearRegression()
X = df[['crim', 'rm', 'dis']]
y = df['medv']

cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error').mean()*-1

48.40279981143159

# 교차 특성 다루기

In [96]:
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures


df = pd.read_csv('./datasets/BostonHousing.csv')
X = df[['crim', 'rm', 'dis']]
y = df['medv']

poly = PolynomialFeatures(degree=3, include_bias=False, interaction_only=True)
X_poly = poly.fit_transform(X)

model = LinearRegression()

cross_val_score(model, X_poly, y, cv=5, scoring='neg_mean_squared_error').mean()*-1

45.9309435384367

# 비선형 관계 학습하기

In [108]:
import pandas as pd
from sklearn.metrics import mean_squared_error
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures


df = pd.read_csv('./datasets/BostonHousing.csv')
X = df[['crim', 'rm', 'dis']]
y = df['medv']

poly = PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)
X_poly = poly.fit_transform(X)

model = LinearRegression()
cross_val_score(model, X_poly, y, cv=5, scoring='neg_mean_squared_error').mean()*-1

36.94488369094482

# 규제로 분산 줄이기

In [116]:
from sklearn.linear_model import Ridge, Lasso
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

df = pd.read_csv('./datasets/BostonHousing.csv')
X = df[['crim', 'rm', 'dis']]
y = df['medv']

X_scaled = StandardScaler().fit_transform(X)

model = Ridge()
cross_val_score(model, X_scaled, y, cv=5, scoring='neg_mean_squared_error').mean()*-1

48.402509646262274

In [128]:
from sklearn.linear_model import RidgeCV
import numpy as np

ridge_cv = RidgeCV(alphas=np.linspace(0.1, 10, 30), cv=5)
model_cv = ridge_cv.fit(X_scaled, y)
model_cv.coef_

array([-2.15451229,  5.74280004,  0.29384897])

In [129]:
model_cv.alpha_

10.0

In [130]:
model = Ridge(alpha=model_cv.alpha_)
cross_val_score(model, X_scaled, y, cv=5, scoring='neg_mean_squared_error').mean()*-1

48.42372795103246

# 라소 회귀로 특성 줄이기

In [131]:
from sklearn.linear_model import Ridge, Lasso
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

df = pd.read_csv('./datasets/BostonHousing.csv')
X = df[['crim', 'rm', 'dis']]
y = df['medv']

X_scaled = StandardScaler().fit_transform(X)

model = Lasso()
cross_val_score(model, X_scaled, y, cv=5, scoring='neg_mean_squared_error').mean()*-1

52.41899383342876

In [132]:
model = Lasso(alpha=10)
cross_val_score(model, X_scaled, y, cv=5, scoring='neg_mean_squared_error').mean()*-1

96.03278248852249

In [133]:
from sklearn.linear_model import LassoCV
import numpy as np

lasso_cv = LassoCV(alphas=np.linspace(0.1, 10, 30), cv=5)
model_cv = lasso_cv.fit(X, y)
model_cv.coef_

array([-0.25766371,  8.13866517,  0.11202806])

In [134]:
model_cv.alpha_

0.1

In [135]:
model = Lasso(alpha=model_cv.alpha_)
cross_val_score(model, X_scaled, y, cv=5, scoring='neg_mean_squared_error').mean()*-1

48.28761489702431