# 다중 회귀
- 여러 개의 특성을 이용한 선형 회귀다.
- y = a * 특성1 + b * 특성2 + 절편

In [37]:
import pandas as pd

# Load the fish dataset and keep only the rows whose Species is 'Perch'.
df = pd.read_csv('data/Fish.csv')
p_data = df[df['Species'] == 'Perch']

# Inputs: three measurement columns. Target: Weight, kept as a one-column frame.
p_full = p_data.loc[:, ['Length2', 'Height', 'Width']]
p_weight = p_data.loc[:, ['Weight']]

In [38]:
from sklearn.model_selection import train_test_split

# Hold out a test set using train_test_split's default proportion.
# random_state is pinned so the split, and therefore every score computed
# from it, is reproducible from run to run instead of changing each time.
train_input, test_input, train_target, test_target = train_test_split(
    p_full, p_weight, random_state=42
)

In [39]:
from sklearn.preprocessing import PolynomialFeatures

# Toy check on the single sample [3, 5]: fit and expand it in one call.
# include_bias=False leaves the constant column out of the result.
poly = PolynomialFeatures(include_bias=False)
poly.fit_transform([[3, 5]])

array([[ 3.,  5.,  9., 15., 25.]])

In [40]:
# Build the expanded feature set from train_input.
poly = PolynomialFeatures(include_bias=False)
train_poly = poly.fit(train_input).transform(train_input)
print(train_poly.shape)

# Show the name of each generated column.
poly.get_feature_names_out()

(42, 9)


array(['Length2', 'Height', 'Width', 'Length2^2', 'Length2 Height',
       'Length2 Width', 'Height^2', 'Height Width', 'Width^2'],
      dtype=object)

In [41]:
from sklearn.linear_model import LinearRegression

# Fit a linear model on the expanded training features, then report its
# score (R^2) on that same training data.
lr = LinearRegression().fit(train_poly, train_target)
lr.score(X=train_poly, y=train_target)

0.990543534899636

In [42]:
# Expand the test inputs with the transformer already fitted on the training
# inputs, then score the model on the held-out data.
test_poly = poly.transform(test_input)
lr.score(X=test_poly, y=test_target)

0.9663413534145496

In [43]:
# Generate polynomial features up to the 5th degree.
poly = PolynomialFeatures(degree=5, include_bias=False)
train_poly = poly.fit(train_input).transform(train_input)
test_poly = poly.transform(test_input)

In [44]:
# Refit the existing linear model on the current train_poly features.
lr.fit(X=train_poly, y=train_target)

In [45]:
# Score the refitted model on the training data.
lr.score(X=train_poly, y=train_target)

0.9999999999997007

In [46]:
# Overfitting occurs: score the same model on the held-out test features.
lr.score(X=test_poly, y=test_target)

-1689.7119039667732

In [47]:
# Regularization is introduced to prevent overfitting.
# The features are standardized first, with the scaler fitted on the
# training features only and then applied to both sets.
from sklearn.preprocessing import StandardScaler
ss = StandardScaler().fit(train_poly)
train_scaled, test_scaled = ss.transform(train_poly), ss.transform(test_poly)

In [52]:
# Ridge regression: fit on the standardized training features (alpha=0.1)
# and report the training score.
from sklearn.linear_model import Ridge
ridge = Ridge(alpha=0.1).fit(train_scaled, train_target)
ridge.score(X=train_scaled, y=train_target)

0.9915914671169322

In [53]:
# Score the ridge model on the standardized test features.
ridge.score(X=test_scaled, y=test_target)

0.9684847433279388

In [54]:
# Lasso regression: fit with default settings on the standardized training
# features and report the training score.
from sklearn.linear_model import Lasso
lasso = Lasso().fit(train_scaled, train_target)
lasso.score(X=train_scaled, y=train_target)

0.9895426004306058

In [55]:
# Score the lasso model on the standardized test features.
lasso.score(X=test_scaled, y=test_target)

0.9734261157183133