## [1] 학습 파이프라인

In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load the breast-cancer features/target pair, then hold out 33% of the rows
# for evaluation with a fixed seed so the split is repeatable.
X, y = load_breast_cancer(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1234,
)

In [3]:
# Chain feature standardization and a logistic-regression classifier so both
# are fitted together on the training split.
scaler = StandardScaler()
clf = LogisticRegression(solver='sag', random_state=1234)

# Each pipeline step has to be a (name, estimator) tuple; a bare list of
# estimators such as [scaler, clf] is not a valid `steps` value.
steps = [
    ("scaler", scaler),
    ("clf", clf),
]
pipe = Pipeline(steps=steps)

pipe.fit(X_train, y_train)



In [4]:
# Predict on the held-out split and report the share of exact label matches.
y_pred = pipe.predict(X_test)

accuracy = (y_pred == y_test).mean()
print(f'정확도:{accuracy * 100: .2f}%')

정확도: 94.68%


## [2] 다중 로지스틱 회귀 예제 1

In [5]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score

# Load the iris features/target pair, then hold out 33% of the rows for
# evaluation with a fixed seed so the split is repeatable.
X, y = load_iris(as_frame=True, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1234,
)

In [6]:
# Min-max scale the features, fit a logistic regression with the 'saga'
# solver, and score the held-out split.
scaler = MinMaxScaler()
clf = LogisticRegression(solver='saga', random_state=1234)
pipe = Pipeline(steps=[
    ("scaler", scaler),
    ("clf", clf),
])

pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# average=None yields one precision value per class; only the entry at
# index 1 is printed.
pre = precision_score(y_test, y_pred, average=None)
class_1_precision = pre[1]
print(f'정밀도:{class_1_precision*100: .2f}%')

정밀도: 94.12%


## [3] 다중 로지스틱 회귀 예제 2

In [7]:
import numpy as np
import pandas as pd

# Build a reproducible synthetic design matrix: five independent
# normal(0, 1) columns of n rows each, drawn one after another from the
# globally seeded generator.
n = 300
np.random.seed(1234)

feature_names = ['X1', 'X2', 'X3', 'X4', 'X5']
draws = [np.random.normal(0, 1, size=n) for _ in feature_names]

# Keep the individual column arrays available under their own names.
X1, X2, X3, X4, X5 = draws

X = pd.DataFrame(dict(zip(feature_names, draws)))

In [21]:
# Simulate binary labels from a logistic model with known parameters.
# The linear predictor is  intercept + X @ beta  plus Gaussian noise (sd 7.5);
# a row is labelled 1 when its logistic probability is at least 0.5, else 0.
intercept = -2
beta = np.array([[-2, -2, 3, 4, 5]]).reshape(-1, 1)  # column vector, one weight per feature

np.random.seed(1111)
# One noise value per row, drawn in a single call instead of n scalar calls.
# NOTE(review): with the legacy global seed this is expected to yield the same
# values as n successive scalar draws -- re-run the fit cell and compare its
# printed parameters to confirm.
noise = np.random.normal(0, 7.5, size=n)

# Whole-matrix product replaces the Python loop over .iloc rows.
logit = (intercept + (X.to_numpy() @ beta)[:, 0]) + noise

# Logistic function written as 1 / (1 + exp(-z)). For very large z this tends
# to 1, whereas exp(z) / (1 + exp(z)) would evaluate inf / inf = nan and the
# `>= 0.5` comparison would then silently assign label 0.
pi = 1.0 / (1.0 + np.exp(-logit))

# Labels as plain Python ints; `y` is bound only once, to the final Series,
# rather than being reused for the per-row scalar along the way.
ys = (pi >= 0.5).astype(int).tolist()
y = pd.Series(ys)

In [22]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression on the simulated data and print the fitted
# intercept and coefficient vector.
clf = LogisticRegression(random_state=1234).fit(X, y)

fitted_intercept = clf.intercept_[0]
fitted_coefs = clf.coef_[0]
print(f'절편: {fitted_intercept}')
print(f'계수: {fitted_coefs}')

# Predictions are made on the same X the model was fitted on, so the printed
# accuracy is an in-sample figure.
y_pred = clf.predict(X)

in_sample_accuracy = (y == y_pred).mean()
print(f'정확도:{in_sample_accuracy * 100: .2f}%')

절편: -0.578686940895635
계수: [-0.52203159 -0.51494024  0.71392631  1.05416056  1.06007824]
정확도: 78.00%
