# 이진 분류기 훈련하기

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load iris and keep only the first 100 observations so the task is a
# two-class problem (this section trains a binary classifier).
iris = load_iris()
X, y = iris.data[:100, :], iris.target[:100]

# Standardize the features: learn the scaling from X, then apply it to X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Logistic regression with default settings, trained on the scaled features.
model = LogisticRegression()
model.fit(X_scaled, y)

In [3]:
# A single new observation with four feature values, each set to 0.5.
observation = [[0.5] * 4]
# Predicted class label for that observation.
model.predict(observation)

array([1])

In [4]:
# Predicted class-membership probabilities for the same observation
# (one column per class of the fitted model).
model.predict_proba(observation)

array([[0.17738424, 0.82261576]])

# 다중 클래스 분류기 훈련하기

In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Full iris dataset: feature matrix and target vector in one call.
X, y = load_iris(return_X_y=True)

# Standardize the features: learn the scaling from X, then apply it to X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [24]:
# One-vs-rest multiclass logistic regression: one binary classifier per class.
#
# The `multi_class='ovr'` argument of LogisticRegression is deprecated since
# scikit-learn 1.5; the documented replacement is to wrap the estimator in
# OneVsRestClassifier, which trains the same per-class binary models.
from sklearn.multiclass import OneVsRestClassifier

model = OneVsRestClassifier(LogisticRegression(random_state=0))
model.fit(X_scaled, y)

# Predict the class and the per-class probabilities for one new observation.
observation = [[.5, .5, .5, .5]]
print(model.predict(observation))
print(model.predict_proba(observation))

[2]
[[0.0387617  0.40669108 0.55454723]]


In [25]:
# Multinomial (softmax) multiclass logistic regression.
#
# The `multi_class` argument is deprecated since scikit-learn 1.5. With the
# default solver, LogisticRegression already fits a multinomial model for
# targets with more than two classes, so the argument is simply dropped here.
model = LogisticRegression(random_state=0)
model.fit(X_scaled, y)

# Predict the class and the per-class probabilities for one new observation.
observation = [[.5, .5, .5, .5]]
print(model.predict(observation))
print(model.predict_proba(observation))

[1]
[[0.01982185 0.74491886 0.23525928]]


In [26]:
# Multiclass logistic regression with default settings (only the seed fixed).
model = LogisticRegression(random_state=0).fit(X_scaled, y)

# One new observation with four feature values, each set to 0.5.
observation = [[0.5] * 4]

# Print the predicted class, then the per-class probabilities, one per line.
print(model.predict(observation), model.predict_proba(observation), sep="\n")

[1]
[[0.01982185 0.74491886 0.23525928]]


In [27]:
import numpy as np

# Sanity check: the three printed class probabilities add up to ~1
# (the small gap comes from the rounding of the printed values).
np.array([0.01982185, 0.74491886, 0.23525928]).sum()

0.99999999

# 규제로 분산 줄이기

In [49]:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Full iris dataset: feature matrix and target vector in one call.
X, y = load_iris(return_X_y=True)

# Standardize the features: learn the scaling from X, then apply it to X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Cross-validated logistic regression with an L2 penalty: 5-fold CV picks the
# regularization strength C from the two candidates, using all CPU cores.
model = LogisticRegressionCV(
    Cs=[5.75, 5.8],
    cv=5,
    penalty='l2',
    random_state=0,
    n_jobs=-1,
).fit(X_scaled, y)

# The selected C values (displayed as the cell result).
model.C_

array([5.75, 5.75, 5.75])

In [61]:
# Cross-validated elastic-net logistic regression using the 'saga' solver.
# C is fixed to a single candidate; 5-fold CV chooses among the l1_ratios.
model = LogisticRegressionCV(
    penalty='elasticnet',
    solver='saga',
    l1_ratios=[0, 0.01, 0.05],
    Cs=[5.75],
    cv=5,
    random_state=0,
    n_jobs=-1,
)
model.fit(X_scaled, y)



In [62]:
# Inspect the l1_ratio values selected by cross-validation on the fitted model.
model.l1_ratio_

array([0, 0, 0])

# 대용량 데이터에서 분류기 훈련하기

In [92]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Full iris dataset, standardized without keeping the scaler around.
X, y = load_iris(return_X_y=True)
X_scaled = StandardScaler().fit(X).transform(X)

# L1-penalized logistic regression fitted with the 'saga' solver, the solver
# this section uses for its "very large data" recipe.
model = LogisticRegression(
    penalty='l1',
    solver='saga',
    random_state=0,
)
model.fit(X_scaled, y)



# 불균형한 클래스 다루기

In [93]:
# numpy is imported here so this section runs on its own; previously `np`
# was only available if an earlier, unrelated cell had been executed first.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load iris, then drop the first 40 observations to skew the class balance
# (this section demonstrates handling imbalanced classes).
X, y = load_iris(return_X_y=True)
X = X[40:, :]
y = y[40:]

# Collapse the target to binary: class 0 stays 0, every other class becomes 1.
y = np.where((y==0), 0, 1)

# Standardize the features.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# class_weight='balanced' re-weights the classes to compensate for the
# imbalance (see the scikit-learn documentation for the exact weighting).
model = LogisticRegression(random_state=0, class_weight='balanced')
model.fit(X_scaled, y)