In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
# Training data: one study-hours value and one 0/1 result per sample (20 samples).
studyH = [0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 1.75, 2.0, 2.25, 2.5,
          2.75, 3.0, 3.25, 3.5, 4.0, 4.25, 4.5, 4.75, 5.0, 5.5]
passR = [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1]

# scikit-learn requires X to be a 2-D array, so the list is converted to an
# ndarray with one row per sample and a single feature column.
X = np.asarray(studyH).reshape(len(studyH), 1)
# The labels are converted to a 1-D ndarray.
y = np.asarray(passR).ravel()

# C is the inverse of the regularization strength: the larger it is, the more
# closely the model fits the training data, which implies a higher risk of overfitting.
model = LogisticRegression(C=1e6)

In [2]:
# Check that X is 2-D: one row per sample, one feature column (from reshape(-1,1)).
X.shape

(20, 1)

In [3]:
# Check that y is 1-D with one label per sample.
y.shape

(20,)

In [4]:
# Display the label vector built from passR.
y

array([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])

In [5]:
# Fit on the training data, then predict the class of every training sample.
# fit() returns the fitted estimator, so the two calls can be chained.
model.fit(X, y).predict(X)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [6]:
# Compare the result of model.predict(X) with y and compute the accuracy.
model.score(X,y) 

0.8

In [7]:
# Print the fitted slope, intercept and solver iteration count, one per line.
# NOTE(review): n_iter_ is documented by scikit-learn as the number of iterations
# actually run, not the max_iter limit; the 'max iteration' label is kept unchanged.
for label, value in (
    ('coef', model.coef_),
    ('intercept', model.intercept_),
    ('max iteration', model.n_iter_),
):
    print(label, value)

coef [[1.50464059]]
intercept [-4.07769916]
max iteration [5]


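# y = 1.50464059*X + -4.07769916 (here y is the linear score, i.e. the log-odds of class 1; the predicted probability is sigmoid(y))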

In [8]:
# If model.predict(2) raises an error, it is most likely because newer versions
# of scikit-learn require the input X to be a 2-D array.
# Equivalent alternative: model.predict([[2]])
model.predict(np.array([[2]]))

array([0])

In [9]:
# predict_proba expects a 2-D (n_samples, n_features) input; a bare scalar is
# rejected by current scikit-learn, so the value 2 is wrapped as one sample
# with one feature. The result has one row: [P(class 0), P(class 1)].
model.predict_proba([[2]])

array([[0.74429594, 0.25570406]])

In [10]:
# Linear score (intercept + slope * 2) computed by hand from the fitted parameters.
print('y:{}'.format(-4.07769916 + 1.50464059*2))

y:-1.06841798


In [11]:
def sigmoid_h(h, coef=1.50464059, intercept=-4.07769916):
    """Return the logistic (sigmoid) of the linear score ``intercept + coef * h``.

    Parameters
    ----------
    h : float or numpy.ndarray
        Input value(s); arrays are processed element-wise.
    coef : float, optional
        Slope of the linear score. Defaults to the coefficient printed for the
        C=1000000 model earlier in this notebook.
    intercept : float, optional
        Intercept of the linear score. Defaults to the intercept printed for
        the same model.

    Returns
    -------
    float or numpy.ndarray
        ``1 / (1 + exp(-(intercept + coef * h)))``, with the same shape as ``h``.
    """
    p = intercept + (coef * h)
    return 1.0 / (1.0 + np.exp(-p))

In [12]:
# Evaluate the hand-written sigmoid at h = 2 to compare with predict_proba above.
sigmoid_h(2)

0.255704057032568

# Probability of class 1 is 0.2557 < 0.5 -> predicted class 0

In [13]:
# predict expects a 2-D (n_samples, n_features) input; a bare scalar is rejected
# by current scikit-learn, so 3 is wrapped as one sample with one feature.
model.predict([[3]])

array([1])

In [14]:
# predict_proba expects a 2-D (n_samples, n_features) input; a bare scalar is
# rejected by current scikit-learn, so 3 is wrapped as one sample with one
# feature. The result has one row: [P(class 0), P(class 1)].
model.predict_proba([[3]])

array([[0.39264142, 0.60735858]])

In [15]:
# Linear score (intercept + slope * 3) computed by hand from the fitted parameters.
print(-4.07769916 + 1.50464059*3)

0.43622260999999973


In [16]:
# Evaluate the hand-written sigmoid at h = 3 to compare with predict_proba above.
sigmoid_h(3)

0.6073585871706797

# Probability of class 1 is 0.60736 >= 0.5 -> predicted class 1

In [17]:
# Grid of input values from -10 up to (but not including) 10 in steps of 0.1.
h = np.arange(-10,10,0.1)
# sigmoid_h applies the fitted intercept and slope before the logistic function,
# so this curve reaches 0.5 where intercept + slope*h = 0 (about h = 2.71), not at h = 0.
s_h = sigmoid_h(h)

In [18]:
import matplotlib.pyplot as plt

# Draw the sigmoid curve on an explicit Axes object instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.plot(h, s_h)

# Reference lines: vertical at h = 0, dotted horizontal at the 0.5 decision threshold.
ax.axvline(0.0, color='k')
ax.axhspan(0.0, 1.0, facecolor='1.0', alpha=1.0, ls='dotted')
ax.axhline(y=0.5, ls='dotted', color='k')

# Tick only at 0, 0.5 and 1 and leave a small margin above and below the curve.
ax.set_yticks([0.0, 0.5, 1.0])
ax.set_ylim(-0.1, 1.1)
ax.set_xlabel('h')
ax.set_ylabel('$S(h)$')

plt.show()

<Figure size 640x480 with 1 Axes>

In [19]:
# The larger C is, the more closely the model obtained in training fits the training data.
model_c10 = LogisticRegression(C=10.)
model_c10.fit(X, y)
print(model_c10.coef_, model_c10.intercept_)
# The loop variable is deliberately not named `h`, so it does not overwrite the
# `h` array that the sigmoid plot cell depends on.
# predict expects a 2-D (n_samples, n_features) input, so each value is wrapped
# as a single sample with a single feature.
for hours in range(0, 5):
    print(hours, model_c10.predict([[hours]]))

[[1.19941739]] [-3.16923655]
0 [0]
1 [0]
2 [0]
3 [1]
4 [1]


In [20]:
# Same data fitted with C=1 (stronger regularization than C=10).
model_c1 = LogisticRegression(C=1)
model_c1.fit(X, y)
print(model_c1.coef_, model_c1.intercept_)
# The loop variable is deliberately not named `h`, so it does not overwrite the
# `h` array that the sigmoid plot cell depends on.
# predict expects a 2-D (n_samples, n_features) input, so each value is wrapped
# as a single sample with a single feature.
for hours in range(0, 5):
    print(hours, model_c1.predict([[hours]]))

[[0.61126347]] [-1.36550178]
0 [0]
1 [0]
2 [0]
3 [1]
4 [1]


In [21]:
# Same data fitted with C=0.1 (the strongest regularization tried in this notebook).
model_cP1 = LogisticRegression(C=0.1)
model_cP1.fit(X, y)
print(model_cP1.coef_, model_cP1.intercept_)
# The loop variable is deliberately not named `h`, so it does not overwrite the
# `h` array that the sigmoid plot cell depends on.
# predict expects a 2-D (n_samples, n_features) input, so each value is wrapped
# as a single sample with a single feature.
for hours in range(0, 5):
    print(hours, model_cP1.predict([[hours]]))

[[0.22351894]] [-0.20201568]
0 [0]
1 [1]
2 [1]
3 [1]
4 [1]
