In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [73]:
# Feature matrix stored as (features x samples): every row is one feature and
# every column is one of the 10 observations. Row 0 is a constant 1, so its
# weight acts as the intercept term.
X = np.array(
    [
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 2, 1, 3, 0, 5, 10, 1, 2],
        [500, 700, 750, 600, 1450, 800, 1500, 2000, 450, 1000],
        [1, 1, 2, 1, 2, 1, 3, 3, 1, 2],
    ],
    dtype=np.float64,
)
# Binary target: one 0/1 label per column of X.
y = np.array([0, 0, 1, 0, 1, 0, 1, 0, 1, 1], dtype=np.float64)

In [3]:
# Display the raw feature matrix (4 feature rows x 10 sample columns).
X

array([[1.00e+00, 1.00e+00, 1.00e+00, 1.00e+00, 1.00e+00, 1.00e+00,
        1.00e+00, 1.00e+00, 1.00e+00, 1.00e+00],
       [1.00e+00, 1.00e+00, 2.00e+00, 1.00e+00, 3.00e+00, 0.00e+00,
        5.00e+00, 1.00e+01, 1.00e+00, 2.00e+00],
       [5.00e+02, 7.00e+02, 7.50e+02, 6.00e+02, 1.45e+03, 8.00e+02,
        1.50e+03, 2.00e+03, 4.50e+02, 1.00e+03],
       [1.00e+00, 1.00e+00, 2.00e+00, 1.00e+00, 2.00e+00, 1.00e+00,
        3.00e+00, 3.00e+00, 1.00e+00, 2.00e+00]])

In [4]:
# Display the binary target vector.
y

array([0., 0., 1., 0., 1., 0., 1., 0., 1., 1.])

In [5]:
def calc_std_feat(x):
  """Standardize a feature vector to zero mean and unit variance (z-score).

  Args:
    x: NumPy array of feature values.

  Returns:
    Array holding ``(x - x.mean()) / x.std()``. When ``x`` is constant
    (``x.std() == 0``) the centered values (all zeros) are returned instead
    of the NaNs a 0/0 division would produce.
  """
  centered = x - x.mean()
  spread = x.std()
  if spread == 0:
    # A constant feature has no spread to scale by; the centered zeros are
    # already the sensible standardized result.
    return centered
  return centered / spread

In [6]:
# Work on a copy so the raw matrix X stays available unchanged.
X_st = X.copy()
# Only row 2 is standardized: its values (450..2000) are far larger in scale
# than the other feature rows.
X_st[2, :] = calc_std_feat(X[2, :])

In [7]:
# Display the matrix after standardizing row 2; the other rows are untouched.
X_st

array([[ 1.        ,  1.        ,  1.        ,  1.        ,  1.        ,
         1.        ,  1.        ,  1.        ,  1.        ,  1.        ],
       [ 1.        ,  1.        ,  2.        ,  1.        ,  3.        ,
         0.        ,  5.        , 10.        ,  1.        ,  2.        ],
       [-0.97958969, -0.56713087, -0.46401617, -0.77336028,  0.97958969,
        -0.36090146,  1.08270439,  2.11385144, -1.08270439,  0.05155735],
       [ 1.        ,  1.        ,  2.        ,  1.        ,  2.        ,
         1.        ,  3.        ,  3.        ,  1.        ,  2.        ]])

In [8]:
def calc_logloss(y, y_pred, eps=1e-15):
  """Mean binary cross-entropy (log loss).

  Args:
    y: array of true labels (0 or 1).
    y_pred: array of predicted probabilities of class 1.
    eps: predictions are clipped to ``[eps, 1 - eps]`` before taking the
      logarithm, so saturated predictions (exactly 0 or 1) yield a large but
      finite loss instead of NaN/inf from ``log(0)``.

  Returns:
    The negated mean of ``y*log(p) + (1 - y)*log(1 - p)``.
  """
  p = np.clip(y_pred, eps, 1.0 - eps)
  err = - np.mean(y * np.log(p) + (1.0 - y) * np.log(1.0 - p))
  return err

In [9]:
def sigmoid(z):
  """Numerically stable logistic function ``1 / (1 + exp(-z))``.

  Args:
    z: scalar or array of logits.

  Returns:
    Values in [0, 1] with the same shape as ``z`` (a NumPy scalar for scalar
    input). Large-magnitude inputs saturate to 0 or 1 without triggering an
    overflow in ``exp``.
  """
  z = np.asarray(z)
  # exp(-|z|) always lies in (0, 1], so it can never overflow.
  e = np.exp(-np.abs(z))
  # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically equal
  # e^z / (1 + e^z). Both are expressed through the same safe exponential.
  res = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
  # Indexing with () turns a 0-d result back into a scalar and leaves
  # higher-dimensional arrays unchanged.
  return res[()]

In [10]:
def eval_model(X, y, iterations, alpha=1e-4):
  """Fit logistic-regression weights with batch gradient descent.

  Args:
    X: 2-D array with one row per feature and one column per sample
      (the weight vector has ``X.shape[0]`` entries, the sample count is
      ``X.shape[1]``).
    y: array of true labels, one per column of ``X``.
    iterations: integer number of gradient steps.
    alpha: learning rate.

  Returns:
    The fitted weight vector ``W``.

  Side effects:
    Seeds NumPy's global random generator with 42 and prints
    ``(iteration, W, err)`` at every tenth of the run.
  """
  np.random.seed(42)
  W = np.random.randn(X.shape[0])
  n = X.shape[1]
  for i in range(1, iterations+1):
    z = np.dot(W, X)
    y_pred = sigmoid(z)
    # The loss is measured before this iteration's weight update.
    err = calc_logloss(y, y_pred)
    W -= alpha * (1/n * np.dot((y_pred - y), X.T))
    # Exact integer form of "i is a multiple of iterations / 10". The float
    # version `i % (iterations / 10) == 0` suffers from rounding (e.g. with
    # iterations=7 the remainder is never exactly 0, so nothing is printed).
    if (10 * i) % iterations == 0:
      print(i, W, err)
  return W

Подберите аргументы функции eval_model для логистической регрессии таким образом, чтобы log loss был минимальным.

In [11]:
# Selected hyperparameters: 25000 gradient steps with learning rate 1.0
# (eval_model prints the intermediate weights and log loss during training).
W = eval_model(X_st, y, iterations=25000, alpha=1e-0)

2500 [-18.31764414  -2.04856375  -4.52708874  15.19228685] 0.19390243306466634
5000 [-26.0937231   -2.7307529   -6.81720322  21.49765648] 0.15090120005015614
7500 [-31.59798266  -3.21692572  -8.41100105  25.94721672] 0.12960482586400426
10000 [-35.907977    -3.59554341  -9.65060282  29.4178451 ] 0.11663474835747886
12500 [-39.49182502  -3.90685783 -10.67930205  32.29154567] 0.10771109660127542
15000 [-42.59127593  -4.17216434 -11.56950237  34.76580371] 0.10106445661346644
17500 [-45.34622871  -4.40399802 -12.3624602   36.95502   ] 0.09583301669667077
20000 [-47.84444435  -4.61034064 -13.08373895  38.93106184] 0.09154635956227136
22500 [-50.1443636   -4.7965915  -13.75017686  40.74188723] 0.08792534173525937
25000 [-52.28671965  -4.96658396 -14.37338788  42.4210246 ] 0.08479343466760672


Создайте функцию `calc_pred_proba`, возвращающую предсказанную вероятность класса 1 (на вход подаются W, который уже посчитан функцией `eval_model`, и X; на выходе — массив `y_pred_proba`).

In [30]:
def calc_pred_proba(W, X):
    """Return the predicted probability of class 1.

    Args:
        W: weight vector (e.g. as returned by ``eval_model``).
        X: feature matrix laid out like the one passed to ``eval_model``
            (features in rows, samples in columns).

    Returns:
        ``sigmoid(np.dot(W, X))`` -- one probability per sample.
    """
    return sigmoid(np.dot(W, X))

In [31]:
# Class-1 probabilities for the same (standardized) samples the model was fit on.
y_pred_proba = calc_pred_proba(W, X_st)

In [32]:
# Display the predicted probabilities.
y_pred_proba

array([0.32032983, 0.00125334, 1.        , 0.02374222, 0.97276276,
       0.00920904, 1.        , 0.00622652, 0.67477262, 1.        ])

Создайте функцию `calc_pred`, возвращающую предсказанный класс (на вход подаются W, который уже посчитан функцией `eval_model`, и X; на выходе — массив `y_pred`).

In [64]:
def calc_pred(W, X, threshold=0.5):
    """Return the predicted class (0.0 or 1.0) for every sample.

    Args:
        W: weight vector (e.g. as returned by ``eval_model``).
        X: feature matrix laid out like the one passed to ``eval_model``
            (features in rows, samples in columns).
        threshold: probabilities strictly below this value map to class 0,
            everything else maps to class 1.

    Returns:
        Float array of 0.0/1.0 labels, one per predicted probability.
    """
    y_pred_proba = calc_pred_proba(W, X)
    # Vectorized thresholding; the `<` comparison keeps the original rule
    # (probability == threshold is classified as 1).
    return np.where(y_pred_proba < threshold, 0.0, 1.0)

In [65]:
# Hard class labels using calc_pred's default threshold of 0.5.
y_pred = calc_pred(W, X_st)

In [74]:
# Display the predicted labels.
y_pred

array([0., 0., 1., 0., 1., 0., 1., 0., 1., 1.])

In [75]:
# Display the true labels for side-by-side comparison with y_pred.
y

array([0., 0., 1., 0., 1., 0., 1., 0., 1., 1.])

In [76]:
# Number of samples whose predicted label equals the true label.
np.sum(y == y_pred)

10

Посчитайте Accuracy, матрицу ошибок, точность и полноту, а также F1 score.

In [79]:
def accuracy_(y, y_pred):
    """Share of samples whose predicted label equals the true label.

    Args:
        y: array of true labels.
        y_pred: array of predicted labels.

    Returns:
        Number of matching positions divided by ``y.shape[0]``.
    """
    n_samples = y.shape[0]
    n_correct = np.sum(y_pred == y)
    return n_correct / n_samples

In [80]:
# Accuracy on the training samples (the model is scored on the data it was fit on).
acc = accuracy_(y, y_pred)
acc

1.0

In [106]:
def confusion_matrix(y, y_pred):
    """Count the four confusion-matrix cells for binary labels.

    Args:
        y: array of true labels (1 = positive, 0 = negative).
        y_pred: array of predicted labels.

    Returns:
        Tuple ``(TP, FP, FN, TN)``.
    """
    is_positive = y == 1
    is_negative = y == 0
    is_hit = y == y_pred
    is_miss = ~is_hit

    true_pos = np.sum(is_positive & is_hit)
    false_pos = np.sum(is_negative & is_miss)
    false_neg = np.sum(is_positive & is_miss)
    true_neg = np.sum(is_negative & is_hit)

    return true_pos, false_pos, false_neg, true_neg

def confusion_matrix_print(y, y_pred):
    """Print the confusion matrix as two rows: ``TP FP`` then ``FN TN``.

    Args:
        y: array of true labels.
        y_pred: array of predicted labels.
    """
    tp, fp, fn, tn = confusion_matrix(y, y_pred)

    for row in ((tp, fp), (fn, tn)):
        print(*row)

In [108]:
# Print the confusion matrix; rows are "TP FP" and "FN TN".
confusion_matrix_print(y, y_pred)

5 0
0 5


In [107]:
# Same counts as a raw tuple in the order (TP, FP, FN, TN).
confusion_matrix(y, y_pred)

(5, 0, 0, 5)

In [109]:
def score(y, y_pred):
    """Compute precision, recall and F1 score for binary labels.

    Args:
        y: array of true labels (1 = positive, 0 = negative).
        y_pred: array of predicted labels.

    Returns:
        Tuple ``(precision, recall, F_1)``. A metric whose denominator is
        zero (no predicted positives, no actual positives, or
        precision + recall == 0) is reported as 0.0 instead of NaN.
    """
    TP, FP, FN, _ = confusion_matrix(y, y_pred)

    def _safe_ratio(numerator, denominator):
        # The ratio is undefined for an empty denominator; return 0.0 rather
        # than dividing by zero.
        return numerator / denominator if denominator else 0.0

    precision = _safe_ratio(TP, TP + FP)
    recall = _safe_ratio(TP, TP + FN)

    F_1 = _safe_ratio(2 * precision * recall, precision + recall)

    return precision, recall, F_1

In [110]:
# Precision, recall and F1 on the training samples.
precision, recall, F_1 = score(y, y_pred)

In [111]:
# Display the three metrics as a tuple (precision, recall, F1).
precision, recall, F_1

(1.0, 1.0, 1.0)