## Logistic Regression
$$ g\big(x\big) = \frac{1}{1 + e^{-(w \cdot x + b)}} $$

In [19]:
import numpy as np
from modules.models.logistic_regression import *
from modules.eda_functions import *

In [20]:
# Directory holding the per-ticker lag files, relative to this notebook.
# The trailing slash is kept on purpose: the loader in the next cell receives
# this string as-is and may build file names by concatenation.
path = "../data/lags/"

In [21]:
# Load everything under `path` through the project helper from
# modules.eda_functions. Judging by the output displayed in the next cell, the
# result maps a ticker symbol (e.g. 'ABEO') to a DataFrame indexed by Date with
# columns Close, lag_1 and label — TODO confirm against the helper itself.
all_data_as_dict = import_all_files_as_dict(path)

In [22]:
# Display the loaded data (the bare last expression is rendered as the cell output).
# NOTE(review): this dumps every ticker's frame; a per-ticker `.head()` or a
#   {ticker: frame.shape} summary would keep the notebook output small.
# NOTE(review): in the recorded output the Date index is not monotonic
#   (2021-10-26 ... 2021-10-18, then 2022-08-19), and rows where Close < lag_1
#   are labelled 'up' while Close > lag_1 rows are labelled 'down' — confirm the
#   row ordering and label direction are what the upstream lag step intends.
all_data_as_dict

{'ABEO':               Close  lag_1 label
 Date                            
 2021-10-20  22.5000  22.50  down
 2021-10-21  22.5000  22.50  down
 2021-10-22  21.5000  22.50    up
 2021-10-25  21.0000  21.50    up
 2021-10-26  22.0000  21.00  down
 ...             ...    ...   ...
 2021-10-13  22.0000  22.50    up
 2021-10-14  22.2500  22.00  down
 2021-10-15  22.7500  22.25  down
 2021-10-18  22.2500  22.75    up
 2022-08-19   4.2508  22.25    up
 
 [1258 rows x 3 columns],
 'ABIO':              Close  lag_1 label
 Date                           
 2021-10-20  2.7600   2.74  down
 2021-10-21  2.7700   2.76  down
 2021-10-22  2.6700   2.77    up
 2021-10-25  2.7000   2.67  down
 2021-10-26  2.6900   2.70    up
 ...            ...    ...   ...
 2021-10-13  2.8700   2.75  down
 2021-10-14  2.8300   2.87    up
 2021-10-15  2.7900   2.83    up
 2021-10-18  2.7300   2.79    up
 2022-08-19  2.3226   2.73    up
 
 [1258 rows x 3 columns],
 'ABUS':             Close  lag_1 label
 Date            

## Cost Function
$$ J\big(w, b\big) = -\frac{1}{m}\displaystyle\sum_{i=1}^{m}\Big[y^{(i)}\log\big(f_{w, b}(x^{(i)})\big) + \big(1 - y^{(i)}\big)\log\big(1 - f_{w, b}(x^{(i)})\big)\Big] $$
Regularized:
$$ J\big(w, b\big) = -\frac{1}{m}\displaystyle\sum_{i=1}^{m}\Big[y^{(i)}\log\big(f_{w, b}(x^{(i)})\big) + \big(1 - y^{(i)}\big)\log\big(1 - f_{w, b}(x^{(i)})\big)\Big] + \frac{\lambda}{2m}\displaystyle\sum_{j=1}^{n}w_{j}^{2} $$

In [23]:
# Sanity check of the regularized cost on a small seeded random problem
# (5 examples, 6 features). The seed makes the printed value repeatable.
np.random.seed(1)
X_tmp = np.random.rand(5, 6)
y_tmp = np.array([0, 1, 0, 1, 0])
# Draw order matters for reproducibility: features first, then the weights.
# rand(n) is already 1-D, so no reshape is needed before scaling by 2.
w_tmp = 2 * np.random.rand(X_tmp.shape[1])
b_tmp, lambda_tmp = 0.5, 0.7

cost_tmp = compute_cost_logistic_regression(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)
print("Regularized cost:", cost_tmp)

Regularized cost: 2.0543906339569977


## Gradient Descent
$$ w_{j} = w_{j} - \alpha\Big[\frac{1}{m}\displaystyle\sum_{i=1}^{m}\big(f_{w,b}\big(x^{(i)}\big)-y^{(i)}\big)x_{j}^{(i)} + \frac{\lambda}{m}w_{j}\Big] $$
$$ b = b - \alpha\frac{1}{m}\displaystyle\sum_{i=1}^{m}\big(f_{w,b}\big(x^{(i)}\big)-y^{(i)}\big) $$

In [24]:
# Sanity check of the regularized gradient on a seeded 5x3 random problem.
np.random.seed(1)
X_tmp = np.random.rand(5, 3)
y_tmp = np.array([0, 1, 0, 1, 0])
# Weights are drawn after X so the random stream order stays fixed.
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp, lambda_tmp = 0.5, 0.7

dj_dw_tmp, dj_db_tmp = compute_gradient_logistic_regression(
    X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp
)

print(f"dj_db: {dj_db_tmp}")
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}")

dj_db: 0.341798994972791
Regularized dj_dw:
 [0.17380012933994293, 0.3200750788156695, 0.10776313396851497]


In [25]:
# Run gradient descent starting from the seeded X_tmp / y_tmp / w_tmp / b_tmp
# defined in the gradient check cell above.
# NOTE(review): assuming gradient_descent applies the update rule shown in the
#   "Gradient Descent" section, |dj_db| <= 1, so b can move by at most
#   alpha * num_iters = 0.5 from b_tmp = 0.5. The recorded "Final b" of about
#   -0.92 is outside that range — the saved output may come from a run with
#   different settings; re-run on a fresh kernel to confirm.
num_iters = 10_000
alpha = 5e-5
w_final, b_final = gradient_descent(
    X_tmp, y_tmp, w_tmp, b_tmp, alpha, lambda_tmp, num_iters
)
print(f"Final w: {w_final.tolist()}")
print(f"Final b:\n {b_final}")

Final w: [0.6403681057234213, 0.34577216203443556, 0.54508714113052]
Final b:
 -0.9243437114863322


In [26]:
# Regularized cost evaluated at the parameters returned by gradient descent,
# using the same data and lambda as the training call.
cost_final = compute_cost_logistic_regression(
    X_tmp, y_tmp, w_final, b_final, lambda_tmp
)
print(f"Cost:\n{cost_final}")

Cost:
0.7098603640827459
