In [1]:
import numpy as np
import pandas as pd
import os
import sys

from sklearn.model_selection import train_test_split

# Adding src directory to the system path
current_dir = os.getcwd()
src_dir = os.path.join(current_dir, '..', 'src')
sys.path.insert(0, os.path.abspath(src_dir))

from utils import z_score_normalize
from logistic_regressor import compute_logistic_cost, compute_logistic_gradients, logistic_gradient_descent, evaluate_model_performance

# Logistic Regression with L2 Regularization

### Introduction
This includes the documentation of the logistic regression model with L2 regularization implemented in this notebook.

### Cost function
The cost function used for logistic regression is
$$
J(\vec{w}, b) = -\frac{1}{m} \sum_{i=0}^{m-1} \left[ y^{(i)} \log \left( f_{\vec{w}, b} (\vec{x}^{(i)}) \right) + (1 - y^{(i)}) \log \left( 1 - f_{\vec{w}, b} (\vec{x}^{(i)}) \right) \right] + \frac{\lambda}{2m} \sum_{j=0}^{n-1} w_{j} ^ 2
$$
where *m* is the number of training examples, *n* is the number of features, and \\(\lambda\\) is the regularization parameter.

Also, $$ f_{\vec{w},b}(\mathbf{x}^{(i)}) = sigmoid(\vec{w} \cdot \mathbf{x}^{(i)} + b)  $$ 

### Gradient Descent Algorithm
$$\begin{align*}
&\text{repeat until convergence:} \; \lbrace \\
&  \; \; \;w_j = w_j -  \alpha \frac{\partial J(\vec{w},b)}{\partial w_j}   \; & \text{for j := 0..n-1} \\ 
&  \; \; \;  \; \;b = b -  \alpha \frac{\partial J(\vec{w},b)}{\partial b} \\
&\rbrace
\end{align*}$$
where \\(\vec{w}, b\\) are updated simultaneously.

The gradient is defined as:
$$
\frac{\partial J(\vec{w}, b)}{\partial w_{j}} = \frac{1}{m} \sum_{i=0}^{m-1} \left( f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right) x_{j}^{(i)} + \frac{\lambda}{m}  w_{j}
$$

$$
\frac{\partial J(w, b)}{\partial b} = \frac{1}{m} \sum_{i=0}^{m-1} \left( f_{\vec{w},b}(\vec{x}^{(i)}) - y^{(i)} \right)
$$

In [2]:
# Example usage
X = np.array([[1, 2], [3, 4], [5, 6]])
y = np.array([0, 1, 0])
w = np.array([0.1, 0.2])
b = 0.1
lambda_ = 0.01

cost = compute_logistic_cost(X, y, w, b, lambda_)
print("Computed cost:", cost)

Computed cost: 1.0846660094499971


In [3]:
np.random.seed(1)
X_tmp = np.random.rand(5,3)
y_tmp = np.array([0,1,0,1,0])
w_tmp = np.random.rand(X_tmp.shape[1])
b_tmp = 0.5
lambda_tmp = 0.7
dj_db_tmp, dj_dw_tmp =  compute_logistic_gradients(X_tmp, y_tmp, w_tmp, b_tmp, lambda_tmp)

print(f"dj_db: {dj_db_tmp}", )
print(f"Regularized dj_dw:\n {dj_dw_tmp.tolist()}", )

dj_db: [0.17380013 0.32007508 0.10776313]
Regularized dj_dw:
 0.341798994972791


**Expected Output**
```
dj_db: 0.341798994972791
Regularized dj_dw:
 [0.17380012933994293, 0.32007507881566943, 0.10776313396851499]
 ```

In [4]:
# Ensure the file path is correct and the file exists
file_path = 'C:/GitHub/linear-logistic-regression-implementation/data/weather_forecast_data.csv'
if not os.path.exists(file_path):
    raise FileNotFoundError(f"The file {file_path} does not exist.")

# Read the CSV file
df = pd.read_csv(file_path)

# Assign features and target values
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Convert labels to boolean (True for 'rain', False for 'no rain')
y = np.array((y == 'rain').astype(int))  # Ensuring y is an integer array

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = z_score_normalize(X_train)
X_test = z_score_normalize(X_test)

# Initialize parameters
m, n = X.shape
initial_w = np.zeros(n)
initial_b = 0
alpha = 0.3
num_iters = 100000
lambda_ = 0.01

# Perform gradient descent
w, b, J_history = logistic_gradient_descent(X_train, y_train, initial_w, initial_b, alpha, num_iters, lambda_)

print("Optimized weights:", w)
print("Optimized bias:", b)
print("Final cost:", J_history[-1])

Iteration: 0, Weights: [-0.02736185  0.03851381  0.00059367  0.03306878 -0.00066092], Bias: -0.11145, Cost: 0.6426291071172929
Iteration: 10000, Weights: [-2.06193018  2.7281239   0.02057885  2.45143232 -0.03232513], Bias: -5.379200920594113, Cost: 0.15162499022568948
Iteration: 20000, Weights: [-2.06197816  2.72818766  0.02057887  2.45148852 -0.03232654], Bias: -5.379318318507932, Cost: 0.15162499018890885
Iteration: 30000, Weights: [-2.06197816  2.72818767  0.02057887  2.45148852 -0.03232654], Bias: -5.379318327559568, Cost: 0.15162499018890885
Iteration: 40000, Weights: [-2.06197816  2.72818767  0.02057887  2.45148852 -0.03232654], Bias: -5.379318327559914, Cost: 0.15162499018890882
Iteration: 50000, Weights: [-2.06197816  2.72818767  0.02057887  2.45148852 -0.03232654], Bias: -5.379318327559914, Cost: 0.15162499018890882
Iteration: 60000, Weights: [-2.06197816  2.72818767  0.02057887  2.45148852 -0.03232654], Bias: -5.379318327559914, Cost: 0.15162499018890882
Iteration: 70000, Wei

In [5]:
# Compute accuracy on the training data
accuracy = evaluate_model_performance(X_test, y_test, w, b)
print("Test Accuracy:", accuracy)

Confusion Matrix:
 [[426  17]
 [ 15  42]]
Accuracy: 0.936
Precision: 0.711864406779661
Recall: 0.7368421052631579
F1 Score: 0.7241379310344828
Test Accuracy: 0.936
