In [77]:
import numpy as np
#Wine recognition dataset, 178 samples, 13 features
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [78]:
dataset = load_wine()
X, y = dataset.data, dataset.target
mask = y <2
X.shape, y.shape

((178, 13), (178,))

In [79]:
X, y = X[mask], y[mask]
X.shape, y.shape

((130, 13), (130,))

In [80]:
strat = [0]*65 + [1]*65
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=strat) 

In [81]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [82]:
class LogisticRegression:
    def __init__(self, lr=1e-2, epoch=10000):
        """
        Initizes the logistic regression
        Args:
            lr: float, learning rate
            epoch: int, number of epoch
        """
        self.lr = lr
        self.epoch = epoch
        self.weights = None 
        self.bias = None
        self.loss=[] 

    def sigmoid(self, z):
        """
        Compute sigmoid function
        Args:
        z(array):input value of shape (n_samples,)
        Returns:
        array: sigmoid of input value of shape (n_samples,)
        """
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))
    
    def compute_cost(self, X, y):
        """"
        Compute binary cross-entropy loss
        Args:
        X(array): input features of shape (n_samples, n_features)
        y(array): Target valus of shape (n_samples,)
        Returns:
        float: binary cross-entropy loss
        """
        num_samples = X.shape[0]
        z = np.dot(X, self.weights) + self.bias
        s = self.sigmoid(z)
        s = np.clip(s, 1e-15, 1-1e-15)
        cost = (-1/num_samples)*np.sum(y*np.log(s) + (1-y)*np.log(1-s))
        return cost
    
    def compute_gradient(self,X,y):
        """"
        Compute gradient of the cost function
        Args:
        X(array): input features of shape (n_samples, n_features)
        y(array): Target valus of shape (n_samples,)
        Returns:
        tuple: gradient of weights and bias
        """
        num_samples = X.shape[0]
        z = np.dot(X, self.weights) + self.bias
        s = self.sigmoid(z)
        dw = (1/num_samples)*np.dot(X.T, (s-y))
        db = (1/num_samples)*np.sum(s-y)
        return dw, db

    def train(self, X, y):
        """
        Train the logistic regression model
        Args:
        X(array): input features of shape (n_samples, n_features)
        y(array): Target valus of shape (n_samples,)

        """
        #Initialize parameters
        self.weights = np.zeros(X.shape[1])
        self.bias = 0
        #Gradient descent
        for i in range(self.epoch+1):
            #Compute gradient
            dw, db = self.compute_gradient(X, y)
            #Update parameters
            self.weights -= self.lr*dw
            self.bias -= self.lr*db

            #print loss at every 100th epoch
            if i%100 == 0:
                loss = self.compute_cost(X, y)
                self.loss.append(loss)
                print(f"Epoch {i}, Loss: {loss}")

    def predict(self, X,threshold=0.5):
        """
        Make predictions
        Args:
        X(array): input features of shape (n_samples, n_features)
        Returns:
        array: predicted valus of shape (n_samples,)
        """

        s= self.sigmoid(np.dot(X, self.weights) + self.bias)
        return s>=threshold # quy định luôn là lớn hơn 0.5 thì là 1, nhỏ hơn 0.5 thì là 0


In [83]:
model = LogisticRegression(epoch=10000)
model.train(X_train_scaled, y_train)

Epoch 0, Loss: 0.6848736739559116
Epoch 100, Loss: 0.3273874445643366
Epoch 200, Loss: 0.23379971729920307
Epoch 300, Loss: 0.18829159628285916
Epoch 400, Loss: 0.16009038086723426
Epoch 500, Loss: 0.14041326136178825
Epoch 600, Loss: 0.12570233552316268
Epoch 700, Loss: 0.11419524879928711
Epoch 800, Loss: 0.10489959409824146
Epoch 900, Loss: 0.09720546150841414
Epoch 1000, Loss: 0.09071372974730652
Epoch 1100, Loss: 0.08515062034776762
Epoch 1200, Loss: 0.08032130819294267
Epoch 1300, Loss: 0.07608298807238165
Epoch 1400, Loss: 0.07232838893819993
Epoch 1500, Loss: 0.06897524506689998
Epoch 1600, Loss: 0.06595933275704825
Epoch 1700, Loss: 0.06322972834635744
Epoch 1800, Loss: 0.06074549726481598
Epoch 1900, Loss: 0.058473331664593856
Epoch 2000, Loss: 0.05638583249852157
Epoch 2100, Loss: 0.054460238965335694
Epoch 2200, Loss: 0.052677474493330025
Epoch 2300, Loss: 0.0510214205479324
Epoch 2400, Loss: 0.049478356954312554
Epoch 2500, Loss: 0.04803652563656833
Epoch 2600, Loss: 0.046

In [85]:
y_pred = model.predict(X_test_scaled, threshold=0.5)
print(classification_report(y_test, y_pred)) #đánh giá mô hình

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       1.00      1.00      1.00        13

    accuracy                           1.00        26
   macro avg       1.00      1.00      1.00        26
weighted avg       1.00      1.00      1.00        26

