In [1]:
import numpy as np

In [28]:
class LogisticRegression:
    """
    Description:
        Logistic Regression
    Args:
        n_input_features (int): # of features in the dataset
    Attirbutes:
        weights (np.ndarray)
        bias (float)
        fit (bool): Whether the model has been fit or not to training data. Default: False
    
    """
    
    def __init__(self, n_input_features: int):
        """
        Description:
            Initialize weights (W) and bias (b)    
        """
        self.weights = np.random.randn(n_input_features,1)*0.01 # Do not start with zeros. Font: Andrew NG course
        self.bias = np.zeros((1,1))
        
        self.fit = False # indicates the training state of the classifier
        
    
    def linear_transform(self, X: np.ndarray) -> np.ndarray:
        """
        Description:
            Linear component
            Z = W X + b
        
        Args:
            X (np.ndarray): Input data
        
        Returns:
            np.ndarray: transformed data Z
        
        W -> weights
        b -> bias
        X -> Input data
        """
        return np.matmul(X, self.weights) + self.bias # np.matmul = matrix multiplication line vs colunm
    
    def sigmoid(self, Z: np.ndarray) -> np.ndarray:
        """
        Description:
            Sigmoid function
            sigma(z) = 1 / (1 + exp(-z))
        
        Args:
            Z (np.ndarray): Linear transformed data
        
        Returns:
            np.ndarray: Data evaluated in a sigmoid function
        """
        return 1. / (1. + np.exp(-Z))
        
    
    def cost_cross_entropy(self, A: np.ndarray, Y: np.ndarray) -> float:
        """
        Description:
            Cross-Entropy Cost Function
            L(Y,A) = (1/m) \sum_n [(-Y log(A)) - (1-Y)(log(1-A))]
            
        Args:
            Y (np.ndarray): true label of the data
            A (np.ndarray): label "probability" 
        """
        m = Y.shape[0]
        epsilon = 1e-6
        
        cost = (-1/m) * np.sum( Y*np.log(A)  + \
                (1 - Y) * np.log(1-A+epsilon))
        
        # cost = np.squeeze(cost) make sure cost is in the correct shape (turn [[1]] into 1)
        return np.squeeze(cost)
    
    def gradient_descent(self, X: np.ndarray, A: np.ndarray, Y: np.ndarray,
                        learning_rate = 0.01) -> None:
        """
        Description:
            Compute the Gradient Descent
            dZ = (A - Y)
            dW = dZ . X 
            dB = dZ
                and update
            W = W - dW * learning_rate
            b = b - dB * learning_rate
        """
        m = A.shape[0]
        oneover_m = 1./m
        
        dZ = (A - Y)
        dW = np.mean(dZ * X, axis=0, keepdims=True).T
        dB = np.mean(dZ, axis=0, keepdims=True).T
        
#         dW = (oneover_m * np.sum( dZ * X)).T
#         dB = (oneover_m * np.sum( dZ )).T
        
        # Update
        self.weights -= dW * learning_rate
        self.bias -= dB * learning_rate
        
        return
        
        
    def train(self, X: np.ndarray, Y: np.ndarray,
             epochs: int = 100, learning_rate: float = 0.01, batch_size: int = 10,
             verbose: bool = False) -> np.ndarray:
        
        """
        Description:
            Fit the logistic regression model to training data
            Use minibatch GD.
            
        Args:
            X (np.ndarray): Training dataset
            Y (np.ndarray): Training targets
            epochs (int, optional; default = 100): Number of iterations
            learning_rate (float, optional; default = 0.01): Learning rate step size
            batch_size (int, optional; default = 10): Size of batch for GD
            verbose (bool, optional; default = False): __description__
        
        Raises:
            Attribute: Raises error if the model is already fitted
            ValueError: Raises error if the number of features dosen't match the instantiated feature count.
            
        Returns:
            np.ndarray: The cost history
        
        """
        # Raise flags
        if self.fit:
            raise AttributeError("Error: Model already fitted")
        self.fit = True
        
        if not X.shape[-1] == self.weights.shape[0]:
            raise ValueError("Shape of X is different from Weights")
        
        if Y.ndim == 1:
            Y = np.expand_dims(Y, axis=1)
        
        # Fit the model
        cost_hist = []
        accuracies = []
        weight_hist = []
        bias_hist = []
        
        for _ in range(epochs):
            weight_hist.append(self.weights[:,0].copy())
            bias_hist.append(self.bias.copy())
            
            
            if batch_size:
                batch_indices = np.random.choice(
                    X.shape[0], size = batch_size, replace = False
                )
                X_batch, Y_batch = X[batch_indices], Y[batch_indices]
            else:
                X_batch, Y_batch = X, Y
                
            
            # Linear Transformation
            Z = self.linear_transform(X_batch)
            
            # Sigmoid activation
            A = self.sigmoid(Z)
            
            # Cost function (Cross- Entropy)
            cost = self.cost_cross_entropy(A, Y_batch)
            
            # Perform GD
            self.gradient_descent(X_batch, A, Y_batch, learning_rate = learning_rate)
            
            if verbose:
                print(f'Epoch: {_}, Cost: {cost: 0.3f}                  ', end='\r')
                
            cost_hist.append(cost)
            accuracies.append(self.accuracy(self.predict(X), Y[:,0]))
        
        if verbose:
            print(f'Final cost: {cost:0.2f}                  ')
            
        self.fit = True
        
        return np.array(cost_hist), np.array(accuracies), np.array(weight_hist).T, np.array(bias_hist).T[0,0]
    
    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Description:
            Predict the labels
            
        Args:
            X (np.ndarray): Data for predictions
            
        Returns:
            np.ndarray: Prediction for each sample
        """
        if not self.fit:
            raise AttributeError("Error: This classifier is not trained")
        
        Z = self.linear_transform(X)
        A = self.sigmoid(Z)
        
        return A.T[0]
    
    def accuracy(self, predictions: np.ndarray, labels: np.ndarray) -> float:
        """
        Description:
            Accuracy of prediction
            
        Args:
            predictions (np.ndarray): predict (function) output
            labels (np.ndarray): True labels
            
        Returns:
            float: prediction accuracy
        """
        
        overlap = (predictions >= 0.5) == labels
        accuracy = overlap.sum() / predictions.shape[0] * 100
        
        return accuracy

DataSet = UCI Heart Disease Dataset

In [23]:
import pandas as pd

df = pd.read_csv("Dataset/heart.csv")

# Target are 0=yes and 1=no, let's change
df.target = df.target.replace({0: 1, 1: 0})
print(df.head())
print(df.tail())

   age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       0  
1   0     2       0  
2   0     2       0  
3   0     2       0  
4   0     2       0  
     age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0       130   131    0        1      115      1      1.2   
3

Transform the data to feed the model

In [24]:
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

targets = df.pop("target")

x_train, x_test, y_train, y_test = train_test_split(
    df, targets, test_size = 0.25, random_state = 42
)

features_to_standardize = ["age", "trestbps", "chol", "thalach", "oldpeak"]

column_transformer = ColumnTransformer(
    [("scaler", StandardScaler(), features_to_standardize)], remainder="passthrough"
)

x_train = column_transformer.fit_transform(x_train)
x_test = column_transformer.fit_transform(x_test)

Training and testing the model

In [29]:
model = LogisticRegression(n_input_features=x_train.shape[-1])

costs, accuracies, weights, bias = model.train(x_train, y_train,
                                              epochs = 5000,
                                              learning_rate=0.01,
                                              batch_size=None,
                                              verbose=True)

predictions = model.predict(x_test)

accuracy = model.accuracy(predictions, y_test)

print(f"Model test accuracy: {accuracy:0.2f}%")

Epoch: 0, Cost:  0.699                  Epoch: 1, Cost:  0.697                  Epoch: 2, Cost:  0.694                  Epoch: 3, Cost:  0.692                  Epoch: 4, Cost:  0.689                  Epoch: 5, Cost:  0.687                  Epoch: 6, Cost:  0.685                  Epoch: 7, Cost:  0.682                  Epoch: 8, Cost:  0.680                  Epoch: 9, Cost:  0.678                  Epoch: 10, Cost:  0.676                  Epoch: 11, Cost:  0.674                  Epoch: 12, Cost:  0.672                  Epoch: 13, Cost:  0.670                  Epoch: 14, Cost:  0.667                  Epoch: 15, Cost:  0.665                  Epoch: 16, Cost:  0.663                  Epoch: 17, Cost:  0.661                  Epoch: 18, Cost:  0.659                  Epoch: 19, Cost:  0.657                  Epoch: 20, Cost:  0.656                  Epoch: 21, Cost:  0.654                  Epoch: 22, Cost:  0.652                  Epoch: 23, Cost:  0.650                  Ep

Epoch: 617, Cost:  0.414                  Epoch: 618, Cost:  0.414                  Epoch: 619, Cost:  0.414                  Epoch: 620, Cost:  0.414                  Epoch: 621, Cost:  0.414                  Epoch: 622, Cost:  0.414                  Epoch: 623, Cost:  0.414                  Epoch: 624, Cost:  0.414                  Epoch: 625, Cost:  0.413                  Epoch: 626, Cost:  0.413                  Epoch: 627, Cost:  0.413                  Epoch: 628, Cost:  0.413                  Epoch: 629, Cost:  0.413                  Epoch: 630, Cost:  0.413                  Epoch: 631, Cost:  0.413                  Epoch: 632, Cost:  0.413                  Epoch: 633, Cost:  0.413                  Epoch: 634, Cost:  0.413                  Epoch: 635, Cost:  0.413                  Epoch: 636, Cost:  0.413                  Epoch: 637, Cost:  0.413                  Epoch: 638, Cost:  0.412                  Epoch: 639, Cost:  0.412                  Epoch: 640,

Epoch: 1099, Cost:  0.390                  Epoch: 1100, Cost:  0.390                  Epoch: 1101, Cost:  0.390                  Epoch: 1102, Cost:  0.390                  Epoch: 1103, Cost:  0.390                  Epoch: 1104, Cost:  0.390                  Epoch: 1105, Cost:  0.390                  Epoch: 1106, Cost:  0.390                  Epoch: 1107, Cost:  0.390                  Epoch: 1108, Cost:  0.390                  Epoch: 1109, Cost:  0.390                  Epoch: 1110, Cost:  0.390                  Epoch: 1111, Cost:  0.390                  Epoch: 1112, Cost:  0.390                  Epoch: 1113, Cost:  0.390                  Epoch: 1114, Cost:  0.390                  Epoch: 1115, Cost:  0.390                  Epoch: 1116, Cost:  0.390                  Epoch: 1117, Cost:  0.389                  Epoch: 1118, Cost:  0.389                  Epoch: 1119, Cost:  0.389                  Epoch: 1120, Cost:  0.389                  Epoch: 1121, Cost:  0.389       

Epoch: 1311, Cost:  0.384                  Epoch: 1312, Cost:  0.384                  Epoch: 1313, Cost:  0.384                  Epoch: 1314, Cost:  0.384                  Epoch: 1315, Cost:  0.384                  Epoch: 1316, Cost:  0.384                  Epoch: 1317, Cost:  0.384                  Epoch: 1318, Cost:  0.384                  Epoch: 1319, Cost:  0.384                  Epoch: 1320, Cost:  0.384                  Epoch: 1321, Cost:  0.384                  Epoch: 1322, Cost:  0.384                  Epoch: 1323, Cost:  0.384                  Epoch: 1324, Cost:  0.384                  Epoch: 1325, Cost:  0.384                  Epoch: 1326, Cost:  0.384                  Epoch: 1327, Cost:  0.384                  Epoch: 1328, Cost:  0.384                  Epoch: 1329, Cost:  0.384                  Epoch: 1330, Cost:  0.384                  Epoch: 1331, Cost:  0.384                  Epoch: 1332, Cost:  0.384                  Epoch: 1333, Cost:  0.384       

Epoch: 1672, Cost:  0.377                  Epoch: 1673, Cost:  0.377                  Epoch: 1674, Cost:  0.377                  Epoch: 1675, Cost:  0.377                  Epoch: 1676, Cost:  0.377                  Epoch: 1677, Cost:  0.377                  Epoch: 1678, Cost:  0.377                  Epoch: 1679, Cost:  0.377                  Epoch: 1680, Cost:  0.377                  Epoch: 1681, Cost:  0.377                  Epoch: 1682, Cost:  0.377                  Epoch: 1683, Cost:  0.377                  Epoch: 1684, Cost:  0.377                  Epoch: 1685, Cost:  0.377                  Epoch: 1686, Cost:  0.377                  Epoch: 1687, Cost:  0.377                  Epoch: 1688, Cost:  0.377                  Epoch: 1689, Cost:  0.377                  Epoch: 1690, Cost:  0.377                  Epoch: 1691, Cost:  0.377                  Epoch: 1692, Cost:  0.377                  Epoch: 1693, Cost:  0.377                  Epoch: 1694, Cost:  0.377       

Final cost: 0.36                           
Model test accuracy: 85.53%
