# Implementing Logistic Regression from Scratch

In [1]:
import pandas as pd
import numpy as np
import os

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [2]:
class LogisticRegression:
    def __init__(self, learning_rate=0.01, n_iter=1000):
        # Initialize class attributes
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        self.W = None
        self.b = None

    def _sigmoid(self, x):
        # Sigmoid function to map to values between 0 and 1
        return 1 / (1 + np.exp(-x))

    def fit(self, X, y):
        # Getting the number of rows and columns of X
        n_rows, n_cols = X.shape

        # Initialize weights with random values between 0 and 1
        self.W = np.random.rand(n_cols)

        # Initialize the bias with a random value between 0 and 1
        self.b = np.random.rand()

        # Iterating a number n_iter of times
        for i in range(self.n_iter):
            # Calculate the output y
            y = np.dot(X, self.W) + self.b

            # Apply the sigmoid function to obtain the probabilities
            probs = self._sigmoid(y)

            # Calculate the gradient of weights and bias
            dW = np.dot(X.T, (probs - y)) / n_rows
            db = np.sum(probs - y) / n_rows

            # Update the weights using gradient descent
            self.W -= self.learning_rate * dW

            # Update the bias using gradient descent
            self.b -= self.learning_rate * db

    def predict(self, X):
        # Calculate the output y
        y = np.dot(X, self.W) + self.b

        # Apply the sigmoid function to obtain the probabilities
        probs = self._sigmoid(y)

        # Generate binary predictions based on the probabilities
        y_pred = np.where(probs >= 0.5, 1, 0)

        # Return y_pred
        return y_pred


In [3]:
# Importing data
maintenance_data = pd.read_csv('predictive_maintenance.csv')

In [4]:
# Dropping ID columns
maintenance_data.drop(columns = ['UDI', 'Product ID'], inplace = True)

In [5]:
# Mapping type column to numbers
maintenance_data['Type'] = np.where(maintenance_data['Type'] == 'L', 
                                    0, 
                                   np.where(maintenance_data['Type'] == 'M',
                                           1,
                                           2))

In [6]:
# Instancing LogisticRegression class
lr = LogisticRegression()

In [7]:
# Creating input and output variable
X = maintenance_data.drop(columns = ['Target', 'Failure Type'])
y = maintenance_data['Target']

In [8]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Fitting the model
lr.fit(X_train, y_train)

In [10]:
# Making predictions
y_pred = lr.predict(X_test)

In [11]:
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9695
