### Logistic Regression from Scratch 

In [16]:
import numpy as np

In [19]:
class LogisticRegression:
    """Binary logistic-regression classifier trained by gradient descent.

    Every iteration of ``fit`` computes the gradient over the whole training
    set and moves ``weights`` and ``bias`` one step of size ``lr``.
    """

    def __init__(self, lr=0.001, n_iters=100):
        """Store the hyperparameters; the model is unfitted until ``fit`` runs.

        Args:
            lr: Learning rate applied to every gradient step.
            n_iters: Number of gradient-descent iterations run by ``fit``.
        """
        self.lr = lr  # Learning rate
        self.n_iters = n_iters  # Number of iterations
        self.weights = None  # Weights (coefficients); set by fit()
        self.bias = None  # Bias term; set by fit()

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Array-like holding n_samples binary labels (0 or 1). Any
                shape with n_samples elements is accepted and flattened.

        Raises:
            ValueError: If ``X`` is not 2-D, has no rows, or the number of
                labels differs from the number of rows in ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the labels: an (n, 1) column would broadcast against the
        # (n,) prediction vector into an (n, n) matrix and corrupt the
        # gradient (or crash with an opaque shape error).
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        # Initialize weights and bias
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        inv_n = 1 / n_samples  # loop-invariant averaging factor

        # Gradient descent
        for _ in range(self.n_iters):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(linear_model)

            # Gradient of the mean log-loss w.r.t. weights and bias
            error = y_predicted - y
            dw = inv_n * np.dot(X.T, error)
            db = inv_n * np.sum(error)

            # Update weights and bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return the predicted class (0 or 1) for each row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            A list of Python ints; 1 where the predicted probability is
            strictly greater than 0.5, otherwise 0.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        probabilities = self._sigmoid(linear_model)
        # Vectorized threshold; tolist() keeps the list-of-ints return type.
        return (probabilities > 0.5).astype(int).tolist()

    def _sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)) element-wise.

        Only exp(-|x|) is evaluated. That value never exceeds 1, so inputs
        of large magnitude cannot overflow the way exp(-x) does.
        """
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + z);  x < 0: z / (1 + z)  (same function, stable form)
        return np.where(x >= 0, 1.0, z) / (1.0 + z)


# Practical Implementation of Logistic Regression from Scratch Using a Dataset

In [24]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split

In [25]:
# Load seaborn's "tips" example dataset and preview its first rows.
tips_df = sns.load_dataset('tips')
tips_df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [26]:
# Binary target column: 1 when the tip is at least 3, otherwise 0.
tips_df['tip_binary'] = tips_df['tip'].ge(3).astype(int)

### Feature and target variable

In [27]:
# Feature matrix: the double brackets keep it 2-D with a single column.
X = tips_df[['total_bill']].to_numpy()
# Label vector: the 0/1 column derived from the tip amount.
y = tips_df['tip_binary'].to_numpy()

In [28]:
# Echo the feature matrix so the notebook displays it.
X

array([[16.99],
       [10.34],
       [21.01],
       [23.68],
       [24.59],
       [25.29],
       [ 8.77],
       [26.88],
       [15.04],
       [14.78],
       [10.27],
       [35.26],
       [15.42],
       [18.43],
       [14.83],
       [21.58],
       [10.33],
       [16.29],
       [16.97],
       [20.65],
       [17.92],
       [20.29],
       [15.77],
       [39.42],
       [19.82],
       [17.81],
       [13.37],
       [12.69],
       [21.7 ],
       [19.65],
       [ 9.55],
       [18.35],
       [15.06],
       [20.69],
       [17.78],
       [24.06],
       [16.31],
       [16.93],
       [18.69],
       [31.27],
       [16.04],
       [17.46],
       [13.94],
       [ 9.68],
       [30.4 ],
       [18.29],
       [22.23],
       [32.4 ],
       [28.55],
       [18.04],
       [12.54],
       [10.29],
       [34.81],
       [ 9.94],
       [25.56],
       [19.49],
       [38.01],
       [26.41],
       [11.24],
       [48.27],
       [20.29],
       [13.81],
       [

In [29]:
# Echo the binary label vector so the notebook displays it.
y

array([0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       0, 1])

### Split the data into training and testing sets

In [30]:
# test_size=0.2 holds out 20% of the rows for testing; random_state=42 fixes
# the shuffle seed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [31]:
# Echo the held-out test features so the notebook displays them.
X_test

array([[19.82],
       [ 8.77],
       [24.55],
       [25.89],
       [13.  ],
       [17.89],
       [28.44],
       [12.48],
       [14.78],
       [15.38],
       [18.29],
       [10.77],
       [10.65],
       [14.48],
       [10.07],
       [18.64],
       [18.71],
       [21.58],
       [16.45],
       [41.19],
       [20.69],
       [20.65],
       [14.31],
       [ 9.55],
       [20.9 ],
       [11.69],
       [10.27],
       [25.28],
       [16.49],
       [48.17],
       [38.07],
       [ 7.74],
       [17.81],
       [20.29],
       [16.97],
       [24.08],
       [12.03],
       [43.11],
       [13.16],
       [22.67],
       [10.51],
       [14.52],
       [23.95],
       [10.33],
       [10.34],
       [ 3.07],
       [10.59],
       [18.69],
       [ 7.56]])

In [32]:
# Echo the training features so the notebook displays them.
X_train

array([[13.28],
       [24.27],
       [27.28],
       [31.71],
       [15.98],
       [19.49],
       [13.03],
       [ 7.25],
       [17.82],
       [17.26],
       [15.69],
       [29.85],
       [17.31],
       [23.33],
       [16.27],
       [20.23],
       [20.92],
       [15.01],
       [ 7.25],
       [26.59],
       [24.52],
       [19.77],
       [28.15],
       [17.29],
       [19.65],
       [15.53],
       [20.45],
       [34.65],
       [25.29],
       [38.01],
       [29.8 ],
       [ 9.78],
       [25.71],
       [20.08],
       [11.17],
       [38.73],
       [16.21],
       [18.35],
       [15.42],
       [24.06],
       [21.7 ],
       [13.94],
       [29.03],
       [ 8.52],
       [16.32],
       [17.51],
       [10.29],
       [40.17],
       [17.47],
       [16.58],
       [17.92],
       [17.46],
       [18.24],
       [15.98],
       [22.76],
       [16.43],
       [13.37],
       [10.07],
       [13.27],
       [23.17],
       [15.48],
       [16.99],
       [

### Now Define the Logistic Regression Class

In [42]:
class LogisticRegression:
    """Binary logistic-regression classifier trained by gradient descent.

    Every iteration of ``fit`` computes the gradient over the whole training
    set and moves ``weights`` and ``bias`` one step of size ``lr``.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        """Store the hyperparameters; the model is unfitted until ``fit`` runs.

        Args:
            lr: Learning rate applied to every gradient step.
            n_iters: Number of gradient-descent iterations run by ``fit``.
        """
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None  # coefficients, one per feature; set by fit()
        self.bias = None  # intercept; set by fit()

    def _sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)) element-wise.

        Only exp(-|x|) is evaluated. That value never exceeds 1, so inputs
        of large magnitude cannot overflow the way exp(-x) does.
        """
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + z);  x < 0: z / (1 + z)  (same function, stable form)
        return np.where(x >= 0, 1.0, z) / (1.0 + z)

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).
            y: Array-like holding n_samples binary labels (0 or 1). Any
                shape with n_samples elements is accepted and flattened.

        Raises:
            ValueError: If ``X`` is not 2-D, has no rows, or the number of
                labels differs from the number of rows in ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the labels: an (n, 1) column would broadcast against the
        # (n,) prediction vector into an (n, n) matrix and corrupt the
        # gradient (or crash with an opaque shape error).
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        inv_n = 1 / n_samples  # loop-invariant averaging factor

        for _ in range(self.n_iters):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(linear_model)

            # Gradient of the mean log-loss w.r.t. weights and bias
            error = y_predicted - y
            dw = inv_n * np.dot(X.T, error)
            db = inv_n * np.sum(error)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return the predicted class (0 or 1) for each row of ``X``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            An integer ``np.ndarray``; 1 where the predicted probability is
            strictly greater than 0.5, otherwise 0.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        probabilities = self._sigmoid(linear_model)
        # Vectorized threshold instead of a per-element Python loop.
        return (probabilities > 0.5).astype(int)


### Train and Evaluate the Model

#### Initialize and train the logistic regression model

In [43]:
# Build the classifier with an explicit learning rate and iteration count,
# then fit it on the training split.
model = LogisticRegression(lr=0.01, n_iters=1000)
model.fit(X_train, y_train)

#### Make predictions

In [44]:
# Predict a 0/1 class label for every row of the held-out test features.
predictions = model.predict(X_test)

In [45]:
# Echo the predicted labels so the notebook displays them.
predictions

array([1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0])

#### Calculate accuracy

In [46]:
# Accuracy = fraction of test rows whose predicted label equals the true label.
is_correct = predictions == y_test
accuracy = is_correct.mean()
accuracy

0.673469387755102