In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np

In [3]:
# lets load the dataset - dataframe format
data = load_iris(as_frame=True)
df = data.frame
df.head()
# here 0 means setosa, 1 means versicolor and 2 means virginica

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
# split the data into train and test
X = df.drop(columns=['target'])
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# lets use the logistic regression model : from scratch
class LogisticRegression:
    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(linear_model)

            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return np.array(y_predicted_cls)

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

In [8]:
# use the model
model = LogisticRegression(learning_rate=0.1, n_iterations=1000)
model.fit(X_train.values, y_train.values)
predictions = model.predict(X_test.values)
print("Predictions:", predictions) 

# lets see the weights and bias
print("Weights:", model.weights)
print("Bias:", model.bias)

Predictions: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Weights: [ 46.33164538 -18.48741776 131.28611458  56.89643471]
Bias: -0.7752707150257157


In [12]:
from sklearn.linear_model import LogisticRegression 

# use sklearn model
sklearn_model = LogisticRegression()
sklearn_model.fit(X_train, y_train)
sklearn_predictions = sklearn_model.predict(X_test)
print("Sklearn Predictions:", sklearn_predictions)
# accuracy
accuracy = np.sum(sklearn_predictions == y_test.values) / len(y_test)
print("Accuracy:", accuracy)


Sklearn Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]
Accuracy: 1.0


In [13]:

# save the model
import pickle
with open('logistic_regression_model.pkl', 'wb') as f:
    pickle.dump(sklearn_model, f)

In [14]:
# lets see the weights and bias of sklearn model
print("Sklearn Weights:", sklearn_model.coef_)

Sklearn Weights: [[-0.39349364  0.96246717 -2.3751418  -0.99874814]
 [ 0.50845582 -0.25479812 -0.21300858 -0.77574559]
 [-0.11496218 -0.70766905  2.58815039  1.77449373]]


In [17]:
# lets use Gradient descent to find the best value of weights and bias
def gradient_descent(X, y, model, learning_rate=0.01, n_iterations=1000):
    m = X.shape[0]
    for i in range(n_iterations):
        model.fit(X, y)
        # Compute gradients
        gradients = model.coef_
        # Update weights
        model.coef_ -= learning_rate * gradients
    return model

# Using gradient descent to optimize sklearn model
optimized_model = gradient_descent(X_train.values, y_train.values, sklearn_model, learning_rate
=0.1, n_iterations=1000)
optimized_predictions = optimized_model.predict(X_test)
print("Optimized Sklearn Predictions:", optimized_predictions)

Optimized Sklearn Predictions: [1 0 2 1 1 0 1 2 1 1 1 0 0 0 0 1 2 1 1 2 0 1 0 2 2 2 2 2 0 0]




In [18]:
# weights after optimization
print("Optimized Sklearn Weights:", optimized_model.coef_)

Optimized Sklearn Weights: [[-0.35414428  0.86622045 -2.13762762 -0.89887332]
 [ 0.45761024 -0.22931831 -0.19170773 -0.69817103]
 [-0.10346596 -0.63690215  2.32933535  1.59704436]]
