<a href="https://colab.research.google.com/github/tayadeabhilash/ML-Notebooks/blob/main/my_logistic_regression_starter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Starter code for implementing logistic regression as a Scikit-Learn Estimator
# Gradient descent is used to find the minimal loss
# Reference: https://scikit-learn.org/stable/developers/develop.html
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import euclidean_distances
class MyLogisticRegression(BaseEstimator, ClassifierMixin):
    """Binary logistic regression trained with batch gradient descent.

    The model learns a weight vector ``w_`` whose first entry is the bias and
    whose remaining entries are the per-feature coefficients. Labels must be
    exactly ``0`` and ``1``.

    Parameters
    ----------
    max_iter : int, default=100
        Maximum number of gradient-descent iterations.
    tol : float, default=0.0001
        Training stops early once the Euclidean norm of a weight update
        falls below this value.
    random_state : int or None, default=None
        Seed passed to ``np.random.RandomState`` for the initial weights.
    learning_rate : float, default=0.1
        Step size applied to the mean log-loss gradient. Plain gradient
        descent can diverge on features with large magnitudes; scale the
        inputs or lower this value if that happens.

    Attributes
    ----------
    classes_ : ndarray
        Sorted class labels seen during ``fit`` (always ``[0, 1]``).
    n_features_in_ : int
        Number of features seen during ``fit``.
    w_ : ndarray of shape (n_features_in_ + 1,)
        Learned weights, bias first.
    X_, y_ : ndarray
        Validated training data and labels, stored by ``fit``.
    """

    def __init__(self, max_iter=100, tol=0.0001, random_state=None,
                 learning_rate=0.1):
        # Hyper-parameters are stored under the exact names of the __init__
        # arguments: BaseEstimator.get_params()/clone()/repr() look them up
        # by those names. Nothing ending in "_" is set here, because such
        # attributes are what check_is_fitted treats as "model is fitted".
        self.max_iter = max_iter
        self.tol = tol
        self.random_state = random_state
        self.learning_rate = learning_rate

    def fit(self, X, y):
        """Fit the model on training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,), labels 0 and 1

        Returns
        -------
        self : MyLogisticRegression

        Raises
        ------
        ValueError
            If the labels found in ``y`` are not exactly 0 and 1.
        """
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        # Store the classes seen during fit
        self.classes_ = unique_labels(y)
        # Make sure the labels are either 0 or 1
        self._checkBinaryLabels()
        self.X_ = X
        self.y_ = y
        self.n_features_in_ = X.shape[1]
        self.w_ = self._logisticRegression(X, y)
        # Return the classifier
        return self

    def predict_proba(self, X):
        """Return class probabilities, one row per sample.

        Column 0 is P(y=0) and column 1 is P(y=1).
        """
        positive = self._positive_probas(X)
        return np.column_stack((1.0 - positive, positive))

    def predict(self, X):
        """Predict a class label (0 or 1) for every row of ``X``.

        Returns
        -------
        ndarray of shape (n_samples,)
            1 where the predicted probability is at least 0.5, else 0.
        """
        positive = self._positive_probas(X)
        # classes_ is [0, 1], so indexing by the 0/1 decision yields labels
        # with the same dtype as the training labels.
        return self.classes_[(positive >= 0.5).astype(int)]

    def _positive_probas(self, X):
        """Validate ``X`` and return P(y=1) for each of its rows."""
        # Check if fit has been called
        check_is_fitted(self, "w_")
        # Input validation
        X = check_array(X)
        expected = self.w_.shape[0] - 1
        if X.shape[1] != expected:
            raise ValueError(
                "X has %d features, but MyLogisticRegression was fitted "
                "with %d features." % (X.shape[1], expected)
            )
        return MyLogisticRegression._calc_probas(X, self.w_)

    def _logisticRegression(self, X, y):
        """Learn the weights by batch gradient descent on the mean log-loss.

        Starts from standard-normal random weights and repeatedly moves them
        against the gradient ``Xb.T @ (sigmoid(Xb @ w) - y) / n_samples``,
        where ``Xb`` is ``X`` with a leading column of ones for the bias.
        """
        rand = np.random.RandomState(self.random_state)
        w = rand.randn(X.shape[1] + 1)

        n_samples = X.shape[0]
        # Prepend the constant-1 bias column once, outside the loop.
        Xb = np.hstack((np.ones((n_samples, 1)), X))
        targets = np.asarray(y, dtype=float)

        for _ in range(self.max_iter):
            residual = MyLogisticRegression._sigmoid(Xb @ w) - targets
            step = self.learning_rate * (Xb.T @ residual) / n_samples
            w = w - step
            # Converged: the last update no longer moves the weights.
            if np.linalg.norm(step) < self.tol:
                break
        return w

    @staticmethod
    def _calc_probas(X, w):
        """Return sigmoid(w[0] + X @ w[1:]) for all rows of ``X`` at once."""
        X = np.asarray(X, dtype=float)
        w = np.asarray(w, dtype=float)
        return MyLogisticRegression._sigmoid(X @ w[1:] + w[0])

    @staticmethod
    def _sigmoid(v):
        """Logistic function 1 / (1 + exp(-v)), for scalars or arrays.

        Written as 0.5 * (1 + tanh(v / 2)), which is the same function but
        does not overflow for large negative ``v``.
        """
        return 0.5 * (1.0 + np.tanh(0.5 * np.asarray(v, dtype=float)))

    @staticmethod
    def _dotWithBias(x, w):
        """Return ``w[0] + dot(w[1:], x)`` for a single sample ``x``."""
        w = np.asarray(w)
        return w[0] + np.dot(w[1:], np.asarray(x))

    def _checkBinaryLabels(self):
        """Raise ValueError unless the fitted classes are exactly 0 and 1."""
        self.classes_ = np.sort(self.classes_)
        if self.classes_.tolist() != [0, 1]:
            raise ValueError("Binary labels 0 and 1 expected!")


Driver code to use MyLogisticRegression and test its performance.

In [None]:
from google.colab import drive
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Location of the labelled CSV inside the mounted Google Drive.
SETOSA_URL_ = "/content/drive/My Drive/CMPE257-Shared/iris-setosa-labels.csv"
drive.mount('/content/drive', force_remount=True)

# The file has no header row; columns are addressed by position.
data = pd.read_csv(SETOSA_URL_, header=None)
print(data.shape)

# Column 4 holds the label; every other column is a feature.
label_column = data.columns[4]

setosa_data = data.drop(label_column, axis=1)
print(setosa_data.shape)
# Preview only: printing the whole frame floods the cell output.
print(setosa_data.head())

setosa_labels = data[label_column]
print(setosa_labels.shape)
print(setosa_labels.head())

train_data, test_data, train_labels, test_labels = train_test_split(
    setosa_data, setosa_labels, random_state=1
)

print(train_data.shape)
print(train_labels.shape)

# Seed the model as well as the split so the reported scores are
# reproducible from one run to the next.
model = MyLogisticRegression(random_state=1)
model.fit(train_data, train_labels)

# accuracy_score is documented as (y_true, y_pred): true labels go first.
train_predicts = model.predict(train_data)
train_score = accuracy_score(train_labels, train_predicts)
print("train accuracy: ", train_score)

test_predicts = model.predict(test_data)
test_score = accuracy_score(test_labels, test_predicts)
print("test accuracy: ", test_score)


Mounted at /content/drive
(100, 5)
(100, 4)
      0    1    2    3
0   5.1  3.5  1.4  0.2
1   4.9  3.0  1.4  0.2
2   4.7  3.2  1.3  0.2
3   4.6  3.1  1.5  0.2
4   5.0  3.6  1.4  0.2
..  ...  ...  ...  ...
95  5.7  3.0  4.2  1.2
96  5.7  2.9  4.2  1.3
97  6.2  2.9  4.3  1.3
98  5.1  2.5  3.0  1.1
99  5.7  2.8  4.1  1.3

[100 rows x 4 columns]
(100,)
0     1
1     1
2     1
3     1
4     1
     ..
95    0
96    0
97    0
98    0
99    0
Name: 4, Length: 100, dtype: int64
(75, 4)
(75,)
[0 1]
w= [ 1.01231016  0.70728354 -0.82468805  1.72872652  0.80637294]
train accuracy:  0.5333333333333333
w= [ 1.01231016  0.70728354 -0.82468805  1.72872652  0.80637294]
test accuracy:  0.4
