In [17]:
import numpy as np

class LogisticRegression:
  """Binary logistic regression trained with full-batch gradient descent.

  Parameters
  ----------
  learning_rate : float, default 0.001
      Step size applied to every gradient update.
  epochs : int, default 1000
      Number of gradient-descent iterations over the whole training set.

  Attributes
  ----------
  weights : numpy.ndarray or None
      One coefficient per feature; ``None`` until ``fit`` has been called.
  bias : float or None
      Intercept term; ``None`` until ``fit`` has been called.
  """

  def __init__(self, learning_rate=0.001, epochs=1000):
    self.learning_rate = learning_rate
    self.epochs = epochs
    self.weights = None
    self.bias = None

  def calculate_z(self, X):
    """Return the linear scores ``X . weights + bias`` for the rows of ``X``."""
    return np.dot(X, self.weights) + self.bias

  @staticmethod
  def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Evaluated as ``exp(-log(1 + exp(-z)))`` through ``np.logaddexp`` so that
    large negative ``z`` does not overflow ``exp`` (the naive formula emits a
    RuntimeWarning there).
    """
    return np.exp(-np.logaddexp(0, np.negative(z)))

  def fit(self, X, y):
    """Learn ``weights`` and ``bias`` from training data.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training features (NumPy array, DataFrame, nested list, ...).
    y : array-like with n_samples elements
        Binary targets encoded as 0/1. A column vector of shape
        (n_samples, 1) is accepted and flattened.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, is empty, or its row count differs from ``y``.
    """
    X = np.asarray(X, dtype=float)
    # Flatten y: a (n, 1) target would otherwise broadcast the residual
    # ``y_pred - y`` to (n, n) and silently corrupt the gradient.
    y = np.asarray(y, dtype=float).ravel()

    if X.ndim != 2:
      raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
    n_samples, n_features = X.shape
    if n_samples == 0:
      raise ValueError("X must contain at least one sample")
    if y.shape[0] != n_samples:
      raise ValueError(
          f"X has {n_samples} samples but y has {y.shape[0]} elements")

    self.weights = np.zeros(n_features)
    self.bias = 0

    for _ in range(self.epochs):
      # Residual between predicted probabilities and the true labels.
      error = self.sigmoid(self.calculate_z(X)) - y

      # Gradients of the mean log-loss w.r.t. weights and bias.
      dw = (1/n_samples) * np.dot(X.T, error)
      db = (1/n_samples) * np.sum(error)

      self.weights = self.weights - self.learning_rate * dw
      self.bias = self.bias - self.learning_rate * db

  def predict_proba(self, X):
    """Return the predicted probability of class 1 for each row of ``X``."""
    return self.sigmoid(self.calculate_z(X))

  def predict(self, X, threshold=None):
    """Predict for the rows of ``X``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to score.
    threshold : float or None, default None
        When ``None`` (the default), the class-1 probabilities are returned
        unchanged. When given, integer labels are returned instead: 1 where
        the probability is strictly greater than ``threshold``, else 0.
    """
    y_pred = self.predict_proba(X)
    if threshold is None:
      return y_pred
    return (y_pred > threshold).astype(int)


In [18]:
import seaborn as sns
import pandas as pd

In [19]:
# Load the 'iris' dataset through seaborn and preview its first five rows.
df = sns.load_dataset(name='iris')
df.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [20]:
# Distinct class labels present in the target column.
df.loc[:, 'species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [21]:
# Number of missing values in each column.
df.isna().sum()

Unnamed: 0,0
sepal_length,0
sepal_width,0
petal_length,0
petal_width,0
species,0


In [22]:
# Drop the 'virginica' rows so only two classes remain (binary problem).
# .copy() makes the result an independent frame, so assigning to its columns
# afterwards does not raise pandas' SettingWithCopyWarning.
df = df[df['species'] != 'virginica'].copy()
df['species'].unique()

array(['setosa', 'versicolor'], dtype=object)

In [23]:
# Encode the two remaining class labels as 0/1.
# assign() builds a new frame rather than writing into what may be a filtered
# slice, which avoids pandas' SettingWithCopyWarning.
df = df.assign(species=df['species'].map({'setosa':0, 'versicolor':1}))
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [24]:
# Every column except the last is a feature; the last column is the target.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [25]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [26]:
# Fit the model with its default hyper-parameters on the training split, then
# score the held-out rows; the cell displays the resulting predictions.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)
y_pred_probs = model.predict(X=X_test)
y_pred_probs

array([0.80954935, 0.74165868, 0.77475716, 0.41094443, 0.4140975 ,
       0.38948816, 0.33440625, 0.71297744, 0.36873568, 0.37366037,
       0.38372206, 0.42081897, 0.76470926, 0.32965434, 0.75557692,
       0.36875265, 0.77329755, 0.78887893, 0.40231504, 0.39502231,
       0.75575553, 0.71419096, 0.4091106 , 0.38852846, 0.71552414,
       0.33383322, 0.36903698, 0.72685498, 0.40531909, 0.79952489])

In [27]:
# Turn probabilities into hard labels: 0 when p <= 0.5, otherwise 1.
# Vectorised with np.where instead of a Python loop, which also avoids a loop
# variable named `y` that reads like the target series.
y_pred = np.where(np.asarray(y_pred_probs) <= 0.5, 0, 1)
# Compare positionally against the true labels; accuracy is the share that match.
accuracy = np.mean(y_pred == np.asarray(y_test))
accuracy

1.0