In [71]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn import datasets

from utils import sigmoid

In [72]:
# Load the scikit-learn breast-cancer dataset and pull out its
# feature matrix (`data`) and target vector (`target`).
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target

In [73]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps
# the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [74]:
class LogisticRegression:
	"""Binary classifier trained with full-batch gradient descent.

	The model output is ``sigmoid(X @ w + b)``; ``predict`` thresholds it
	at 0.5. ``sigmoid`` is imported from the project's ``utils`` module
	and is presumably the logistic function -- confirm there.

	Attributes:
		w: Weight vector of shape ``(n_features,)``, initialised to zeros.
		b: Scalar bias, initialised to zero.
	"""

	def __init__(self, n_features) -> None:
		"""Create an untrained model for inputs with ``n_features`` columns."""
		self.w = np.zeros(n_features)
		# Start as a float so the type of ``b`` does not change after the
		# first update step.
		self.b = 0.0

	def fit(self, X: np.ndarray, y: np.ndarray, epochs=1000, lr=0.001):
		"""Update ``w`` and ``b`` in place with ``epochs`` gradient steps.

		Args:
			X: Feature matrix of shape ``(n_samples, n_features)``.
			y: One target per row of ``X``. Any shape holding
				``n_samples`` values is accepted; it is flattened to 1-D.
			epochs: Number of full-batch updates; progress is shown with
				a tqdm bar.
			lr: Step size multiplied into each gradient.

		Returns:
			None. The fitted parameters are stored on the instance.
		"""
		# Flatten once up front so every epoch sees the same 1-D targets.
		y = np.asarray(y).ravel()

		for _ in tqdm(range(epochs)):
			dw, db = self.gradient_descend(X, y)

			self.w -= lr * dw
			self.b -= lr * db

	def gradient_descend(self, X: np.ndarray, y: np.ndarray):
		"""Return the gradients ``(dw, db)`` at the current parameters.

		Despite the name, this method does not modify the model; ``fit``
		is what applies the step.

		Args:
			X: Feature matrix of shape ``(n_samples, n_features)``.
			y: One target per row of ``X``; flattened to 1-D before use.

		Returns:
			Tuple ``(dw, db)``: ``dw`` has the same shape as ``w`` and
			``db`` is a scalar, both averaged over the samples.

		Raises:
			ValueError: If ``X`` is not 2-D (from unpacking ``X.shape``).
		"""
		# A column-vector ``y`` of shape (n, 1) would broadcast against the
		# 1-D model output into an (n, n) matrix; flatten to prevent that.
		y = np.asarray(y).ravel()

		# Unpacking two values also rejects inputs that are not 2-D.
		n_samples, _ = X.shape

		# The residual feeds both gradients, so compute it only once.
		error = self._predict_proba(X) - y

		dw = (1/n_samples) * np.dot(X.T, error)
		db = (1/n_samples) * np.sum(error)

		return dw, db

	def _predict_proba(self, X: np.ndarray):
		"""Return ``sigmoid(X @ w + b)``, the model output before thresholding."""
		return sigmoid(np.dot(X, self.w) + self.b)

	def predict(self, X: np.ndarray):
		"""Return a boolean array that is True where the model output is >= 0.5."""
		return self._predict_proba(X) >= 0.5

	def score(self, X, y):
		"""Return the fraction of rows of ``X`` whose prediction equals ``y``.

		Args:
			X: Feature matrix of shape ``(n_samples, n_features)``.
			y: One target per row of ``X``; flattened to 1-D before the
				comparison. Predictions are booleans, so targets are
				expected to be 0/1 (or bool) for the result to be meaningful.
		"""
		# Without flattening, an (n, 1) ``y`` broadcasts against the 1-D
		# predictions into an (n, n) comparison and the ratio is wrong.
		y = np.asarray(y).ravel()
		return np.mean(self.predict(X) == y)


In [75]:
# One weight per feature column of X.
model = LogisticRegression(n_features=X.shape[1])

In [76]:
# Train on the training split using fit's default epochs and learning rate.
model.fit(X=X_train, y=y_train)

100%|██████████| 1000/1000 [00:00<00:00, 13779.78it/s]


In [77]:
# Accuracy on the held-out split; left as the bare last expression so the
# notebook displays the value.
model.score(X=X_test, y=y_test)

0.9473684210526315