In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

In [2]:
# linear regression model 
from abc import ABC, abstractmethod

class Model(ABC) : 

    @abstractmethod
    def train(self, X , Y) : pass

    @abstractmethod
    def infer(self, X) : pass


In [3]:
class LinearRegression(Model) : 

    def __init__(self):
        super().__init__() 
        # self.dim = None
        pass

    def train(self,X , Y,epochs = 3,  learning_rate = 0.1 ):
        N = X.shape[0]
        dim = X.shape[1]
        self.theta = np.random.rand(dim,1)  * 0.01
        loop = tqdm(range(epochs))
        loss_tracker = []
        for e in loop : 
            for i in range(N) : 
                x_i = X[i].reshape(1, -1)
                y_i = Y[i]
                y_cap = self.infer(x_i)
                error = (y_cap - y_i)      
                loss_tracker.append(error)
                update = (error * x_i.T)
                self.theta = self.theta - (learning_rate * update ) 
                loop.set_postfix(loss = error)
        return loss_tracker

    def infer(self, X):
        y_pred = X @ self.theta 
        return y_pred

In [4]:
# X = np.random.rand(100, 10)
# Y = np.ones(100,)
# # theta = np.random.rand(10,1)
# # pred =   theta @ X[0] 
# # theta - (pred * arr[0])  

np.random.seed(42)
N = 100 #samples
dim = 2 #features
X = 2 * np.random.rand(N, dim)
true_theta = np.array([[2], [3]]) 
Y = X @ true_theta + np.random.randn(N, 1) * 0.1

print(f"X shape: {X.shape}")
print(f"Y shape: {X.shape}")

X shape: (100, 2)
Y shape: (100, 2)


In [5]:
model = LinearRegression()
loss_ = model.train(X,Y, learning_rate= 0.01, epochs = 50)

  0%|          | 0/50 [00:00<?, ?it/s]

In [6]:
model.theta

array([[2.00669581],
       [3.00539384]])

In [None]:
class LogisticRegression(Model) : 
    


    def __init__(self):
        super().__init__() 
        pass
    

    def train(self,X , Y,epochs = 3,  learning_rate = 0.1 ):
        N = X.shape[0]
        dim = X.shape[1]
        self.theta = np.random.rand(dim,1)  * 0.01
        loop = tqdm(range(epochs))
        loss_tracker = []
        for e in loop : 
            for i in range(N) : 
                x_i = X[i].reshape(1, -1)
                y_i = Y[i]
                y_cap = self.infer(x_i)
                error = (y_cap - y_i) # derivate of cross entropy
                loss = - (y_i * np.log(y_cap) + (1 - y_i) * np.log(1 - y_cap))
                loss_tracker.append(loss)
                update = (error * x_i.T)
                self.theta = self.theta - (learning_rate * update ) 
                loop.set_postfix(loss = loss)
        return loss_tracker

    def infer(self, X):
        z =  X @ self.theta
        
        # limit the z for the exp function
        z = np.clip(z, -20, 20)

        y_pred = 1 / (1 + np.exp(-z))
        return y_pred

In [12]:
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)
N = 200      # More samples helps visualization
dim = 2      # Features

X = np.random.randn(N, dim)

true_theta = np.array([[12], [-3]]) 
z = X @ true_theta + np.random.randn(N, 1) * 0.5
Y = (z > 0).astype(int)

print(f"X shape: {X.shape}")
print(f"Y shape: {Y.shape}")
print(f"Class balance: {np.sum(Y)} ones vs {N - np.sum(Y)} zeros")

X shape: (200, 2)
Y shape: (200, 1)
Class balance: 100 ones vs 100 zeros


In [13]:
model = LogisticRegression()
loss_ = model.train(X,Y, learning_rate= 0.01, epochs = 25)

  0%|          | 0/25 [00:00<?, ?it/s]

In [14]:
model.theta

array([[ 3.92910019],
       [-1.13441145]])