# Assignment: Implement Logistic Regression and Naive Bayes

In [1]:
import numpy as np

## Implement Logistic Regression

In [2]:
class LogisticRegression:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None
    
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def fit(self, X, y):
        m, n = X.shape
        self.weights = np.zeros(n)
        self.bias = 0
        
        for _ in range(self.epochs):
            # Linear model
            z = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(z)
            
            # Compute the gradients
            dw = (1/m) * np.dot(X.T, (y_pred - y))
            db = (1/m) * np.sum(y_pred - y)
            
            # Update the parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
    
    def predict(self, X):
        z = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(z)
        y_pred_class = [1 if i > 0.5 else 0 for i in y_pred]
        return np.array(y_pred_class)

In [38]:
from sklearn.metrics import accuracy_score
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [1, 3], [2, 4], [3, 5], [4, 6]])
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# Logistic Regression Model Test
model_log_reg = LogisticRegression(learning_rate=0.1, epochs=1000)
model_log_reg.fit(X, y)
y_pred_log_reg = model_log_reg.predict(X)

# Output the accuracy and predictions
print("Logistic Regression Test Case Accuracy:", accuracy_score(y, y_pred_log_reg))
print("Logistic Regression Predictions:", y_pred_log_reg)

Logistic Regression Test Case Accuracy: 1.0
Logistic Regression Predictions: [0 0 0 0 1 1 1 1]


## Implement Naive Bayes

In [39]:
class NaiveBayes:
    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes = np.unique(y)
        n_classes = len(self.classes)
        
        # Initialize mean, variance, and prior
        self.mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self.variance = np.zeros((n_classes, n_features), dtype=np.float64)
        self.priors = np.zeros(n_classes, dtype=np.float64)
        
        for idx, c in enumerate(self.classes):
            X_c = X[y == c]
            self.mean[idx, :] = X_c.mean(axis=0)
            self.variance[idx, :] = X_c.var(axis=0)
            self.priors[idx] = X_c.shape[0] / float(n_samples)
    
    def _calculate_likelihood(self, class_idx, x):
        mean = self.mean[class_idx]
        var = self.variance[class_idx]
        numerator = np.exp(-(x - mean)**2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator
    
    def _calculate_posterior(self, x):
        posteriors = []
        for idx, _ in enumerate(self.classes):
            prior = np.log(self.priors[idx])
            likelihood = np.sum(np.log(self._calculate_likelihood(idx, x)))
            posterior = prior + likelihood
            posteriors.append(posterior)
        return self.classes[np.argmax(posteriors)]
    
    def predict(self, X):
        y_pred = [self._calculate_posterior(x) for x in X]
        return np.array(y_pred)

In [40]:
# Test case
X = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [1, 4], [2, 5], [3, 6], [4, 7]])
y = np.array([0, 0, 0, 0, 1, 1, 1, 1])

# Naive Bayes Model Test
model_naive_bayes = NaiveBayes()
model_naive_bayes.fit(X, y)
y_pred_naive_bayes = model_naive_bayes.predict(X)

# Output the accuracy and predictions
print("Naive Bayes Test Case Accuracy:", accuracy_score(y, y_pred_naive_bayes))
print("Naive Bayes Predictions:", y_pred_naive_bayes)

Naive Bayes Test Case Accuracy: 0.875
Naive Bayes Predictions: [0 0 0 0 0 1 1 1]


## Performance comparison on Breast Cancer dataset from sklearn.datasets

In [41]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.datasets import load_breast_cancer

# Load dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Logistic Regression
log_reg = LogisticRegression(learning_rate=0.01, epochs=1000)
log_reg.fit(X_train, y_train)
y_pred_lr = log_reg.predict(X_test)
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))

# Naive Bayes
nb = NaiveBayes()
nb.fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)
print("Naive Bayes Accuracy:", accuracy_score(y_test, y_pred_nb))


  return 1 / (1 + np.exp(-z))


Logistic Regression Accuracy: 0.9473684210526315
Naive Bayes Accuracy: 0.9649122807017544
