# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the training dataset
def load_train_data(
    path="C:\\Users\\santhosh\\Downloads\\project\\santhosh\\Logistic-Regression\\dataset\\bank-note\\train.csv",
    header="infer",
):
    """Load the training split from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the location this script
            was originally hard-wired to, so zero-argument calls behave as
            before.
        header: Forwarded to ``pandas.read_csv``. The default (``"infer"``)
            makes pandas treat the first row as column names.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds every column except
        the last one and ``y`` holds the last column.
    """
    # NOTE(review): with the default header handling the first row of the
    # file is consumed as column names. If the CSV has no header row, pass
    # header=None so that row is kept as data -- confirm against the file.
    data = pd.read_csv(path, header=header)

    # The last column is assumed to be the target variable.
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    return X, y

# Load the test dataset
def load_test_data(
    path="C:\\Users\\santhosh\\Downloads\\project\\santhosh\\Logistic-Regression\\dataset\\bank-note\\test.csv",
    header="infer",
):
    """Load the test split from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the location this script
            was originally hard-wired to, so zero-argument calls behave as
            before.
        header: Forwarded to ``pandas.read_csv``. The default (``"infer"``)
            makes pandas treat the first row as column names.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` holds every column except
        the last one and ``y`` holds the last column.
    """
    # NOTE(review): with the default header handling the first row of the
    # file is consumed as column names. If the CSV has no header row, pass
    # header=None so that row is kept as data -- confirm against the file.
    data = pd.read_csv(path, header=header)

    # The last column is assumed to be the target variable.
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    return X, y

# Logistic Regression functions
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of ``z``.

    Args:
        z: A scalar or NumPy array of pre-activations.

    Returns:
        The element-wise sigmoid of ``z``, with the same shape as ``z``.
    """
    # sigmoid(z) == exp(-log(1 + exp(-z))). np.logaddexp(0, -z) evaluates the
    # log term without ever forming exp(-z) directly, so very negative z no
    # longer overflows; the result simply saturates to 0.0 or 1.0.
    return np.exp(-np.logaddexp(0, -z))

def compute_error(X, y, w):
    """Return the mean logistic (cross-entropy) loss of weights ``w``.

    Args:
        X: Feature matrix; ``np.dot(X, w)`` must yield one score per example.
        y: NumPy array of targets, one per example. The loss formula treats
            them as 0/1 labels.
        w: Weight vector.

    Returns:
        The mean over examples of
        ``-(y * log(s) + (1 - y) * log(1 - s))`` where ``s = sigmoid(X @ w)``.
    """
    z = np.dot(X, w)
    # With s = sigmoid(z): log(s) = z - log(1 + e^z) and
    # log(1 - s) = -log(1 + e^z), so the per-example loss collapses to
    # log(1 + e^z) - y * z. Evaluating it this way avoids log(0) when the
    # sigmoid saturates, which previously produced inf/nan.
    return np.mean(np.logaddexp(0, z) - y * z)

# Main logistic regression with MAP estimation
def logistic_regression_MAP(X_train, y_train, X_test, y_test, prior_variance,
                            *, num_epochs=100, gamma0=0.01, d=10,
                            random_state=None):
    """Fit logistic regression by SGD with a prior penalty and score it.

    Features are standardized with statistics fitted on the training split
    only. Weights start at zero and are updated one example at a time with
    the learning rate ``gamma0 / (1 + (gamma0 / d) * step)``, where ``step``
    counts updates across all epochs.

    Args:
        X_train: Training feature matrix.
        y_train: NumPy array of training targets, indexable by a permutation.
        X_test: Test feature matrix.
        y_test: Test targets.
        prior_variance: Variance of the prior on the weights; its inverse
            scales the ``w`` term added to every gradient.
        num_epochs: Number of passes over the training data.
        gamma0: Initial learning rate.
        d: Decay constant of the learning-rate schedule.
        random_state: Seed for the per-epoch shuffling. ``None`` keeps the
            original behavior of drawing from NumPy's global random state.

    Returns:
        A tuple ``(train_error, test_error)`` as computed by
        ``compute_error`` on the standardized splits with the final weights.
    """
    # Standardize features; the test split reuses the training statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # A dedicated generator makes runs reproducible when a seed is supplied,
    # without disturbing callers that rely on np.random.seed().
    if random_state is None:
        permute = np.random.permutation
    else:
        permute = np.random.default_rng(random_state).permutation

    num_examples = len(y_train)
    w = np.zeros(X_train.shape[1])

    # Loop invariants, computed once.
    prior_precision = 1 / prior_variance
    decay = gamma0 / d

    for epoch in range(num_epochs):
        # Visit the training examples in a fresh random order each epoch.
        permutation = permute(num_examples)
        X_train_shuffled = X_train[permutation]
        y_train_shuffled = y_train[permutation]

        for t, (x_n, y_n) in enumerate(zip(X_train_shuffled, y_train_shuffled)):
            sigmoid_value = sigmoid(np.dot(w, x_n))

            # Per-example loss gradient plus the prior term.
            # NOTE(review): the prior term is added on every single-example
            # step, so across one epoch it is applied num_examples times --
            # confirm this weighting matches the intended MAP objective.
            gradient = -(y_n - sigmoid_value) * x_n + prior_precision * w

            # The step size decays with the global update count.
            learning_rate = gamma0 / (1 + decay * (epoch * num_examples + t))

            w -= learning_rate * gradient

    # Evaluate on the standardized training and test sets.
    train_error = compute_error(X_train, y_train, w)
    test_error = compute_error(X_test, y_test, w)

    return train_error, test_error

# Main code
if __name__ == "__main__":
    # Read both splits once; every run below reuses the same arrays.
    X_train, y_train = load_train_data()
    X_test, y_test = load_test_data()
    datasets = (X_train, y_train, X_test, y_test)

    # Prior variances to sweep over
    prior_variances = [0.01, 0.1, 0.5, 1, 3, 5, 10, 100]

    # Fit one model per variance and report its training and test error.
    for prior_variance in prior_variances:
        train_error, test_error = logistic_regression_MAP(*datasets, prior_variance)
        print(f"Variance: {prior_variance}, Training Error: {train_error}, Test Error: {test_error}")


# Output:
# Variance: 0.01, Training Error: 0.691399056176157, Test Error: 0.6913098259267894
# Variance: 0.1, Training Error: 0.675505135906775, Test Error: 0.6746391362091789
# Variance: 0.5, Training Error: 0.6206348125855594, Test Error: 0.6173671808469793
# Variance: 1, Training Error: 0.5720533480464409, Test Error: 0.5671284261135525
# Variance: 3, Training Error: 0.46439709659056233, Test Error: 0.4579817876341264
# Variance: 5, Training Error: 0.40656145363981167, Test Error: 0.4006901256645244
# Variance: 10, Training Error: 0.32697792927938635, Test Error: 0.32306925215688365
# Variance: 100, Training Error: 0.16234603773708622, Test Error: 0.16374857716664803
