In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the training dataset
def load_train_data(path="C:\\Users\\santhosh\\Downloads\\project\\santhosh\\Logistic-Regression\\dataset\\bank-note\\train.csv"):
    """Load the training set from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the original
            hard-coded location so existing zero-argument calls keep
            working; pass a different path to run on another machine.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays, where ``X`` holds every column
        except the last and ``y`` holds the last column.
    """
    # NOTE(review): read_csv treats the first row as a header by default.
    # If the file has no header row, that first sample is silently consumed
    # as column names -- confirm against the actual dataset.
    data = pd.read_csv(path)

    # The last column is taken to be the target variable.
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    return X, y

# Load the test dataset
def load_test_data(path="C:\\Users\\santhosh\\Downloads\\project\\santhosh\\Logistic-Regression\\dataset\\bank-note\\test.csv"):
    """Load the test set from a CSV file.

    Args:
        path: Location of the CSV file. Defaults to the original
            hard-coded location so existing zero-argument calls keep
            working; pass a different path to run on another machine.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays, where ``X`` holds every column
        except the last and ``y`` holds the last column.
    """
    # NOTE(review): read_csv treats the first row as a header by default.
    # If the file has no header row, that first sample is silently consumed
    # as column names -- confirm against the actual dataset.
    data = pd.read_csv(path)

    # The last column is taken to be the target variable.
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    return X, y

# Logistic Regression functions
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), computed stably.

    The naive formula evaluates ``exp(-z)``, which overflows for large
    negative ``z``. Here the exponential is only ever taken of a
    non-positive number, so it cannot overflow.

    Args:
        z: A scalar or array-like of real values.

    Returns:
        A NumPy float for scalar input, otherwise an array with the same
        shape as ``z``, with values in [0, 1].
    """
    z = np.asarray(z, dtype=float)

    # exp(-|z|) lies in (0, 1], so neither branch below can overflow.
    decay = np.exp(-np.abs(z))

    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays as arrays.
    return result[()]

def compute_error(X, y, w):
    """Return the mean logistic (cross-entropy) loss of weights ``w``.

    With ``z = X @ w`` and ``p = sigmoid(z)``, the per-sample loss
    ``-(y*log(p) + (1-y)*log(1-p))`` simplifies algebraically to
    ``log(1 + exp(z)) - y*z``. Evaluating that form with ``np.logaddexp``
    avoids ``log(0)``, which turns the naive expression into ``nan`` or
    ``inf`` once a prediction saturates at exactly 0 or 1.

    Args:
        X: Feature matrix; ``np.dot(X, w)`` must yield one logit per sample.
        y: Target values, one per sample, used as the ``y`` term of the
            loss above.
        w: Weight vector.

    Returns:
        The loss averaged over all samples.
    """
    y = np.asarray(y, dtype=float)
    logits = np.dot(X, w)

    # logaddexp(0, z) == log(exp(0) + exp(z)) == log(1 + exp(z)), computed
    # without overflow for large |z|.
    error = np.mean(np.logaddexp(0.0, logits) - y * logits)
    return error

# Main logistic regression with ML estimation
def logistic_regression_ML(X_train, y_train, X_test, y_test,
                           num_epochs=100, gamma0=0.01, d=10,
                           random_state=None):
    """Fit logistic regression by SGD (maximum likelihood) and report losses.

    Features are standardized with statistics fitted on the training set
    only, then the weights are trained with per-sample stochastic gradient
    descent using the decaying step size
    ``gamma_t = gamma0 / (1 + (gamma0 / d) * t)``, where ``t`` counts
    updates across all epochs. The weight vector has one entry per feature;
    no separate intercept term is learned.

    Args:
        X_train: Training feature matrix.
        y_train: Training targets, one per row of ``X_train``.
        X_test: Test feature matrix.
        y_test: Test targets, one per row of ``X_test``.
        num_epochs: Number of passes over the training data.
        gamma0: Initial learning rate.
        d: Decay parameter of the learning-rate schedule.
        random_state: Optional seed for the per-epoch shuffling. When
            ``None`` (the default) the global NumPy random state is used,
            exactly as before.

    Returns:
        A tuple ``(train_error, test_error)`` of mean logistic losses as
        computed by ``compute_error`` on the standardized data.
    """
    # Standardize using training-set statistics only, so no information
    # from the test set leaks into the fit.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Use the global NumPy RNG unless the caller asks for a reproducible run.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    n_samples = len(y_train)
    w = np.zeros(X_train.shape[1])

    for epoch in range(num_epochs):
        # Visit the samples in a fresh random order every epoch.
        permutation = rng.permutation(n_samples)
        X_train_shuffled = X_train[permutation]
        y_train_shuffled = y_train[permutation]

        for t, (x_n, y_n) in enumerate(zip(X_train_shuffled, y_train_shuffled)):
            # Gradient of the negative log-likelihood for a single sample.
            sigmoid_value = sigmoid(np.dot(w, x_n))
            gradient = -(y_n - sigmoid_value) * x_n

            # Global update counter drives the learning-rate decay.
            step = epoch * n_samples + t
            learning_rate = gamma0 / (1 + (gamma0 / d) * step)

            w -= learning_rate * gradient

    # Evaluate the final weights on both (standardized) splits.
    train_error = compute_error(X_train, y_train, w)
    test_error = compute_error(X_test, y_test, w)

    return train_error, test_error

# Main code for ML estimation with different prior variances
if __name__ == "__main__":
    # Read both splits from disk before running any experiment.
    X_train, y_train = load_train_data()
    X_test, y_test = load_test_data()

    # Settings under which results are reported.
    # NOTE: prior_variance is only printed as a label below; it is not passed
    # to logistic_regression_ML, so each run is the same ML fit apart from the
    # random shuffling done during training.
    prior_variances = [0.01, 0.1, 0.5, 1, 3, 5, 10, 100]

    for prior_variance in prior_variances:
        print(f"\nVariance: {prior_variance}")

        # Train from scratch and collect the train/test losses for this run.
        train_error, test_error = logistic_regression_ML(
            X_train, y_train, X_test, y_test
        )
        print(f"Training Error: {train_error}, Test Error: {test_error}")



Variance: 0.01
Training Error: 0.13470609127247415, Test Error: 0.13675505984189482

Variance: 0.1
Training Error: 0.13463728139231204, Test Error: 0.1366973154899521

Variance: 0.5
Training Error: 0.13455848104070045, Test Error: 0.13661158186034514

Variance: 1
Training Error: 0.13458173581836844, Test Error: 0.13663889673784446

Variance: 3
Training Error: 0.13461425511736783, Test Error: 0.13666465232535688

Variance: 5
Training Error: 0.13457055644598187, Test Error: 0.13661149390438065

Variance: 10
Training Error: 0.13459407414212837, Test Error: 0.13664396475232599

Variance: 100
Training Error: 0.13463118945805094, Test Error: 0.13668115840009648
