In [3]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.linear_model import LogisticRegression
import seaborn as sns
import pandas as pd

def sigmoid(x):
    '''
    Logistic (sigmoid) function, 1 / (1 + exp(-x)), evaluated without overflow.

    The textbook expression computes exp(-x) directly, which overflows for
    large negative x. Here the identity

        sigmoid(x) = exp(-log(1 + exp(-x)))

    is used instead, with log(1 + exp(-x)) computed by np.logaddexp(0, -x),
    which never forms the huge intermediate value.

    Parameters:
    -----------
        x : scalar or np.array of real values

    Returns:
    --------
        Value(s) in [0, 1] with the same shape as x.
    '''
    return np.exp(-np.logaddexp(0, -x))

class OwnLogisticRegression():
    '''
    Description:
    ------------
    Binary Logistic Regression classifier trained with (Stochastic) Gradient Descent.
    Because we have had some trouble with the GD/SGD code in Part A), this
    code is written from scratch to solve this task alone, which is why it
    differs a little bit from the first GD/SGD code. The Cost function used
    in classification cases, as specified in the report, is the CrossEntropy.
    This is also the starting point for calculating the gradients used here.

    Initialization:
    ---------------
        X_train (np.array): Training dataset, shape (n_inputs, n_features)
        X_test (np.array): Testing dataset
        Y_train (np.array): Target training dataset (flattened to 1-D on storage)
        Y_test (np.array): Target testing dataset
        learning_rate: Eta value (step size) for the GD/SGD algorithms
        n_iterations: Number of epochs
        batch_size: Size of mini-batches for SGD
        l2_lambda : L2 Regularization parameter, default to 0

    Methods:
    --------
        train_GD : Gradient Descent algorithm for finding the optimal weights and bias,
            equivalent to beta-coefficients and intercept for linear regression. Uses the
            l2_lambda parameter given at initialization for L2 regularization.

        train_mini_batch : Stochastic Gradient Descent algorithm for finding the optimal weights
            and bias. The data is reshuffled every epoch and processed in mini-batches, which
            makes it stochastic. Uses the same l2_lambda L2 regularization parameter.

        train : Based on the specified method, either 'GD' or 'SGD', fits the model.
        predict : Classifies the stored test data (or data passed in) using the trained model.
        accuracy : Fraction of correct classifications.

    Notes:
    ------
        The l2_lambda term is added to the gradient of the bias as well as to the
        gradient of the weights.
        The `costs` attribute is initialized to an empty list and is not filled
        by any of the training methods.
    '''
    def __init__(self, X_train,X_test,Y_train,Y_test, learning_rate, n_iterations, batch_size = 10,l2_lambda = 0):
        self.lr = learning_rate
        self.n_iter = n_iterations
        self.weights = None
        self.bias = None
        self.costs = []
        self.batch_size = batch_size
        # Store plain ndarrays so the positional indexing used when shuffling
        # (X[permutation, :], y[permutation]) is always valid.
        self.X_train = np.asarray(X_train)
        self.X_test = np.asarray(X_test)
        # A column-vector target would broadcast against the 1-D predictions
        # in the gradient, so keep the training target strictly 1-D.
        self.Y_train = np.asarray(Y_train).ravel()
        self.Y_test = np.asarray(Y_test)
        self.l2_lambda = l2_lambda

    def _init_parameters(self, n_features):
        '''Reset weights (one per feature) and bias to zero before training.'''
        self.weights = np.zeros(n_features)
        self.bias = 0.0

    def _gradients(self, X, y):
        '''
        Gradient of the L2-penalized cross-entropy on the samples (X, y).

        Returns the tuple (dCdW, dCdB), both averaged over len(X) samples and
        evaluated at the current self.weights and self.bias.
        '''
        n_samples = len(X)
        residual = sigmoid(X @ self.weights + self.bias) - y
        dCdW = 1/n_samples * (X.T @ residual) + self.l2_lambda * self.weights
        dCdB = 1/n_samples * np.sum(residual) + self.l2_lambda * self.bias
        return dCdW, dCdB

    # GRADIENT DESCENT
    def train_GD(self):
        '''Fit weights and bias with n_iter full-batch gradient descent steps.'''
        self._init_parameters(self.X_train.shape[1])

        for _ in range(self.n_iter):
            dCdW, dCdB = self._gradients(self.X_train, self.Y_train)
            # Update weights and bias
            self.weights -= self.lr * dCdW
            self.bias -= self.lr * dCdB

    # STOCHASTIC GRADIENT DESCENT
    def train_mini_batch(self):
        '''
        Fit weights and bias with mini-batch SGD for n_iter epochs.

        Every epoch draws a new permutation from NumPy's global random state
        and walks through it in chunks of batch_size; the last chunk of an
        epoch may be smaller.

        Raises:
        -------
            ValueError : if batch_size is smaller than 1.
        '''
        if self.batch_size < 1:
            raise ValueError("batch_size must be a positive integer.")

        n_inputs, n_features = self.X_train.shape
        self._init_parameters(n_features)

        for _ in range(self.n_iter):
            # Randomly shuffle the data for each epoch
            permutation = np.random.permutation(n_inputs)
            X_shuffled = self.X_train[permutation, :]
            y_shuffled = self.Y_train[permutation]

            for start in range(0, n_inputs, self.batch_size):
                stop = start + self.batch_size
                dCdW, dCdB = self._gradients(X_shuffled[start:stop, :], y_shuffled[start:stop])
                # Update weights and bias
                self.weights -= self.lr * dCdW
                self.bias -= self.lr * dCdB

    # TRAIN METHOD, TAKES IN METHOD AS INPUT PARAMETER
    def train(self, method='GD'):
        '''
        Fit the model with the chosen optimizer.

        Parameters:
        -----------
            method : 'GD' for full-batch gradient descent, 'SGD' for mini-batch SGD.

        Raises:
        -------
            ValueError : if method is neither 'GD' nor 'SGD'.
        '''
        if method == 'GD':
            self.train_GD()
        elif method == 'SGD':
            self.train_mini_batch()
        else:
            raise ValueError("Invalid training method. Use 'GD' or 'SGD'.")

    # The prediction method
    def predict(self, X=None):
        '''
        Classify samples with the trained model.

        Parameters:
        -----------
            X : optional np.array of samples; defaults to the X_test given at initialization.

        Returns:
        --------
            List of 0/1 class labels; a probability above 0.5 maps to class 1.

        Raises:
        -------
            RuntimeError : if called before the model has been trained.
        '''
        if self.weights is None or self.bias is None:
            raise RuntimeError("Model has not been trained; call train() before predict().")

        samples = self.X_test if X is None else np.asarray(X)
        probabilities = sigmoid(samples @ self.weights + self.bias)
        # Setting the threshold for True/False to 0.5
        return np.where(probabilities <= 0.5, 0, 1).tolist()

    # Method for Accuracy Score
    def accuracy(self,y_pred, y_test):
        '''
        Fraction of entries in y_pred that equal the corresponding entry of y_test.

        Both arguments are converted to arrays first, so two plain Python lists
        are compared element by element rather than as whole objects.

        Raises:
        -------
            ValueError : if the two inputs do not contain the same number of labels.
        '''
        y_pred = np.asarray(y_pred).ravel()
        y_test = np.asarray(y_test).ravel()
        if y_pred.shape != y_test.shape:
            raise ValueError("y_pred and y_test must have the same number of elements.")
        return np.sum(y_pred == y_test) / len(y_test)

In [4]:
'''Studying Accuracy versus learning rates and L2 regularization parameter for SGD'''

# Seed NumPy's global random state before anything random happens.
# NB: Pay attention to this seed and restart the kernel when re-running!
np.random.seed(2023)

# Wisconsin Breast Cancer dataset: design matrix X and target y
dataset = datasets.load_breast_cancer()
X = dataset.data
y = dataset.target

# Hold out 20 % of the samples for testing (train_test_split, sklearn.model_selection)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)

# Standardize with StandardScaler (sklearn.preprocessing): the scaler is fitted on the
# training split only, and the same transformation is then applied to both splits
Scaler = StandardScaler()
Scaler.fit(X_train)
X_train = Scaler.transform(X_train)
X_test = Scaler.transform(X_test)

In [None]:
'''Calculating accuracies over different learning rates and L2 parameters'''

# Search grid: ten log-spaced values between 1e-6 and 1e-1 for each hyperparameter
etas = np.logspace(-6,-1,10)
l2_lambdas = np.logspace(-6,-1,10)
n_iterations = 1000

# Test accuracy per grid point: rows follow etas, columns follow l2_lambdas
acc_scores = np.zeros((len(etas), len(l2_lambdas)))

for eta_idx, eta in enumerate(etas):
    for l2, lam in enumerate(l2_lambdas):
        # Train a fresh mini-batch SGD model (batch size 10) for this (eta, lambda) pair
        SGD_Test = OwnLogisticRegression(X_train, X_test, Y_train, Y_test, eta, n_iterations, 10, lam)
        SGD_Test.train("SGD")

        # Score the model on the held-out test split
        y_pred = SGD_Test.predict()
        acc = accuracy_score(Y_test, y_pred)
        acc_scores[eta_idx, l2] = acc

In [None]:

'''Creating a Dataframe with Accuracy-data and plotting a heatmap of the results'''


# Setting the Seaborn font to "Times New Roman" to match Overleaf's Latex font.
sns.set(font='Times New Roman', style='whitegrid', font_scale=1.2)

# Creating the dataframe using Pandas as pd: rows are learning rates, columns are lambdas
acc_scores_df = pd.DataFrame(acc_scores, index = etas, columns = l2_lambdas)

# Plot using Seaborn and Matplotlib.
# Raw strings keep the LaTeX backslashes from being parsed as (invalid) escape sequences.
title = r"Heatmap of SGD Accuracy for different $\eta$ and $\lambda_2$ values "

# Column / row indices of the cells that get a tick label
x_ticks = np.linspace(0, acc_scores_df.shape[1] - 1, 10, dtype=int)
y_ticks = np.linspace(0, acc_scores_df.shape[0] - 1, 10, dtype=int)

# Adjusting the figure size and annot font size
plt.figure(figsize=(10, 10))
ax = sns.heatmap(acc_scores_df, annot=True, cmap="crest",fmt=".3f", linewidths=0.5, annot_kws={"size": 12}, cbar_kws={'label': 'Accuracy Value'})

# Set x-axis and y-axis labels and their font sizes
ax.set_xlabel(r"$\lambda_2$, Regularization Parameter", fontsize=12)
ax.set_ylabel(r"$\eta$, Learning Rate", fontsize=12)

# Heatmap cell k spans [k, k + 1] in data coordinates, so its centre is at k + 0.5;
# placing the ticks there lines every label up with the middle of its cell.
ax.set_xticks(x_ticks + 0.5)
ax.set_yticks(y_ticks + 0.5)
ax.set_xticklabels(np.around(acc_scores_df.columns[x_ticks], decimals=7))
ax.set_yticklabels(np.around(acc_scores_df.index[y_ticks], decimals=7))

# Adjust x and y tick font size
ax.tick_params(axis='x', labelsize=10)
ax.tick_params(axis='y', labelsize=10)

plt.xticks(rotation=45)  # Rotate x-axis labels for better readability
plt.yticks(rotation=0)   # Keep y-axis labels horizontal
plt.title(title, fontsize=16)

# Adjust colorbar label and tick font size
cax = ax.collections[0].colorbar.ax
cax.set_ylabel('Accuracy Value', fontsize=12)
cax.tick_params(labelsize=10)

plt.show()

In [None]:
'''
Comparing SGD, GD, and Scikit-Learn
-----------------------------------
    Based on the optimal range of lambdas and etas, from the Heatmap above,
    a new comparison (using optimal values) are done with the different methods:
    SGD, GD, and Scikit-Learn
'''

# Fixed hyperparameters, and the epoch counts (1 to 600) to evaluate
eta = 0.0001
lam = 0.01 # from the results above
n_iterations = range(1, 601)

# One test-accuracy slot per epoch count and method
# (acc_array_Scikit is allocated here but not filled in by this cell)
acc_array_SGD = np.zeros(len(n_iterations))
acc_array_GD = np.zeros(len(n_iterations))
acc_array_Scikit = np.zeros(len(n_iterations))

for i, iteration in enumerate(n_iterations):
    # Mini-batch SGD trained from scratch for `iteration` epochs
    SGD_test3 = OwnLogisticRegression(X_train, X_test, Y_train, Y_test, eta, iteration, 10, lam)
    SGD_test3.train("SGD")
    y_pred3 = SGD_test3.predict()
    acc_test_SGD = accuracy_score(Y_test, y_pred3)
    acc_array_SGD[i] = acc_test_SGD

    # Full-batch GD trained from scratch for the same number of epochs
    GD_test3 = OwnLogisticRegression(X_train, X_test, Y_train, Y_test, eta, iteration, 10, lam)
    GD_test3.train("GD")
    y_pred4 = GD_test3.predict()
    acc_test_GD = accuracy_score(Y_test, y_pred4)
    acc_array_GD[i] = acc_test_GD


In [None]:
'''Plotting the results versus learning rates'''

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import font_manager as fm

# Long-format frame per method: epoch count, accuracy, and a label used for the hue.
# No Scikit-Learn curve is included in this plot.
method_curves = (
    ('Stochastic Gradient Descent', acc_array_SGD),
    ('Gradient Descent', acc_array_GD),
)
df_SGD, df_GD = (
    pd.DataFrame({"Number of epochs": n_iterations, 'Accuracy, %': scores, 'Method': label})
    for label, scores in method_curves
)

# Stack the per-method frames on top of each other
Accuracy_data = pd.concat([df_SGD, df_GD])

# Seaborn style and color palette
sns.set(style="whitegrid")
sns.set_palette("deep")

# New figure for the line plot
plt.figure(figsize=(10, 6))

# Use Matplotlib's generic serif font family, then draw one line per method
plt.rcParams['font.family'] = 'serif'
sns.lineplot(data=Accuracy_data, x='Number of epochs', y='Accuracy, %', hue='Method')

# Axis labels
plt.xlabel("Number of Epochs", fontsize=14)
plt.ylabel("Accuracy Score", fontsize=14)

# Legend
plt.legend(title='Method', title_fontsize='14', fontsize='12')

# Show the plot
plt.show()


In [19]:
'''Printing the Accuracy score for the different methods'''

# Hyperparameters fixed according to the earlier tuning tests
lam = 0.001
eta = 0.01

# Number of epochs for the own implementations; reused as max_iter for Scikit-Learn
n_iterations = 600


def _fit_and_score(method):
    """Train OwnLogisticRegression with `method`; return (model, predictions, test accuracy)."""
    model = OwnLogisticRegression(X_train, X_test, Y_train, Y_test, eta, n_iterations, 10, lam)
    model.train(method)
    predictions = model.predict()
    return model, predictions, accuracy_score(Y_test, predictions)


# Gradient Descent with the chosen eta and lambda2
GD_Test2, y_pred_GD, accGD = _fit_and_score("GD")

# Stochastic Gradient Descent with the same eta and lambda2
SGD_Test2, y_pred_SGD, accSGD = _fit_and_score("SGD")

# Scikit-Learn reference model.
# NOTE(review): C=1/lam assumes Scikit-Learn scales its penalty the same way l2_lambda is
# used in OwnLogisticRegression, and tol=eta reuses the learning rate as the solver's
# stopping tolerance -- confirm both are intended for a like-for-like comparison.
model_scikit = LogisticRegression(C=1/lam, max_iter=n_iterations, tol=eta)
model_scikit.fit(X_train, Y_train)
pred_sklearn = model_scikit.predict(X_test)
accuracy_scikit = accuracy_score(Y_test, pred_sklearn)



In [20]:
'''Comparing the results with Scikit-Learn'''

# Header of the report
print("Statistics using Scikit-Learn: ")
print("-----------")

# (label, accuracy, extra trailing arguments) for each method, in print order;
# a trailing '\n' leaves a blank line after the entry
report_rows = (
    ("The accuracy using Scikit:", accuracy_scikit, ('\n',)),
    ("The best accuracy with GD: ", accGD, ('\n',)),
    ("The best accuracy with SGD: ", accSGD, ()),
)
for label, score, trailing in report_rows:
    print(label, score, *trailing)

Statistics using Scikit-Learn: 
-----------
The accuracy using Scikit: 0.9473684210526315 

The best accuracy with GD:  0.9736842105263158 

The best accuracy with SGD:  0.9824561403508771
