<a href="https://colab.research.google.com/github/simulate111/Supervised-Machine-Learning/blob/main/quiz03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Quiz 3 - Programming Excercises

## Logistic Regression

In this exercise, you will implement a Logistic Regression model and explore different stepsize policies for the Stochastic Gradient Descent algorithm using the [Breast Cancer Wisconsin dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html).

The code below prepares the data required for this exercise.

In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer

# load the data
X, y = load_breast_cancer(return_X_y=True)

print(f"Shape of input data: {X.shape}")
print(f"Shape of output data: {y.shape}")

mdata, ndim = X.shape

# convert the output space from {0,1} into {-1,+1}
y = 2*y - 1

# normalization
X /= np.outer(np.ones(mdata), np.max(np.abs(X),0))

Shape of input data: (569, 30)
Shape of output data: (569,)


**Task 1**: Complete the `logreg_sgd_cls` class provided below to implement a Logistic Regression class.

<div class="alert alert-info">
<b>Note:</b> Be aware that the Logistic Regression algorithm presented in Lecture 5 may differ from the one implemented in Sklearn. Please use the version presented in the Lecture.
</div>

In [10]:
class logreg_sgd_cls:
    """
    Logistic Regression Classifier
    """
    def __init__(self):
        self.nitermax = 100     # maximum iteration
        self.eta = 0.1          # initial step size
        self.w = None           # weights to learn

    ## ------------------------------------------
    def fit(self, X, y, eta, nitermax, diminish=0):
        """
        Train the logistic regression model
        with stochastic gradient descent algorithm

        Input:  X         2D array where each row represents an input example
                y         1D array(vector) of +1,-1 labels
                eta       initial step size
                nitermax  maximum number of iterations
                diminish  =0 constant step size, =1 diminishing step size
        """
        mtrain, ndim = X.shape

        # initialize the weights
        w = np.zeros(ndim)

        # iterations on the full data
        for t in range(nitermax):

            # select the stepsize
            if diminish == 1:       # diminishing stepsize policy
                etat = eta/(t+1)    # t+1 to avoid division by zero
            else:                   # constant stepsize policy
                etat = eta

            # perform one step of stochastic gradient descent on each training example
            for i in range(mtrain):
                xi = X[i]
                yi = y[i]
                gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
                w -= etat * gradient

        # save the final weights to the model
        self.w  = w

        return w

    ## ------------------------------------------
    def predict(self, X):
        """
        Predict the labels

        Input:
            X   2D array where each row represents an input example

        Output:
            y   1D array of predicted labels
        """
        y_pred = np.sign(np.dot(X, self.w))
        return y_pred


**Task 2**: Complete the helper function below for running n-Fold cross validation.

In [11]:
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

def learning_cycle(X, y, niteration, eta, stepsize_type, nfold):
    """
    Helper function for running n-Fold cross validation
    Input:  X              2d array of inputs
            y              1d array of outputs
            niteration     number of iteration for gradient descent
            eta            initial stepsize
            stepsize_type  =0 constant, =1 diminishing
            nfold          number of folds for cross-validation
    Output: xf1score       1D array of size [nfold] containing the f1 scores for each fold
    """

    # split the data into n folds
    cselection = KFold(n_splits=nfold, random_state=None, shuffle=False)

    # array to collect the results for each fold
    xf1score = np.zeros(nfold)

    # create an instance of the logistic regression model
    clogreg = logreg_sgd_cls()

    # perform n-fold cross-validation
    for ifold, (index_train, index_test) in enumerate(cselection.split(X)):
        # train the model
        clogreg.fit(X[index_train], y[index_train], eta, niteration, stepsize_type)

        # predict on the test set
        y_pred = clogreg.predict(X[index_test])

        # evaluate using F1 score for each fold
        xf1score[ifold] = f1_score(y[index_test], y_pred)

    return xf1score


**Task 3**: Execute n-fold cross-validation and answer question 4 according to the results.

In [12]:
# Learning hyperparameters
# create the list of initial step sizes (learning rates)
neta = 40   # number of different step size
eta0 = 0.2
# list of initial step sizes
leta = [ eta0*(i+1) for i in range(neta)]

# number of iterations in gradient descent
iteration = 50

# number of folds
nfold = 5

# fix the random seed
rng = np.random.default_rng(12345)

# number of different stepsize policies
nstepsize_type = 2

# array to collect the test scores for the two stepsize policies
xmean_test_results = np.zeros((neta, nstepsize_type))

# Run cross-validation
# enumerate stepsize policies
for istepsize in [0, 1]:    # 0 - constant, 1 - diminishing

    xf1score_eta = np.zeros(neta)

    # enumerate the (initial) stepsizes
    for ieta in range(neta):
        eta = leta[ieta]

        # n-fold cross-validation
        xf1score = learning_cycle(X, y, iteration, eta, istepsize, nfold)
        # compute the mean of the test accuracies
        xf1score_eta[ieta] = np.mean(xf1score)

    xmean_test_results[:, istepsize] = xf1score_eta

# values of the curves in the graph
for i in range(nstepsize_type):
    for j in range(neta):
        print('%6.4f'%leta[j],'%6.4f'%xmean_test_results[j, i])


  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np.exp(yi * np.dot(w, xi)))
  gradient = -yi * xi / (1 + np

0.2000 0.6971
0.4000 0.6971
0.6000 0.6971
0.8000 0.6971
1.0000 0.6971
1.2000 0.6971
1.4000 0.6971
1.6000 0.6971
1.8000 0.6971
2.0000 0.6971
2.2000 0.6971
2.4000 0.6971
2.6000 0.6971
2.8000 0.6971
3.0000 0.6971
3.2000 0.6971
3.4000 0.6971
3.6000 0.6971
3.8000 0.6971
4.0000 0.6971
4.2000 0.6971
4.4000 0.6971
4.6000 0.6971
4.8000 0.6971
5.0000 0.6971
5.2000 0.6971
5.4000 0.6971
5.6000 0.6971
5.8000 0.6971
6.0000 0.6971
6.2000 0.6971
6.4000 0.6971
6.6000 0.6971
6.8000 0.6971
7.0000 0.6971
7.2000 0.6971
7.4000 0.6971
7.6000 0.6971
7.8000 0.6971
8.0000 0.6971
0.2000 0.9302
0.4000 0.9302
0.6000 0.9291
0.8000 0.9302
1.0000 0.9302
1.2000 0.9302
1.4000 0.9302
1.6000 0.9302
1.8000 0.9302
2.0000 0.9302
2.2000 0.9302
2.4000 0.9302
2.6000 0.9302
2.8000 0.9302
3.0000 0.9302
3.2000 0.9302
3.4000 0.9302
3.6000 0.9302
3.8000 0.9302
4.0000 0.9302
4.2000 0.9302
4.4000 0.9302
4.6000 0.9302
4.8000 0.9302
5.0000 0.9302
5.2000 0.9302
5.4000 0.9302
5.6000 0.9302
5.8000 0.9302
6.0000 0.9302
6.2000 0.9302
6.4000

In [13]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.linear_model import SGDClassifier

# Load the dataset
data = load_breast_cancer()
X, y = data.data, data.target
y = np.where(y == 0, -1, 1)  # Convert labels to {-1, +1}

# Hyperparameters
nstepsize_type = 3
nfold = 5
iteration = 50
eta0 = 0.2
neta = 40
rng = np.random.default_rng(12345)
leta = [eta0 * (i + 1) for i in range(neta)]

# Initialize KFold
kf = KFold(n_splits=nfold, shuffle=True, random_state=12345)

# Function to train and evaluate the model
def train_and_evaluate(stepsize_policy):
    f1_scores = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        if stepsize_policy == 'constant':
            clf = SGDClassifier(loss='log_loss', learning_rate='constant', eta0=eta0, max_iter=iteration, random_state=12345)
        elif stepsize_policy == 'diminishing':
            clf = SGDClassifier(loss='log_loss', learning_rate='optimal', max_iter=iteration, random_state=12345)
        elif stepsize_policy == 'random':
            eta = rng.choice(leta)
            clf = SGDClassifier(loss='log_loss', learning_rate='constant', eta0=eta, max_iter=iteration, random_state=12345)

        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        f1_scores.append(f1_score(y_test, y_pred))

    return np.mean(f1_scores)

# Evaluate all stepsize policies
constant_f1 = train_and_evaluate('constant')
diminishing_f1 = train_and_evaluate('diminishing')
random_f1 = train_and_evaluate('random')

print(f"Constant stepsize F1 score: {constant_f1}")
print(f"Diminishing stepsize F1 score: {diminishing_f1}")
print(f"Random stepsize F1 score: {random_f1}")


Constant stepsize F1 score: 0.6839748477103473
Diminishing stepsize F1 score: 0.8369865070775194
Random stepsize F1 score: 0.779808679248253
