In [1]:
import os
import matplotlib.pyplot as plt
from LogisticRegression import logistic_regression
from LRM import logistic_regression_multiclass
from DataReader import *
import numpy as np

In [2]:
# Dataset location, relative to the notebook's working directory, and the
# names of the training / test archives inside it.
data_dir = "../data/"
train_filename, test_filename = "training.npz", "test.npz"

In [3]:
def visualize_features(X, y, plot_name):
    '''Save a 2-D scatter plot of two features, coloured by label.

    Args:
        X: An array of shape [n_samples, 2]. Column 0 is drawn on the x axis
            (labelled "Symmetry"), column 1 on the y axis ("Intensity").
        y: An array of shape [n_samples,]. Passed to matplotlib as the
            per-point colour value.
        plot_name: Used both as the figure title and as the output file stem.

    Returns:
        No return. The figure is written to './plots/<plot_name>.png'.
    '''
    ### YOUR CODE HERE
    plt.rcParams.update({'figure.figsize':(6,6), 'figure.dpi':100})
    plt.rcParams["figure.autolayout"] = True
    print(X.shape, y.shape)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.title(plot_name)
    plt.xlabel("Symmetry")
    plt.ylabel("Intensity")
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs("./plots", exist_ok=True)
    plt.savefig("./plots/"+plot_name+".png")
    # Close (rather than just clear) the figure so the notebook does not
    # display an empty "<Figure ... with 0 Axes>" after the cell finishes.
    plt.close()

In [4]:

def visualize_result(X, y, W, plot_name):
    '''Save a scatter plot of the samples together with a linear decision boundary.

    The boundary drawn is the line W[0] + W[1]*x + W[2]*y = 0.

    Args:
        X: An array of shape [n_samples, 2]. Column 0 is the x axis
            ("Symmetry"), column 1 the y axis ("Intensity").
        y: An array of shape [n_samples,]. Used as the per-point colour value.
        W: Indexable with three entries: bias, x coefficient, y coefficient.
        plot_name: Figure title and output file stem.

    Returns:
        No return. The figure is written to './plots/<plot_name>.png'.
    '''
    plt.rcParams.update({'figure.figsize':(6, 6), 'figure.dpi':100})
    plt.rcParams["figure.autolayout"] = True

    print(X.shape, y.shape)
    b, w1, w2 = W[0], W[1], W[2]

    xmin, xmax = min(X[:, 0]), max(X[:, 0])
    ymin, ymax = min(X[:, 1]), max(X[:, 1])

    if w2 != 0:
        # Usual case: solve the boundary equation for y.
        c = -b/w2
        m = -w1/w2
        xd = np.array([xmin, xmax])
        yd = m*xd + c
    elif w1 != 0:
        # y coefficient is zero: the boundary is the vertical line x = -b/w1.
        xd = np.array([-b/w1, -b/w1])
        yd = np.array([ymin, ymax])
    else:
        # Both coefficients are zero: there is no line to draw.
        xd = yd = None

    plt.scatter(X[:,0], X[:,1], c= y)
    if xd is not None:
        plt.plot(xd, yd, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=12)
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)

    plt.title(plot_name)
    plt.xlabel("Symmetry")
    plt.ylabel("Intensity")
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs('./plots', exist_ok=True)
    plt.savefig('./plots/'+plot_name+'.png')
    # Close the figure so no empty figure is left open for the notebook to render.
    plt.close()

In [5]:
def predict_multi(X, W):
    """Predict class labels for samples in X.

    Args:
        X: An array of shape [n_samples, n_features].
        W: Weight array such that np.matmul(X, W) yields one row of class
            scores per sample (one column per class).

    Returns:
        preds: An array of shape [n_samples,]. Only contains 0,..,k-1.
    """
    ### YOUR CODE HERE
    scores = np.matmul(X, W)
    # Softmax is strictly increasing in each score, so the most probable class
    # is simply the one with the largest raw score. Taking argmax directly
    # avoids the previous call to an undefined `softmax` name and cannot
    # overflow for large scores.
    return np.argmax(scores, axis=1)
    ### END YOUR CODE

def softmax_multi(x):
    """Compute softmax values for each set of scores in x.

    Args:
        x: Array-like of scores. For a 1-D input the whole vector is one set
            of scores; for higher-dimensional input each slice along the last
            axis is normalised independently.

    Returns:
        An array of the same shape as x whose entries along the last axis are
        non-negative and sum to 1.
    """
    ### You must implement softmax by youself, otherwise you will not get credits for this part.

    ### YOUR CODE HERE
    scores = np.asarray(x)
    if scores.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(scores)
    # A 0-d input has no last axis, so reduce over everything in that case.
    axis = -1 if scores.ndim else None
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps np.exp from overflowing to inf (and the ratio from becoming nan).
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)
    ### END YOUR CODE

def score_multi(X, labels, W):
    """Returns the mean accuracy on the given test data and labels.

    Args:
        X: An array of shape [n_samples, n_features].
        labels: An array of shape [n_samples,]. Only contains 0,..,k-1.
        W: Weight array such that np.matmul(X, W) yields one row of class
            scores per sample (one column per class).

    Returns:
        score: An float. Fraction of samples whose highest-scoring class
            equals the corresponding entry of labels.
    """
    ### YOUR CODE HERE
    # The previous version called the binary (+1/-1) predictor and omitted W.
    # The multi-class prediction is the index of the largest class score;
    # softmax does not change which score is largest, so it is not needed here.
    preds = np.argmax(np.matmul(X, W), axis=1)
    # Flatten so that a column-shaped label array is compared element by
    # element instead of being broadcast against preds.
    truth = np.asarray(labels).ravel()
    return np.mean(preds == truth)
    ### END YOUR CODE

def predict_test(X, W):
    """Predict class labels for samples in X.

    Args:
        X: An array of shape [n_samples, n_features].
        W: Weight vector; np.matmul(X, W) is the per-sample logit.

    Returns:
        preds: An array of shape [n_samples,]. Only contains 1 or -1.
    """
    ### YOUR CODE HERE
    logits = np.matmul(X, W)
    # sigmoid(z) < 0.5 exactly when z < 0, so threshold the logit directly.
    # This skips np.exp (which overflows for strongly negative logits) and
    # returns an ndarray, as the docstring promises, instead of a Python list.
    return np.where(logits < 0, -1, 1)
    ### END YOUR CODE

def score_test(X, y, W):
    """Returns the mean accuracy on the given test data and labels.

    Args:
        X: An array of shape [n_samples, n_features].
        y: An array of shape [n_samples,]. Only contains 1 or -1.
        W: Weight vector forwarded to predict_test.

    Returns:
        score: An float. Mean accuracy of predict_test(X, W) wrt. y.
    """
    ### YOUR CODE HERE
    # Convert both sides to arrays before comparing: comparing two Python
    # lists with == yields a single bool, not an element-wise result.
    preds = np.asarray(predict_test(X, W))
    truth = np.asarray(y).ravel()

    return np.mean(preds == truth)
    ### END YOUR CODE

In [6]:
# Load the training archive and split it into training and validation parts
# (2300 is the split argument handed to train_valid_split).
raw_data, labels = load_data(os.path.join(data_dir, train_filename))
raw_train, raw_valid, label_train, label_valid = train_valid_split(raw_data, labels, 2300)

# Turn the raw samples into feature arrays.
train_X_all = prepare_X(raw_train)
valid_X_all = prepare_X(raw_valid)

# Map all labels to 0,1,2; prepare_y also returns the indices of the samples
# that belong to class '1' or '2'.
train_y_all, train_idx = prepare_y(label_train)
valid_y_all, val_idx = prepare_y(label_valid)

# Binary case: keep only classes '1' and '2', and train on the first 1350 of
# those examples only.
train_X = train_X_all[train_idx][0:1350]
train_y = train_y_all[train_idx][0:1350]
valid_X = valid_X_all[val_idx]
valid_y = valid_y_all[val_idx]

# Relabel to +1 / -1: class '2' becomes -1, so class '1' is the positive class.
train_y[train_y == 2] = -1
valid_y[valid_y == 2] = -1

# Number of binary training examples.
data_shape = len(train_y)
# print(train_X.shape, train_y.shape, valid_X.shape, valid_y.shape)

# Visualize training data.
# visualize_features(train_X[:, 1:3], train_y, "2_class_train_features")

#######------------Logistic Regression Sigmoid Case------------

#### Check BGD, SGD, miniBGD
# logisticR_classifier = logistic_regression(learning_rate=0.4, max_iter=500)

# logisticR_classifier.fit_BGD(train_X, train_y)
# params = logisticR_classifier.get_params()
# print("Final Weights: ", params)
# print("Accuracy: ", logisticR_classifier.score(train_X, train_y))
# visualize_result(train_X[:, 1:3], train_y, params)
# visualize_result(valid_X[:, 1:3], valid_y, params)

# print("data_shape: ", data_shape)
# logisticR_classifier.fit_miniBGD(train_X, train_y, data_shape)
# params = logisticR_classifier.get_params()
# print("Final Weights: ", params)
# print("Accuracy: ", logisticR_classifier.score(train_X, train_y))
# visualize_result(train_X[:, 1:3], train_y, params)

# logisticR_classifier.fit_miniBGD(train_X, train_y, 16)
# print("Final Weights: ", logisticR_classifier.get_params())
# print("Accuracy: ", logisticR_classifier.score(train_X, train_y))
# visualize_result(train_X[:, 1:3], train_y, params)
# visualize_result(valid_X[:, 1:3], valid_y, params)

# logisticR_classifier.fit_miniBGD(train_X, train_y, 16)
# print("Final Weights: ", logisticR_classifier.get_params())
# print("Accuracy: ", logisticR_classifier.score(train_X, train_y))
# visualize_result(train_X[:, 1:3], train_y, params)
# visualize_result(valid_X[:, 1:3], valid_y, params)

# logisticR_classifier.fit_miniBGD(train_X, train_y, 1)
# print("Final Weights: ", logisticR_classifier.get_params())
# print("Accuracy: ", logisticR_classifier.score(train_X, train_y))

# logisticR_classifier.fit_SGD(train_X, train_y)
# params = logisticR_classifier.get_params()
# print("Final Weights: ", params)
# print("Accuracy: ", logisticR_classifier.score(train_X, train_y))
# visualize_result(train_X[:, 1:3], train_y, params)
# visualize_result(valid_X[:, 1:3], valid_y, params)

# Explore different hyper-parameters.
# ### YOUR CODE HERE
# final_model = [float('-inf'), [], 0, 0, 0, '']
# mini_bgd_final_model = [float('-inf'), [], 0, 0, 0, '']
# bgd_final_model = [float('-inf'), [], 0, 0, 0, '']
# sgd_final_model = [float('-inf'), [], 0, 0, 0, '']


# lrs = [0.1, 0.2, 0.4, 0.75, 1.0]
# max_iters = [50, 100, 200, 400, 500]
# gd_types = ['BGD', 'miniBGD', 'SGD']

# for gd_type in gd_types:
#     if gd_type == 'miniBGD':
#         mini_batch_sizes = [16, 32, 64]
#         for lr in lrs:
#             for max_iter in max_iters:
#                 logisticR_classifier = logistic_regression(learning_rate=lr, max_iter=max_iter)
#                 for mini_batch_size in mini_batch_sizes:
#                     print('miniBGD', lr, max_iter, mini_batch_size)
#                     logisticR_classifier.fit_miniBGD(train_X, train_y, mini_batch_size)
#                     params = logisticR_classifier.get_params()
#                     training_score = logisticR_classifier.score(train_X, train_y)
#                     print("training_score: ", training_score)
#                     validation_score = logisticR_classifier.score(valid_X, valid_y)
#                     print("validation_score: ", validation_score)
#                     if validation_score > mini_bgd_final_model[0]:
#                         mini_bgd_final_model = [validation_score, params, lr, max_iter, mini_batch_size, 'miniBGD']
#                         print(mini_bgd_final_model)
#     elif gd_type == 'BGD':
#         for lr in lrs:
#             for max_iter in max_iters:
#                 logisticR_classifier = logistic_regression(learning_rate=lr, max_iter=max_iter)
#                 print('BGD', lr, max_iter)
#                 logisticR_classifier.fit_BGD(train_X, train_y)
#                 params = logisticR_classifier.get_params()
#                 training_score = logisticR_classifier.score(train_X, train_y)
#                 print("training_score: ", training_score)
#                 validation_score = logisticR_classifier.score(valid_X, valid_y)
#                 print("validation_score: ", validation_score)
#                 if validation_score > bgd_final_model[0]:
#                     bgd_final_model = [validation_score, params, lr, max_iter, 0, 'BGD']
#                     print(bgd_final_model)
#     elif gd_type == 'SGD':
#         for lr in lrs:
#             for max_iter in max_iters:
#                 logisticR_classifier = logistic_regression(learning_rate=lr, max_iter=max_iter)
#                 print('SGD', lr, max_iter)
#                 logisticR_classifier.fit_SGD(train_X, train_y)
#                 params = logisticR_classifier.get_params()
#                 training_score = logisticR_classifier.score(train_X, train_y)
#                 print("training_score: ", training_score)
#                 validation_score = logisticR_classifier.score(valid_X, valid_y)
#                 print("validation_score: ", validation_score)
#                 if validation_score > sgd_final_model[0]:
#                     sgd_final_model = [validation_score, params, lr, max_iter, 0, 'SGD']
#                     print(sgd_final_model)

# print("bgd_final_model: ", bgd_final_model)
# print("mini_bgd_final_model: ", mini_bgd_final_model)
# print("sgd_final_model: ", sgd_final_model)

# #Best models for each algorithm
# bgd_final_model = [0.9735449735449735, [ 0.46873881, 10.41498126, -4.74745088], 1.0, 400, 0, 'BGD']
# mini_bgd_final_model = [0.9788359788359788, [ 1.70682355 17.09388503 -5.49211733], 0.4, 300, 64, 'miniBGD']
# sgd_final_model = [0.9788359788359788, [ 9.4833702 , 29.21028412,  1.15452184], 0.1, 200, 0, 'SGD']
# ### END YOUR CODE

# # Visualize the your 'best' model after training.

# logisticR_classifier = logistic_regression(learning_rate=1.0, max_iter=400)

# logisticR_classifier.fit_BGD(train_X, train_y)
# params = logisticR_classifier.get_params()
# print("Final Weights: ", params)
# print("Training Accuracy: ", logisticR_classifier.score(train_X, train_y))
# print("Validation Accuracy: ", logisticR_classifier.score(valid_X, valid_y))
# Final Weights:  [ 0.47071328 10.41367613 -4.74328509]
# Training Accuracy:  0.9666666666666667
# Validation Accuracy:  0.9735449735449735
# visualize_result(train_X[:, 1:3], train_y, params)
# visualize_result(valid_X[:, 1:3], valid_y, params)

# logisticR_classifier = logistic_regression(learning_rate=0.4, max_iter=300)
# logisticR_classifier.fit_miniBGD(train_X, train_y, 64)
# params = logisticR_classifier.get_params()
# print("Final Weights: ", params)
# print("Training Accuracy: ", logisticR_classifier.score(train_X, train_y))
# print("Validation Accuracy: ", logisticR_classifier.score(valid_X, valid_y))
# Final Weights:  [ 2.5210473  19.05145317 -5.20721508]
# Training Accuracy:  0.9696296296296296
# Validation Accuracy:  0.9761904761904762

# logisticR_classifier = logistic_regression(learning_rate=0.1, max_iter=200)
# logisticR_classifier.fit_SGD(train_X, train_y)
# params = logisticR_classifier.get_params()
# print("Final Weights: ", params)
# print("Training Accuracy: ", logisticR_classifier.score(train_X, train_y))
# print("Validation Accuracy: ", logisticR_classifier.score(valid_X, valid_y))
# Training Accuracy:  0.9725925925925926
# Validation Accuracy:  0.9788359788359788
# visualize_result(train_X[:, 1:3], train_y, params, "bgd_final_model_train_result_sigmoid")
# visualize_result(train_X[:, 1:3], train_y, params, "minibgd_final_model_train_result_sigmoid")
# visualize_result(train_X[:, 1:3], train_y, params, "sgd_final_model_train_result_sigmoid")
# visualize_result(valid_X[:, 1:3], valid_y, params, "bgd_final_model_validation_result_sigmoid")
# visualize_result(valid_X[:, 1:3], valid_y, params, "minibgd_final_model_validation_result_sigmoid")
# visualize_result(valid_X[:, 1:3], valid_y, params, "sgd_final_model_validation_result_sigmoid")

# Best model is the mini BGD model
# best_model = [0.9788359788359788, [ 1.70682355 17.09388503 -5.49211733], 0.4, 300, 64, 'miniBGD']
# best_params = best_model[1]
# visualize_result(train_X[:, 1:3], train_y, best_params, "best_model_training_loss")
# visualize_result(valid_X[:, 1:3], valid_y, best_params, "best_model_validation_loss")


# ### Use the 'best' model above to do testing. Note that the test data should be loaded and processed in the same way as the training data.
# ## YOUR CODE HERE
# raw_data_test, labels_test = load_data(os.path.join(data_dir, test_filename))

# # # # # # ##### Preprocess raw data to extract features
# test_X_all = prepare_X(raw_data_test)
# ##### Preprocess labels for all data to 0,1,2 and return the idx for data from '1' and '2' class.
# test_y_all, test_idx = prepare_y(labels_test)
# ####### For binary case, only use data from '1' and '2'  
# test_X = test_X_all[test_idx]
# test_y = test_y_all[test_idx]
# ####### set lables to  1 and -1. Here convert label '2' to '-1' which means we treat data '1' as postitive class. 
# test_y[np.where(test_y==2)] = -1
# test_data_shape= test_y.shape[0]
# # print("test_data_shape: ", test_data_shape)
# logisticR_classifier = logistic_regression(learning_rate=0.4, max_iter=300)
# logisticR_classifier.fit_miniBGD(train_X, train_y, 64)
# best_params = logisticR_classifier.get_params()

# print("Final Weights: ", best_params )
# print("Training Accuracy: ", score_test(train_X, train_y, best_params))
# print("Validation Accuracy: ", score_test(valid_X, valid_y, best_params))
# print("Test Accuracy: ", score_test(test_X, test_y, best_params))


# visualize_result(test_X[:, 1:3], test_y, best_params, "best_model_testing_result")
# visualize_features(test_X[:, 1:3], test_y)
# END YOUR CODE

In [7]:
def visualize_result_multi(X, y, W, plot_name="2class_softmax_convergence"):
    '''Save a scatter plot of the samples with one line per class weight column.

    For every class i the line W[0][i] + W[1][i]*x + W[2][i]*y = 0 is drawn.

    Args:
        X: An array of shape [n_samples, 2]. Column 0 is the x axis
            ("Symmetry"), column 1 the y axis ("Intensity").
        y: An array of shape [n_samples,]. Used as the per-point colour value.
        W: An array indexed as W[row][class] with rows 0, 1, 2 holding the
            bias, x coefficient and y coefficient; W.shape[1] is the number
            of classes.
        plot_name: Output file stem. Defaults to the previously hard-coded
            name so existing three-argument calls write the same file.

    Returns:
        No return. The figure is written to './plots/<plot_name>.png'.
    '''
    plt.rcParams.update({'figure.figsize':(6, 6), 'figure.dpi':100})
    plt.rcParams["figure.autolayout"] = True

    print(X.shape, y.shape, W.shape)

    xmin, xmax = min(X[:, 0]), max(X[:, 0])
    ymin, ymax = min(X[:, 1]), max(X[:, 1])
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    plt.scatter(X[:,0], X[:,1], c= y)

    color = ['red', 'green', 'black']

    # Draw a line for every class column instead of a fixed two, so a k=3
    # weight array gets all three lines; colours repeat if k exceeds the palette.
    for i in range(W.shape[1]):
        b, w1, w2 = W[0][i], W[1][i], W[2][i]
        if w2 != 0:
            # Usual case: solve the line equation for y.
            c = -b/w2
            m = -w1/w2
            xd = np.array([xmin, xmax])
            yd = m*xd + c
        elif w1 != 0:
            # y coefficient is zero: vertical line at x = -b/w1.
            xd = np.array([-b/w1, -b/w1])
            yd = np.array([ymin, ymax])
        else:
            # Both coefficients are zero: nothing to draw for this class.
            continue
        plt.plot(xd, yd, color=color[i % len(color)], marker='o', linestyle='dashed', linewidth=2, markersize=12)

    plt.title("MultiClass result")
    plt.xlabel("Symmetry")
    plt.ylabel("Intensity")
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs('./plots', exist_ok=True)
    plt.savefig('./plots/' + plot_name + '.png')
    # Close the figure so no empty figure is left open for the notebook to render.
    plt.close()

In [8]:
# ------------Logistic Regression Multiple-class case, let k= 3------------
###### Use all data from '0' '1' '2' for training
# train_X = train_X_all
# train_y = train_y_all
# valid_X = valid_X_all
# valid_y = valid_y_all

# raw_data_test, labels_test = load_data(os.path.join(data_dir, test_filename))
# test_X = prepare_X(raw_data_test)
# test_y, idx = prepare_y(labels_test)

# visualize_features(train_X[:, 1:3], train_y, "LRM_train_features")
# visualize_features(valid_X[:, 1:3], valid_y, "LRM_valid_features")
# visualize_features(test_X[:, 1:3], test_y, "LRM_test_features")

########  miniBGD for multiclass Logistic Regression
# print(train_X.shape, train_y.shape, valid_X.shape, valid_y.shape)
# logisticR_classifier_multiclass = logistic_regression_multiclass(learning_rate=0.5, max_iter=100,  k= 3)
# logisticR_classifier_multiclass.fit_miniBGD(train_X, train_y, 32)
# params = logisticR_classifier_multiclass.get_params()

# Explore different hyper-parameters.
### YOUR CODE HERE
# final_model = [float('-inf'), [], 0, 0, 0, '']

# lrs = [0.1, 0.2, 0.4, 0.75, 1.0]
# max_iters = [50, 100, 200, 400, 500]
# mini_batch_sizes = [16, 32, 64]

# for lr in lrs:
#     for max_iter in max_iters:
#         logisticR_classifier_multiclass = logistic_regression_multiclass(learning_rate=lr, max_iter=max_iter,  k= 3)
#         for mini_batch_size in mini_batch_sizes:
#             print('miniBGD', lr, max_iter, mini_batch_size)
#             logisticR_classifier_multiclass.fit_miniBGD(train_X, train_y, mini_batch_size)
#             params = logisticR_classifier_multiclass.get_params()
#             training_score = logisticR_classifier_multiclass.score(train_X, train_y)
#             print("training_score: ", training_score)
#             validation_score = logisticR_classifier_multiclass.score(valid_X, valid_y)
#             print("validation_score: ", validation_score)
#             if validation_score > final_model[0]:
#                 final_model = [validation_score, params, lr, max_iter, mini_batch_size, 'miniBGD']
#                 print(final_model)
#print("Final Model: ", final_model)

# final_model = [0.8857142857142857,
# [[  8.73615057,   1.29160035,  -3.41908028],
# [ -3.0697722 ,  12.54171746, -19.47054993],
# [ 13.91399786,  -5.0503584 ,   1.61281812]],1.0,200,32,'miniBGD']

# logisticR_classifier_multiclass = logistic_regression_multiclass(learning_rate=0.5, max_iter=200,  k= 3)
# logisticR_classifier_multiclass.fit_miniBGD(train_X, train_y, 128)
# params = logisticR_classifier_multiclass.get_params()
# print("Params: ", params)
# training_score = logisticR_classifier_multiclass.score(train_X, train_y)
# print("training_score: ", training_score)
# validation_score = logisticR_classifier_multiclass.score(valid_X, valid_y)
# print("validation_score: ", validation_score)
# testing_score = logisticR_classifier_multiclass.score(test_X, test_y)
# print("testing_score: ", testing_score)

# Params:  [[  7.18726599  -0.984131    -4.67416801]
#  [  0.55830733  13.9652507  -15.0892574 ]
#  [ 13.12298073  -5.90900813   1.11319128]]
# training_score:  0.8947826086956522
# validation_score:  0.8793650793650793
# testing_score:  0.8672350791717418

# Params:  [[  6.32856892  -1.53839471  -6.19131709]
#  [  0.91083148  16.55917569 -15.3376584 ]
#  [ 10.83608674  -7.96171329  -1.29303694]]
# training_score:  0.8995652173913044
# validation_score:  0.873015873015873
# testing_score:  0.8684531059683313

# logisticR_classifier_multiclass = logistic_regression_multiclass(learning_rate=0.6, max_iter=200,  k= 3)
# logisticR_classifier_multiclass.fit_miniBGD(train_X, train_y, 64)
# Params:  [[  9.64785501   1.52456229  -2.22797855]
#  [  1.66443816  14.30584618 -13.72638955]
#  [ 11.74981459  -6.85746315  -0.08700084]]
# training_score:  0.8956521739130435
# validation_score:  0.873015873015873
# testing_score:  0.8708891595615104

# logisticR_classifier_multiclass = logistic_regression_multiclass(learning_rate=0.5, max_iter=200,  k= 3)
# logisticR_classifier_multiclass.fit_miniBGD(train_X, train_y, 128)
# Params:  [[  8.20477456   0.44883913  -2.3851905 ]
#  [  2.15294719  11.5409138  -11.45329729]
#  [  9.05297781  -7.93059492  -1.66585129]]
# training_score:  0.8921739130434783
# validation_score:  0.8761904761904762
# testing_score:  0.8745432399512789



### END YOUR CODE

# Visualize the your 'best' model after training.
# visualize_result_multi(train_X[:, 1:3], train_y, params)
# visualize_result_multi(valid_X[:, 1:3], valid_y, params)


# Use the 'best' model above to do testing.
### YOUR CODE HERE
# testing_score = logisticR_classifier_multiclass.score(test_X, test_y)
# print("testing_score: ", testing_score)
# visualize_result_multi(test_X[:, 1:3], test_y, params)
### END YOUR CODE


In [9]:
# ------------Connection between sigmoid and softmax------------
############ Now set k=2, only use data from '1' and '2' 

#####  set labels to 0,1 for softmax classifer
# train_X = train_X_all[train_idx]
# train_y = train_y_all[train_idx]
# train_X = train_X[0:1350]
# train_y = train_y[0:1350]
# valid_X = valid_X_all[val_idx]
# valid_y = valid_y_all[val_idx] 
# train_y[np.where(train_y==2)] = 0
# valid_y[np.where(valid_y==2)] = 0

# test_X_all = prepare_X(raw_data_test)
# test_y_all, test_idx = prepare_y(labels_test)
# test_X = test_X_all[test_idx]
# test_y = test_y_all[test_idx]
# test_y[np.where(test_y==2)] = 0
# visualize_features(train_X, train_y, "2 Class Softmax train features")
# visualize_features(test_X, test_y, "2 Class Softmax test features")

# ###### First, fit softmax classifer until convergence, and evaluate 
# ##### Hint: we suggest to set the convergence condition as "np.linalg.norm(gradients*1./batch_size) < 0.0005" or max_iter=10000:
# ### YOUR CODE HERE
# logisticR_classifier_multiclass = logistic_regression_multiclass(learning_rate=0.1, max_iter=10000,  k= 2)
# logisticR_classifier_multiclass.fit_miniBGD(train_X, train_y, 32)
# lrm_convergence_params = logisticR_classifier_multiclass.get_params()
# print("LRM Convergence Params: ", lrm_convergence_params)
# training_score = logisticR_classifier_multiclass.score(train_X, train_y)
# print("training_score: ", training_score)
# validation_score = logisticR_classifier_multiclass.score(valid_X, valid_y)
# print("validation_score: ", validation_score)
# ### END YOUR CODE






# train_X = train_X_all[train_idx]
# train_y = train_y_all[train_idx]
# train_X = train_X[0:1350]
# train_y = train_y[0:1350]
# valid_X = valid_X_all[val_idx]
# valid_y = valid_y_all[val_idx] 
# #####       set lables to -1 and 1 for sigmoid classifer
# train_y[np.where(train_y==2)] = -1
# valid_y[np.where(valid_y==2)] = -1   

# ###### Next, fit sigmoid classifer until convergence, and evaluate
# ##### Hint: we suggest to set the convergence condition as "np.linalg.norm(gradients*1./batch_size) < 0.0005" or max_iter=10000:
# ### YOUR CODE HERE
# logisticR_classifier = logistic_regression(learning_rate=0.4, max_iter=10000)
# logisticR_classifier.fit_miniBGD(train_X, train_y, 64)
# convergence_params = logisticR_classifier.get_params()
# print("LR convergence_params: ", convergence_params)
# training_score = logisticR_classifier.score(train_X, train_y)
# print("training_score: ", training_score)
# validation_score = logisticR_classifier.score(valid_X, valid_y)
# print("validation_score: ", validation_score)

# LRM Convergence Params:  [[ -5.36697941   5.04173235]
#  [-20.81503274   9.75946323]
#  [ -3.26876102  -1.06174654]]
# training_score:  0.9725925925925926
# validation_score:  0.9788359788359788
# Starting mini BGD...
# LR convergence_params:  [10.37087186 30.43517856  2.21460478]
# training_score:  0.9725925925925926
# validation_score:  0.9788359788359788

# lrm_convergence_params = np.array([[ -5.36697941,   5.04173235], [-20.81503274,   9.75946323], [ -3.26876102,  -1.06174654]])
# lr_convergence_params = np.array([10.37087186, 30.43517856,  2.21460478])
# visualize_result_multi(test_X, test_y, lrm_convergence_params)

### END YOUR CODE


################Compare and report the observations/prediction accuracy


'''
Explore the training of these two classifiers and monitor the gradients/weights at each step. 
Hint: First, set the two learning rates to the same value and check the gradients/weights for the first batch in the first epoch. What is the relationship between these two models? 
Then, for which learning rates do we obtain w_1-w_2= w at every training step, so that these two models are equivalent at each training step? 
'''
# ### YOUR CODE HERE
def _binary_subset(negative_label):
    """Build the two-class train/validation arrays with class 2 relabelled.

    Reads the notebook-level arrays train_X_all, train_y_all, valid_X_all and
    valid_y_all, indexed by train_idx / val_idx. The training part is cut to
    its first 1350 rows, and every label equal to 2 is overwritten with
    `negative_label`.
    """
    X_tr = train_X_all[train_idx][0:1350]
    y_tr = train_y_all[train_idx][0:1350]
    X_va = valid_X_all[val_idx]
    y_va = valid_y_all[val_idx]
    y_tr[y_tr == 2] = negative_label
    y_va[y_va == 2] = negative_label
    return X_tr, y_tr, X_va, y_va


# Softmax classifier with k=2 on labels {0, 1}: learning rate 0.1, max_iter=1,
# mini-batches of 256.
train_X, train_y, valid_X, valid_y = _binary_subset(0)

logisticR_classifier_multiclass = logistic_regression_multiclass(learning_rate=0.1, max_iter=1,  k= 2)
logisticR_classifier_multiclass.fit_miniBGD(train_X, train_y, 256)
lrm_convergence_params = logisticR_classifier_multiclass.get_params()
print("LRM Convergence Params: ", lrm_convergence_params)
training_score = logisticR_classifier_multiclass.score(train_X, train_y)
print("training_score: ", training_score)
validation_score = logisticR_classifier_multiclass.score(valid_X, valid_y)
print("validation_score: ", validation_score)

# Sigmoid classifier on labels {1, -1}: same batch size and max_iter, but with
# learning rate 0.2, i.e. twice the softmax learning rate used above.
train_X, train_y, valid_X, valid_y = _binary_subset(-1)

logisticR_classifier = logistic_regression(learning_rate=0.2, max_iter=1)
logisticR_classifier.fit_miniBGD(train_X, train_y, 256)
convergence_params = logisticR_classifier.get_params()
print("LR convergence_params: ", convergence_params)
training_score = logisticR_classifier.score(train_X, train_y)
print("training_score: ", training_score)
validation_score = logisticR_classifier.score(valid_X, valid_y)
print("validation_score: ", validation_score)
### END YOUR CODE

# ------------End------------

Weights after 0 mini-batch: [[ 0.00351563 -0.00351563]
 [-0.01191377  0.01191377]
 [ 0.00429995 -0.00429995]]
W2 - W1 after 0 mini-batch: [-0.00703125  0.02382753 -0.00859989]
Gradient/Weight:  [[-10. -10.]
 [-10. -10.]
 [-10. -10.]]
Weights after 1 mini-batch: [[-0.01504243  0.01504243]
 [-0.01734257  0.01734257]
 [ 0.02170017 -0.02170017]]
W2 - W1 after 1 mini-batch: [ 0.03008486  0.03468514 -0.04340035]
Gradient/Weight:  [[-12.3371393  -12.3371393 ]
 [ -3.13033403  -3.13033403]
 [ -8.01847373  -8.01847373]]
Weights after 2 mini-batch: [[-0.02988334  0.02988334]
 [-0.02328018  0.02328018]
 [ 0.03648348 -0.03648348]]
W2 - W1 after 2 mini-batch: [ 0.05976667  0.04656037 -0.07296696]
Gradient/Weight:  [[-4.96628202 -4.96628202]
 [-2.5505015  -2.5505015 ]
 [-4.05205501 -4.05205501]]
Weights after 3 mini-batch: [[-0.03777056  0.03777056]
 [-0.03152531  0.03152531]
 [ 0.0473228  -0.0473228 ]]
W2 - W1 after 3 mini-batch: [ 0.07554112  0.06305062 -0.0946456 ]
Gradient/Weight:  [[-2.08819417 

<Figure size 432x288 with 0 Axes>