In [78]:
import numpy as np
import matplotlib.pyplot as plt

def dataParser(x, y):
    xTrain, yTrain = open(x), open(y)
    feature = xTrain.readline().split(",")
    yTrain.readline()
    weight, ground = [], []
    for line in xTrain:
        L = line
        L = L.split(",")
        L = list(map(float, L))
        weight.append(L)
        ground.append(int(yTrain.readline()))
    xTrain.close()
    yTrain.close()
    return [feature, weight, ground]

def comp_confmat(actual, predicted, comment = False):
    classes = np.unique(actual)
    confmat = np.zeros((len(classes), len(classes)))
    for i in range(len(classes)):
        for j in range(len(classes)):
           confmat[i, j] = np.sum((actual == classes[i]) & (predicted == classes[j]))
    [TP, FP], [FN, TN] = confmat
    Precision = TP/(TP+FP)
    Recall = TP/(TP+FN)
    Accuracy = (TP+TN)/len(predicted)
    Specificity = (TN)/(TN+FP)
    F_m = lambda B : (1+B**2)*Precision*Recall/(Precision*B**2+Recall)
    if comment:
        print("Wrong guesses:         {:.0f}".format(FP+FN))
        print("Accuracy:              {:.5f}%".format(Accuracy*100), end = "\t")
        print("Precision:             {:.5f}%".format(Precision*100))
        print("Recall:                {:.5f}%".format(Recall*100), end = "\t")
        print("Specificity:           {:.5f}%".format(Specificity*100))
        print(f"F-Measures(1, 2, 0.5): {F_m(1):.4f}, {F_m(2):.4f}, {F_m(0.5):.4f}")
        print("Confusion Matrix:\n",confmat,"\n")
    return confmat, [Accuracy, Precision, Recall, Specificity, F_m(2)]

def sigmoid(x):
    return np.exp(-np.logaddexp(0, -x))

def train(weights, samples, grounds, epoch, gamma):
    weights = weights.copy()
    for i in range(epoch):
        Z = weights[0] + np.dot(weights[1:],samples.T)
        mcle = 1 - sigmoid(-Z) #Y = 1 | X,w for each sample
        cached = samples * (grounds - mcle)[:, None]
        weights = weights.copy() + gamma*np.insert(np.sum(cached, axis = 0), 0, 1)
    return weights

def trainMiniBatch(weights, trainSamples, trainGrounds, epoch, gamma, batchSize = 32, seed = None):
    weights = weights.copy()
    accuracyList = []
    if type(seed) == int: np.random.seed(seed)
    for i in range(epoch):
        permutation = list(np.random.permutation(len(trainGrounds)))
        shuffled_samples = trainSamples[permutation]
        shuffled_grounds = trainGrounds[permutation]
        samples = [shuffled_samples[i::batchSize] for i in range(batchSize)]
        grounds = [shuffled_grounds[i::batchSize] for i in range(batchSize)]
        estimates = []
        for j in range(batchSize):
            Z = weights[0] + np.dot(weights[1:],samples[j].T)
            mcle = 1 - sigmoid(-Z) #Y = 1 | X,w for each sample
            cached = samples[j] * (grounds[j] - mcle)[:, None]
            weights = weights.copy() + gamma*np.insert(np.sum(cached, axis = 0), 0, 1)
            estimates.extend(np.heaviside(mcle - 0.5, 1))
        falses = np.count_nonzero(np.array(estimates)-np.concatenate(grounds).ravel())
        accuracyList.append([1-falses/len(estimates) , i])
    return weights, np.array(accuracyList).T

def trainStochastic(weights, trainSamples, trainGrounds, epoch, gamma):
    weights = weights.copy()
    for i in range(epoch):
        permutation = list(np.random.permutation(len(trainGrounds)))
        shuffled_samples = trainSamples[permutation]
        shuffled_grounds = trainGrounds[permutation]
        for j in range(len(shuffled_samples)):
            Z = weights[0] + np.dot(weights[1:],shuffled_samples[j].T)
            mcle = 1 - sigmoid(-Z) #Y = 1 | X,w for each sample
            weights = weights.copy() + gamma*np.insert(shuffled_samples[j] * (shuffled_grounds[j] - mcle), 0, 1)
    return weights

def normalizeData(samples, normal):
    features = normal.copy().T
    data = samples.copy().T
    for i in range(len(features)):
        xMin = features[i].min()
        xMax = features[i].max()
        data[i] = (data[i].copy() - xMin)/(xMax - xMin)
    return data.T

def forward(weights, testSamples, comment = True):
    Z = weights[0] + np.dot(weights[1:],testSamples.T)
    mcle = 1 - sigmoid(-Z) #Y = 1 | X,w for each sample
    estimate = np.heaviside(mcle - 0.5, 1)
    return comp_confmat(test_ground, estimate, comment=comment)

def plotParameter(parameters, metric, labels):
        x_labels = [labels[0]+' = {}'.format(parameter) for parameter in parameters]
        for idx in range(len(metric)):
            plt.plot(x_labels, metric[idx], marker='o', markersize=6, linewidth=2, label=labels[1+idx])
        plt.title('Parameter vs Metrics Plot')
        plt.xlabel('Changing Parameter')
        plt.ylabel('% Metric')
        plt.legend()
        plt.show()

In [54]:
[features, samples, grounds] = dataParser("q2_train_samples.csv", "q2_train_labels.csv")
test_parse = dataParser("q2_test_samples.csv", "q2_test_labels.csv")
test_samples, test_ground = np.array(test_parse[1], dtype = float), np.array(test_parse[2], dtype = int)
samples = np.array(samples, dtype = float)
test_samples = normalizeData(test_samples, samples)
samples = normalizeData(samples, samples)
grounds = np.array(grounds, dtype = int)
weights = np.random.normal(0, 0.01, (len(features) + 1))
run = 1

In [18]:
#batch gradient descent
%matplotlib qt
lr = [0.0001,0.0002, 0.001, 0.0013021, 0.002, 0.005, 0.05, 0.5, 1, 2]
metrics = np.zeros((2,len(lr)))
for i in range(len(lr)):
    weights = np.random.normal(0, 0.01, (len(features) + 1))
    print("Learning Rate:",lr[i])
    weights = train(weights, samples, grounds, 40, lr[i])
    metrics[0,i],_,_,_,metrics[1,i] = forward(weights, test_samples)[1]
plotParameter(lr, metrics, ["LR","Accuracy Trial "+str(run),"F_2 Trial "+str(run)])
run += 1

Learning Rate: 0.0001
Wrong guesses:         55
Accuracy:              74.77064%	Precision:             99.19355%
Recall:                69.49153%	Specificity:           97.56098%
F-Measures(1, 2, 0.5): 0.8173, 0.7392, 0.9138
Confusion Matrix:
 [[123.   1.]
 [ 54.  40.]] 

Learning Rate: 0.0002
Wrong guesses:         40
Accuracy:              81.65138%	Precision:             99.19355%
Recall:                75.92593%	Specificity:           98.21429%
F-Measures(1, 2, 0.5): 0.8601, 0.7966, 0.9347
Confusion Matrix:
 [[123.   1.]
 [ 39.  55.]] 

Learning Rate: 0.001
Wrong guesses:         21
Accuracy:              90.36697%	Precision:             99.19355%
Recall:                86.01399%	Specificity:           98.66667%
F-Measures(1, 2, 0.5): 0.9213, 0.8836, 0.9624
Confusion Matrix:
 [[123.   1.]
 [ 20.  74.]] 

Learning Rate: 0.0013021
Wrong guesses:         17
Accuracy:              92.20183%	Precision:             93.54839%
Recall:                92.80000%	Specificity:           91.397

In [142]:
#mini-batch gradient descent
%matplotlib qt
lr = [0.00005,0.0001,0.0002, 0.005, 0.01, 0.05, 0.5, 1, 2, 3]
metrics = np.zeros((2,len(lr)))
for i in range(len(lr)):
    weights = np.random.normal(0, 0.01, (len(features) + 1))
    print("Learning Rate:",lr[i])
    weights, _ = trainMiniBatch(weights, samples, grounds, 40, lr[i])
    metrics[0,i],_,_,_,metrics[1,i] = forward(weights, test_samples)[1]
plotParameter(lr, metrics, ["LR","Accuracy Trial "+str(run),"F_2 Trial "+str(run)])
run += 1

Learning Rate: 5e-05
Wrong guesses:         69
Accuracy:              68.34862%	Precision:             100.00000%
Recall:                64.24870%	Specificity:           100.00000%
F-Measures(1, 2, 0.5): 0.7823, 0.6920, 0.8999
Confusion Matrix:
 [[124.   0.]
 [ 69.  25.]] 

Learning Rate: 0.0001
Wrong guesses:         52
Accuracy:              76.14679%	Precision:             99.19355%
Recall:                70.68966%	Specificity:           97.72727%
F-Measures(1, 2, 0.5): 0.8255, 0.7500, 0.9179
Confusion Matrix:
 [[123.   1.]
 [ 51.  43.]] 

Learning Rate: 0.0002
Wrong guesses:         39
Accuracy:              82.11009%	Precision:             99.19355%
Recall:                76.39752%	Specificity:           98.24561%
F-Measures(1, 2, 0.5): 0.8632, 0.8008, 0.9361
Confusion Matrix:
 [[123.   1.]
 [ 38.  56.]] 

Learning Rate: 0.005
Wrong guesses:         16
Accuracy:              92.66055%	Precision:             99.19355%
Recall:                89.13043%	Specificity:           98.75000

In [58]:
#stochastic gradient descent
%matplotlib qt
lr = [0.00005,0.0001,0.0002 ,0.001, 0.005, 0.05, 0.5, 1]
metrics = np.zeros((2,len(lr)))
for i in range(len(lr)):
    weights = np.random.normal(0, 0.01, (len(features) + 1))
    print("Learning Rate:",lr[i])
    weights = trainStochastic(weights, samples, grounds, 40, lr[i])
    metrics[0,i],_,_,_,metrics[1,i] = forward(weights, test_samples)[1]
plotParameter(lr, metrics, ["LR","Accuracy Trial "+str(run),"F_2 Trial "+str(run)])
run += 1

Learning Rate: 5e-05
Wrong guesses:         18
Accuracy:              91.74312%	Precision:             95.16129%
Recall:                90.76923%	Specificity:           93.18182%
F-Measures(1, 2, 0.5): 0.9291, 0.9161, 0.9425
Confusion Matrix:
 [[118.   6.]
 [ 12.  82.]] 

Learning Rate: 0.0001
Wrong guesses:         17
Accuracy:              92.20183%	Precision:             99.19355%
Recall:                88.48921%	Specificity:           98.73418%
F-Measures(1, 2, 0.5): 0.9354, 0.9044, 0.9685
Confusion Matrix:
 [[123.   1.]
 [ 16.  78.]] 

Learning Rate: 0.0002
Wrong guesses:         16
Accuracy:              92.66055%	Precision:             99.19355%
Recall:                89.13043%	Specificity:           98.75000%
F-Measures(1, 2, 0.5): 0.9389, 0.9098, 0.9700
Confusion Matrix:
 [[123.   1.]
 [ 15.  79.]] 

Learning Rate: 0.001
Wrong guesses:         23
Accuracy:              89.44954%	Precision:             91.12903%
Recall:                90.40000%	Specificity:           88.17204%


In [None]:
#batch gradient descent with best learning rate
weights = np.random.normal(0, 0.01, (len(features) + 1))
weights = train(weights, samples, grounds, 40, 0.0013021)
_, _ = forward(weights, test_samples)

Wrong guesses:         16
Accuracy:              92.66055%	Precision:             94.35484%
Recall:                92.85714%	Specificity:           92.39130%
F-Measures(1, 2, 0.5): 0.9360, 0.9315, 0.9405
Confusion Matrix:
 [[117.   7.]
 [  9.  85.]] 



In [64]:
#mini-batch gradient descent with best learning rate
weights = np.random.normal(0, 0.01, (len(features) + 1))
weights, _ = trainMiniBatch(weights, samples, grounds, 40, 0.05)
_, _ = forward(weights, test_samples)

Wrong guesses:         13
Accuracy:              94.03670%	Precision:             98.38710%
Recall:                91.72932%	Specificity:           97.64706%
F-Measures(1, 2, 0.5): 0.9494, 0.9299, 0.9698
Confusion Matrix:
 [[122.   2.]
 [ 11.  83.]] 



In [67]:
#stochastic gradient descent with best learning rate
weights = np.random.normal(0, 0.01, (len(features) + 1))
weights = trainStochastic(weights, samples, grounds, 40, 0.0002)
_, _ = forward(weights, test_samples)

Wrong guesses:         16
Accuracy:              92.66055%	Precision:             99.19355%
Recall:                89.13043%	Specificity:           98.75000%
F-Measures(1, 2, 0.5): 0.9389, 0.9098, 0.9700
Confusion Matrix:
 [[123.   1.]
 [ 15.  79.]] 



In [80]:
#mini-batch gradient descent with different initializations of weights
%matplotlib qt
initials = [ np.random.normal(0, 0.01, (len(features) + 1)),
            np.ones((len(features) + 1)),
            np.zeros((len(features) + 1))]
colorLegend = [['Random','skyblue',8],['Uniform','orange',6],['Zeros','red',4]]
for idx in range(len(initials)):
    weights = initials[idx]
    weights, accuracy = trainMiniBatch(weights, samples, grounds, 40, 0.05, seed = 1)
    print(colorLegend[idx][0], "Distribution Metrics")
    _, _ = forward(weights, test_samples)
    #plot
    plt.plot(accuracy[1], accuracy[0], marker='o', color=colorLegend[idx][1], label=colorLegend[idx][0], markersize=colorLegend[idx][2], linewidth=0)
plt.title('Accuracy vs Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

Random Distribution Metrics
Wrong guesses:         11
Accuracy:              94.95413%	Precision:             98.38710%
Recall:                93.12977%	Specificity:           97.70115%
F-Measures(1, 2, 0.5): 0.9569, 0.9414, 0.9729
Confusion Matrix:
 [[122.   2.]
 [  9.  85.]] 

Uniform Distribution Metrics
Wrong guesses:         11
Accuracy:              94.95413%	Precision:             98.38710%
Recall:                93.12977%	Specificity:           97.70115%
F-Measures(1, 2, 0.5): 0.9569, 0.9414, 0.9729
Confusion Matrix:
 [[122.   2.]
 [  9.  85.]] 

Zeros Distribution Metrics
Wrong guesses:         11
Accuracy:              94.95413%	Precision:             98.38710%
Recall:                93.12977%	Specificity:           97.70115%
F-Measures(1, 2, 0.5): 0.9569, 0.9414, 0.9729
Confusion Matrix:
 [[122.   2.]
 [  9.  85.]] 

