In [1]:
# Python Standard Libraries for importing data from binary file
import os.path #for accessing the file path
import struct  #for unpacking the binary data

import time    #for calculating time
import math    #for using floor in creating minibatches


#core packages
import numpy as np
import matplotlib.pyplot as plt

#custom module
from dataPrep import load_dataset, load_sample_dataset
from dataPrep import prep_dataset, rand_mini_batches

from finalModelUtils import *
from ffnn import *

np.random.seed(1)
%matplotlib inline

In [2]:
# Load the complete dataset: image and label arrays for the train, dev and test splits.
(
    train_x_orig, train_y_orig,
    dev_x_orig, dev_y_orig,
    test_x_orig, test_y_orig,
) = load_dataset()

In [3]:
# Load a reduced copy of the dataset; sample_size is reported below as a percentage.
sample_size = 25
train_x_sample, train_y_sample, dev_x_sample, dev_y_sample, test_x_sample, test_y_sample = load_sample_dataset(sample_size)

# Side-by-side shape comparison of the complete splits and the sampled splits.
print("Sample Size : %d%%\n"%(sample_size))
print("Data\t\t\t","Complete Dataset\t","Sample Dataset\t")
print("================================================================")
print("Training Set Images:\t"+ str(train_x_orig.shape)+"\t\t"+ str(train_x_sample.shape))
print("Training Set Labels:\t"+ str(train_y_orig.shape)+"\t\t"+ str(train_y_sample.shape))
# These two rows report the dev split, so they are labelled "Dev Set"
# (they were previously printed under a second "Training Set" label).
print("Dev Set Images:\t\t"+ str(dev_x_orig.shape)+"\t\t"+ str(dev_x_sample.shape))
print("Dev Set Labels:\t\t"+ str(dev_y_orig.shape)+"\t\t"+ str(dev_y_sample.shape))
print("Test Set Images:\t"+str(test_x_orig.shape)+"\t\t"+ str(test_x_sample.shape))
print("Test Set Labels:\t"+str(test_y_orig.shape)+"\t\t"+ str(test_y_sample.shape))
print("================================================================")


Sample Size : 25%

Data			 Complete Dataset	 Sample Dataset	
Training Set Images:	(60000, 28, 28)		(15000, 28, 28)
Training Set Labels:	(60000, 1)		(15000, 1)
Training Set Images:	(5000, 28, 28)		(1250, 28, 28)
Training Set Labels:	(5000, 1)		(1250, 1)
Test Set Images:	(5000, 28, 28)		(1250, 28, 28)
Test Set Labels:	(5000, 1)		(1250, 1)


In [4]:
# Prepare the sampled dataset (flattening and normalizing, per prep_dataset),
# then print each array's shape before and after processing.
(
    train_x_norm, train_y_encoded,
    dev_x_norm, dev_y_encoded,
    test_x_norm, test_y_encoded,
) = prep_dataset(
    train_x_sample, train_y_sample,
    dev_x_sample, dev_y_sample,
    test_x_sample, test_y_sample,
)

print("Data\t\t\t","Before Processing\t","After Processing")
print("=================================================================")
# One table row per array: label, raw array, processed array.
for _label, _before, _after in (
    ("Training Set Images:\t", train_x_sample, train_x_norm),
    ("Training Set Labels:\t", train_y_sample, train_y_encoded),
    ("Dev Set Images:\t\t", dev_x_sample, dev_x_norm),
    ("Dev Set Labels:\t\t", dev_y_sample, dev_y_encoded),
    ("Test Set Images:\t", test_x_sample, test_x_norm),
    ("Test Set Labels:\t", test_y_sample, test_y_encoded),
):
    print(_label + str(_before.shape) + "\t\t" + str(_after.shape))
print("=================================================================")

Data			 Before Processing	 After Processing
Training Set Images:	(15000, 28, 28)		(784, 15000)
Training Set Labels:	(15000, 1)		(10, 15000)
Dev Set Images:		(1250, 28, 28)		(784, 1250)
Dev Set Labels:		(1250, 1)		(10, 1250)
Test Set Images:	(1250, 28, 28)		(784, 1250)
Test Set Labels:	(1250, 1)		(10, 1250)


## Model Implementation

In [5]:
# Layer dimensions derived from the prepared training data, with hidden layers of 800 and 300.
layers_dim = init_layers(
    train_x_norm,
    train_y_encoded,
    hidden_layers=[800, 300],
)

In [None]:
# Hyper parameters for a single baseline training run.
hyperParams = init_hyperParams(
    alpha=0.0001,
    num_epoch=5,
    mini_batch_size=512,
    lambd=0.7,
    keep_probs=[0.9, 0.8],
)

In [None]:
# Baseline training run using the layer sizes and hyper parameters defined above.
parameters = train(
    train_x_norm, train_y_encoded,
    dev_x_norm, dev_y_encoded,
    layers_dim, hyperParams,
    initialization="he",
    optimizer="adam",
    regularizer="l2",
)

## Random Search 2D

### Random Sampling

In [None]:
# Draw 100 values log-uniformly between 1e-5 and 1e-1: sample the base-10
# exponent uniformly, then exponentiate.
sam = []
for i in range(100):
    r = np.random.uniform(-5, -1)
    p = 10 ** r
    sam.append(p)

# The x-axis is logarithmic, so the bin edges are spaced evenly in log10 as well
# (11 edges -> 10 bins). Equal-width linear bins would put almost every decade
# into the first bar and hide the log-uniform shape.
plt.hist(sam, bins=np.logspace(-5, -1, 11))
plt.xscale('log')
plt.grid()

In [6]:
def sample_hyperParams(hParam_type, rng, sample_size):
    """
    Draw random candidate values for a single hyper parameter.

    Arguments:
        hParam_type - "learning_rate" or "minibatch_size"
        rng - two-element sequence [lower, higher].
              For "learning_rate" the bounds are base-10 exponents: an exponent is
              drawn uniformly and the sample is 10 ** exponent.
              For "minibatch_size" the bounds are passed to np.random.randint, so
              the upper bound is exclusive.
        sample_size - number of values to draw

    Returns:
        sample - list holding sample_size drawn values

    Raises:
        ValueError - if hParam_type is not one of the two supported names
    """
    # Validate once, before sampling, so an unsupported type is rejected even
    # when sample_size is 0 (the loop body would never run in that case).
    if hParam_type not in ("learning_rate", "minibatch_size"):
        raise ValueError("Sampling of only learning rate and minibatch size is possible for now")

    lower_range = rng[0]
    higher_range = rng[1]

    if hParam_type == "learning_rate":
        # Uniform in the exponent gives a log-uniform learning rate.
        def draw():
            return 10 ** np.random.uniform(lower_range, higher_range)
    else:
        def draw():
            return np.random.randint(low = lower_range, high = higher_range)

    # One scalar draw per sample keeps the random stream identical to a plain loop.
    sample = [draw() for _ in range(sample_size)]

    assert(len(sample) == sample_size)
    return sample



In [None]:
# Draw 300 candidate learning rates and 300 candidate minibatch sizes.
leaning_rate_ = sample_hyperParams(hParam_type ="learning_rate",rng = [-5,-1],sample_size = 300)
minibatch_size_ = sample_hyperParams(hParam_type ="minibatch_size",rng = [50,2200],sample_size = 300)


# 2x2 grid of panels; the top-right panel is left empty.
fig, ax = plt.subplots(nrows = 2, ncols = 2, figsize=(20,10))

# Top-left: sampled (minibatch size, learning rate) pairs with a log-scaled
# learning-rate axis; 10e-6 .. 10e-2 is 1e-5 .. 1e-1, the sampled range.
ax[0,0].set_ylim(top = 10e-2, bottom = 10e-6)
ax[0,0].scatter(minibatch_size_,leaning_rate_)
ax[0,0].set_yscale('log')
ax[0,0].grid()

# Bottom-left: distribution of the sampled minibatch sizes.
num_mini_batch = ax[1,0].hist(minibatch_size_, bins = 20)

# Bottom-right: distribution of the sampled learning rates. The axis is
# logarithmic, so the 100 bins are spaced evenly in log10 (101 edges); with
# equal-width linear bins nearly all samples would land in the first few bars.
num_alpha = ax[1,1].hist(leaning_rate_, bins = np.logspace(-5, -1, 101))
ax[1,1].set_xscale('log') #changing the scale to log scale to plot learning rate which is in log scale
plt.show()

In [None]:
# Scratch prototype of the range-narrowing step, run on random stand-in data:
# 100 random "scores", 100 log-uniform learning rates and 100 minibatch sizes.
sc = np.random.uniform(0,1,100)
thres = 0.90

lr = []
for i in range(100):
    r = np.random.uniform(-5, -1)
    p = 10 ** r
    lr.append(p)

mbz = np.random.randint(50,2200,100)

best_lr = []
best_mbz = []
best_sc = []

# Keep only the samples whose score is above the threshold.
for ind, val in enumerate(sc):
    if val > thres:
        best_lr.append(lr[ind])
        best_mbz.append(mbz[ind])
        best_sc.append(val)

print(best_sc, end = "\n\n")

if len(best_sc) !=0:

    # np.max / np.argmax raise ValueError on an empty sequence, so the best
    # score is only reported once we know at least one sample passed.
    print(np.max(best_sc), np.argmax(best_sc))

    #generating the log range
    lg_min_lr = np.log10(min(best_lr))
    lg_max_lr = np.log10(max(best_lr))

    print(lg_min_lr,lg_max_lr)
    #generating the minimum change
    min_change = 2 * (10 ** int(lg_min_lr-2))
    max_change = 2 * (10 ** int(lg_max_lr-2))
    print(min(best_lr),max(best_lr))
    print(min_change,max_change, end = "\n\n")

    # slightly expanding the range
    print(min(best_lr) - min_change,max(best_lr)+max_change)
    print(np.log10(min(best_lr) - min_change),np.log10(max(best_lr)+max_change))

    print(min(best_mbz), max(best_mbz))
    print(min(best_mbz)-20, max(best_mbz)+20)

else:
    print("No pair hyper parameters could give the accuracy beyond the Threshold")



In [7]:
def calculate_new_range(score, threshold, samples, summary = True):
    """
    Narrow the search ranges around the samples that scored above a threshold.

    Arguments:
        score - sequence of scores, aligned index-by-index with the samples
        threshold - a sample is kept only when its score is strictly greater than this
        samples - dict with keys "learning rate" and "minibatch size", each a
                  sequence with one entry per score
        summary - when truthy, print a table of the kept samples, the new ranges
                  and the best combination

    Returns:
        lr_rng_small - [low, high] learning-rate range as log10 exponents
        mbs_rng_small - [low, high] minibatch-size range
        best_comb - [best score, its learning rate, its minibatch size]

        When no score exceeds the threshold a message is printed and
        (None, None, None) is returned.
    """
    learning_rate = samples["learning rate"]
    minibatch_size = samples["minibatch size"]

    good_lrs = []
    good_mbs = []
    good_scs = []

    #getting the score above the threshold and along with their corresponding hyper parameters
    for ind, val in enumerate(score):
        if val > threshold:
            good_lrs.append(learning_rate[ind])
            good_mbs.append(minibatch_size[ind])
            good_scs.append(val)

    # Nothing passed the threshold: there is no range to compute, so report it
    # and return explicit "no result" values instead of hitting unbound names below.
    if len(good_scs) == 0:
        print("No pair hyper parameters could give the accuracy beyond the Threshold")
        return None, None, None

    #generating the log range for learning rate
    lg_min_lr = np.log10(min(good_lrs))
    lg_max_lr = np.log10(max(good_lrs))

    #generating the minimum change for learning rate
    # (2 x a power of ten taken from each bound's truncated log10 exponent minus 2)
    min_lr_change = 2 * (10 ** int(lg_min_lr-2))
    max_lr_change = 2 * (10 ** int(lg_max_lr-2))

    #calculating the new search range in log10 form after expanding the learning rate space
    exp_lr_min = min(good_lrs) - min_lr_change
    exp_lr_max = max(good_lrs) + max_lr_change
    lr_rng_small = [np.log10(exp_lr_min), np.log10(exp_lr_max)]

    #calculating the new search range for minibatch size by expanding the search space
    # The lower bound is clamped to 1: subtracting 20 from a small minibatch size
    # would otherwise yield a zero or negative size.
    mbs_rng_small = [max(1, min(good_mbs) - 20), max(good_mbs) + 20]

    #best hyper parameter combinations
    best_ind = np.argmax(good_scs)
    best_comb = [np.max(good_scs), good_lrs[best_ind], good_mbs[best_ind]]

    if summary:
        print("+================+===============+================+")
        print("| Validation Acc | Learning Rate | Minibatch Size |")
        print("+================+===============+================+")

        for ind,sc in enumerate(good_scs):
            print("| %.5f\t | %.5f\t | %d\t\t  |"%(sc, good_lrs[ind], good_mbs[ind]))

        print("+================+===============+================+")

        print("New Search Space for Learning Rate: [%.6f,%.6f]"%(exp_lr_min, exp_lr_max))
        print("New Search Space for Minibatch Size: [%d,%d]"%(mbs_rng_small[0], mbs_rng_small[1]))
        print("Best Score: %.6f "%(best_comb[0]))
        print("Best Hyper Params:\n Learning Rate: %.6f\n Minibatch Size: %d"%(best_comb[1], best_comb[2]))

    return lr_rng_small, mbs_rng_small, best_comb


In [None]:
# Try calculate_new_range on random stand-in data: 100 random scores paired
# with 100 log-uniform learning rates and 100 random minibatch sizes.
thres = 0.90
score_threshold = 0.90

sc = np.random.uniform(0, 1, 100)
lr = [10 ** np.random.uniform(-5, -1) for _ in range(100)]
mbz = np.random.randint(50, 2200, 100)

samples = {
    "learning rate": lr,
    "minibatch size": mbz,
}

lr_rng, mbs_rng, best = calculate_new_range(
    sc,
    score_threshold,
    samples,
    summary=True,
)

In [8]:
# Coarse search trains for 2 epochs per sample, fine search for 5 epochs, and detail search for 10 epochs.

def random_search_2D(param1, param2, search_type ="coarse", evaluate_metric = "test_acc", sample_size = 100, summary = True):
    """
    Random search over two hyper parameters: train one model per sampled pair,
    score it, and narrow the search ranges around the pairs that scored well.

    Arguments:
        param1 - dict with "hParam type" and "hParam range"; its samples are used
                 as the learning rate (alpha) of each run
        param2 - dict with "hParam type" and "hParam range"; its samples are used
                 as the minibatch size of each run
        search_type - "coarse" (2 epochs, threshold 0.90), "fine" (5 epochs,
                      threshold 0.95) or "detail" (10 epochs, threshold 0.97)
        evaluate_metric - "test_acc" scores on test_x_norm / test_y_encoded,
                          "val_acc" scores on dev_x_norm / dev_y_encoded
        sample_size - number of hyper parameter pairs to train
        summary - forwarded to calculate_new_range; also prints a heading when truthy

    Returns:
        lr_rng_small, mbs_rng_small, best_comb - as returned by calculate_new_range

    Raises:
        ValueError - for an unknown search_type or evaluate_metric

    Note: reads the notebook-level train_x_norm, train_y_encoded, dev_x_norm,
    dev_y_encoded, test_x_norm, test_y_encoded and layers_dim.
    NOTE(review): the default metric is "test_acc"; choosing hyper parameters on
    the test split makes the final test score optimistic - consider "val_acc".
    """
    hParam_type1 = param1["hParam type"]
    hParam_range1 = param1["hParam range"]

    hParam_type2 = param2["hParam type"]
    hParam_range2 = param2["hParam range"]

    # search type -> (epochs per training run, score threshold for "good" samples)
    search_settings = {
        "coarse": (2, 0.90),
        "fine": (5, 0.95),
        "detail": (10, 0.97),
    }
    if search_type not in search_settings:
        raise ValueError("Search Type not identified. Must be 'coarse', 'fine', or 'detail'")
    epoch_size, score_threshold = search_settings[search_type]

    # Reject a bad metric before any sampling or training: the check used to sit
    # inside the loop, so the error only surfaced after a full training run.
    if evaluate_metric not in ("test_acc", "val_acc"):
        raise ValueError("Metric must be 'val_acc' or 'test_acc'")

    ##generating Samples
    leaning_rate_samples = sample_hyperParams(hParam_type = hParam_type1, rng = hParam_range1, sample_size = sample_size)
    minibatch_size_samples = sample_hyperParams(hParam_type = hParam_type2,rng = hParam_range2, sample_size = sample_size)

    samples = {"learning rate": leaning_rate_samples,
               "minibatch size" : minibatch_size_samples}

    score = []

    search_start = time.time()

    for i in range(sample_size):

        print("\nSample: %d/%d -- Learning Rate: %.6f | Minibatch Size: %d"%(i+1,sample_size,leaning_rate_samples[i],minibatch_size_samples[i]))
        print("==========================================================================================================")

        hyperParams = init_hyperParams(alpha = leaning_rate_samples[i], num_epoch = epoch_size, mini_batch_size = minibatch_size_samples[i])

        parameters = train(train_x_norm, train_y_encoded, dev_x_norm, dev_y_encoded,layers_dim, hyperParams, initialization = "he", optimizer = 'adam', visualize = False)

        # Only the accuracy (second value returned by predict) is kept as the score.
        if evaluate_metric == "test_acc":
            _, acc, _ = predict(test_x_norm, test_y_encoded,parameters)
        else:
            _, acc, _ = predict(dev_x_norm, dev_y_encoded,parameters)
        score.append(acc)

    print("==========================================================================================================")

    search_end = time.time() # for calculating entire search time
    hrs, mins, secs , ms = convert_time((search_end - search_start)*1000)

    print("\n\n*************************** Total Search Time = %dhr %dmins %dsecs %.2fms ***************************\n\n"%(hrs, mins, secs, ms))

    assert(len(score) == sample_size)

    if summary:
        print(search_type.capitalize()+" Search Summary for Hyper Parameters with high Accuracy: \n")
    lr_rng_small, mbs_rng_small, best_comb = calculate_new_range(score, score_threshold, samples, summary = summary)

    return lr_rng_small, mbs_rng_small, best_comb

In [9]:
# Coarse random search: wide ranges, 60 sampled pairs.
# The learning-rate range is given as base-10 exponents.
# NOTE(review): scoring uses "test_acc"; consider "val_acc" so the test split
# stays untouched during tuning.
lr_rng_coarse = [-4, -1]
mbs_rng_coarse = [50, 2200]

param1_coarse = {
    "hParam type": "learning_rate",
    "hParam range": lr_rng_coarse,
}
param2_coarse = {
    "hParam type": "minibatch_size",
    "hParam range": mbs_rng_coarse,
}

# The narrowed ranges returned here are the inputs of the fine search.
lr_rng_fine, mbs_rng_fine, best_comb = random_search_2D(
    param1=param1_coarse,
    param2=param2_coarse,
    search_type="coarse",
    evaluate_metric="test_acc",
    sample_size=60,
    summary=True,
)



Sample: 1/60 -- Learning Rate: 0.037885 | Minibatch Size: 117

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 1mins 38secs 439.33ms ***************************

Sample: 2/60 -- Learning Rate: 0.004453 | Minibatch Size: 1000

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 14secs 225.18ms ***************************

Sample: 3/60 -- Learning Rate: 0.000237 | Minibatch Size: 1211

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 13secs 671.37ms ***************************

Sample: 4/60 -- Learning Rate: 0.041277 | Minibatch Size: 657

Epoch: 1/2


  cost = -(1./m) * np.sum(np.sum(np.multiply(Y,np.log(AL)), axis = 0,keepdims=True))
  cost = -(1./m) * np.sum(np.sum(np.multiply(Y,np.log(AL)), axis = 0,keepdims=True))


Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 20secs 259.72ms ***************************

Sample: 5/60 -- Learning Rate: 0.000100 | Minibatch Size: 460

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 32secs 103.71ms ***************************

Sample: 6/60 -- Learning Rate: 0.000102 | Minibatch Size: 2175

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 7secs 996.15ms ***************************

Sample: 7/60 -- Learning Rate: 0.032530 | Minibatch Size: 1667

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 8secs 706.37ms ***************************

Sample: 8/60 -- Learning Rate: 0.002051 | Minibatch Size: 153

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 1mins 25secs 147.39ms ***************************

Sample: 9/60 -- Learning Rate: 0.052590 | Minibatch Size: 826

Epoch: 1/2
Epoch: 2/2

*************************** Total

  A = np.divide(t,np.sum(t,axis = 0))


3426/15000 [==>........ 23%] - 0.96s | loss: nan | acc: 0.0977 | Val loss: nan | Val acc: 0.0840 

  dZ[Z < 0] = 0


Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 8secs 64.47ms ***************************

Sample: 17/60 -- Learning Rate: 0.000126 | Minibatch Size: 1174

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 13secs 47.01ms ***************************

Sample: 18/60 -- Learning Rate: 0.089180 | Minibatch Size: 370

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 30secs 472.26ms ***************************

Sample: 19/60 -- Learning Rate: 0.005315 | Minibatch Size: 790

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 17secs 286.46ms ***************************

Sample: 20/60 -- Learning Rate: 0.076804 | Minibatch Size: 72

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 2mins 39secs 925.72ms ***************************

Sample: 21/60 -- Learning Rate: 0.000176 | Minibatch Size: 835

Epoch: 1/2
Epoch: 2/2

*************************** Tot

Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 8secs 686.76ms ***************************

Sample: 33/60 -- Learning Rate: 0.001022 | Minibatch Size: 105

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 2mins 5secs 320.31ms ***************************

Sample: 34/60 -- Learning Rate: 0.006368 | Minibatch Size: 1814

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 9secs 135.22ms ***************************

Sample: 35/60 -- Learning Rate: 0.037837 | Minibatch Size: 810

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 16secs 450.34ms ***************************

Sample: 36/60 -- Learning Rate: 0.001981 | Minibatch Size: 1298

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 12secs 30.95ms ***************************

Sample: 37/60 -- Learning Rate: 0.009011 | Minibatch Size: 793

Epoch: 1/2
Epoch: 2/2

*************************** To

Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 17secs 65.17ms ***************************

Sample: 49/60 -- Learning Rate: 0.004712 | Minibatch Size: 1654

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 10secs 676.97ms ***************************

Sample: 50/60 -- Learning Rate: 0.000953 | Minibatch Size: 1586

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 0mins 10secs 528.55ms ***************************

Sample: 51/60 -- Learning Rate: 0.000116 | Minibatch Size: 163

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 1mins 19secs 557.33ms ***************************

Sample: 52/60 -- Learning Rate: 0.000649 | Minibatch Size: 202

Epoch: 1/2
Epoch: 2/2

*************************** Total Training Time = 0hr 1mins 5secs 102.39ms ***************************

Sample: 53/60 -- Learning Rate: 0.003210 | Minibatch Size: 968

Epoch: 1/2
Epoch: 2/2

*************************** 

In [12]:
# Fine random search: 30 sampled pairs drawn from the ranges produced by the coarse search.
param1_fine = {
    "hParam type": "learning_rate",
    "hParam range": lr_rng_fine,
}
param2_fine = {
    "hParam type": "minibatch_size",
    "hParam range": mbs_rng_fine,
}

# The narrowed ranges returned here are the inputs of the detail search.
lr_rng_detail, mbs_rng_detail, best_comb = random_search_2D(
    param1=param1_fine,
    param2=param2_fine,
    search_type="fine",
    evaluate_metric="test_acc",
    sample_size=30,
    summary=True,
)



Sample: 1/30 -- Learning Rate: 0.002313 | Minibatch Size: 1216

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 33secs 646.35ms ***************************

Sample: 2/30 -- Learning Rate: 0.002382 | Minibatch Size: 1253

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 36secs 545.34ms ***************************

Sample: 3/30 -- Learning Rate: 0.002348 | Minibatch Size: 1225

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 37secs 939.72ms ***************************

Sample: 4/30 -- Learning Rate: 0.002389 | Minibatch Size: 1218

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 34secs 754.02ms ***************************

Sample: 5/30 -- Learning Rate: 0.002296 | Minibatch Size: 1227

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Ep

Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 32secs 998.16ms ***************************

Sample: 11/30 -- Learning Rate: 0.002055 | Minibatch Size: 1238

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 32secs 838.20ms ***************************

Sample: 12/30 -- Learning Rate: 0.002080 | Minibatch Size: 1226

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 33secs 14.72ms ***************************

Sample: 13/30 -- Learning Rate: 0.002317 | Minibatch Size: 1229

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 32secs 897.04ms ***************************

Sample: 14/30 -- Learning Rate: 0.002174 | Minibatch Size: 1251

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 30secs 820.98m

Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 33secs 29.44ms ***************************

Sample: 21/30 -- Learning Rate: 0.002173 | Minibatch Size: 1233

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 32secs 977.81ms ***************************

Sample: 22/30 -- Learning Rate: 0.002355 | Minibatch Size: 1215

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 32secs 992.97ms ***************************

Sample: 23/30 -- Learning Rate: 0.002079 | Minibatch Size: 1241

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 32secs 928.10ms ***************************

Sample: 24/30 -- Learning Rate: 0.002106 | Minibatch Size: 1237

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0h

Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 33secs 665.81ms ***************************

Sample: 30/30 -- Learning Rate: 0.002142 | Minibatch Size: 1223

Epoch: 1/5
Epoch: 2/5
Epoch: 3/5
Epoch: 4/5
Epoch: 5/5

*************************** Total Training Time = 0hr 0mins 33secs 183.43ms ***************************


*************************** Total Search Time = 0hr 16mins 48secs 169.67ms ***************************


Fine Search Summary for Hyper Parameters with high Accuracy: 

| Validation Acc | Learning Rate | Minibatch Size |
| 0.95440	 | 0.00231	 | 1216		  |
| 0.95760	 | 0.00238	 | 1253		  |
| 0.95680	 | 0.00235	 | 1225		  |
| 0.95600	 | 0.00239	 | 1218		  |
| 0.95680	 | 0.00230	 | 1227		  |
| 0.95600	 | 0.00229	 | 1236		  |
| 0.95520	 | 0.00216	 | 1229		  |
| 0.95440	 | 0.00206	 | 1236		  |
| 0.96080	 | 0.00230	 | 1224		  |
| 0.95360	 | 0.00206	 | 1238		  |
| 0.95520	 | 0.00208	 | 1226		  |
| 0.95600	 | 0.00232	 | 1229		  |
| 0.96080	 | 0.00217	 | 1251

In [14]:
#Detail Random Search

param1_detail = {"hParam type": "learning_rate" ,
          "hParam range": lr_rng_detail }

param2_detail = {"hParam type": "minibatch_size",
          "hParam range": mbs_rng_detail }

# The narrowed ranges go into their own names. Writing them back to
# lr_rng_fine / mbs_rng_fine would overwrite the inputs of the fine-search cell,
# so re-running that cell would silently search a different range.
lr_rng_final, mbs_rng_final, best_comb = random_search_2D(param1 = param1_detail, param2 = param2_detail , search_type ="detail", evaluate_metric = "test_acc", sample_size = 20, summary = True)



Sample: 1/20 -- Learning Rate: 0.001904 | Minibatch Size: 1200

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 5secs 594.18ms ***************************

Sample: 2/20 -- Learning Rate: 0.002357 | Minibatch Size: 1199

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 6secs 716.29ms ***************************

Sample: 3/20 -- Learning Rate: 0.001897 | Minibatch Size: 1199

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 6secs 945.30ms ***************************

Sample: 4/20 -- Learning Rate: 0.001950 | Minibatch Size: 1201

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch

Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 6secs 338.78ms ***************************

Sample: 7/20 -- Learning Rate: 0.002153 | Minibatch Size: 1240

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 7secs 624.19ms ***************************

Sample: 8/20 -- Learning Rate: 0.002107 | Minibatch Size: 1235

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 6secs 742.08ms ***************************

Sample: 9/20 -- Learning Rate: 0.001954 | Minibatch Size: 1200

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 6secs 836.71ms *****************

Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 7secs 88.50ms ***************************

Sample: 13/20 -- Learning Rate: 0.002420 | Minibatch Size: 1204

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 6secs 810.42ms ***************************

Sample: 14/20 -- Learning Rate: 0.002080 | Minibatch Size: 1204

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 7secs 494.97ms ***************************

Sample: 15/20 -- Learning Rate: 0.001920 | Minibatch Size: 1264

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** To

Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 2secs 404.31ms ***************************

Sample: 18/20 -- Learning Rate: 0.002145 | Minibatch Size: 1256

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 3secs 426.91ms ***************************

Sample: 19/20 -- Learning Rate: 0.002106 | Minibatch Size: 1266

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 2secs 107.57ms ***************************

Sample: 20/20 -- Learning Rate: 0.002547 | Minibatch Size: 1255

Epoch: 1/10
Epoch: 2/10
Epoch: 3/10
Epoch: 4/10
Epoch: 5/10
Epoch: 6/10
Epoch: 7/10
Epoch: 8/10
Epoch: 9/10
Epoch: 10/10

*************************** Total Training Time = 0hr 1mins 2secs 209.70ms ***************************


********

In [None]:
# Print the scores and the index (or indices) of the maximum score.
# NOTE(review): no cell visible here assigns a notebook-level `score` (it only
# exists as a local inside random_search_2D) - confirm where it should come
# from before running this cell on a fresh kernel.
print(score)
print(np.where(score == np.max(score)))

In [None]:
# Heat map of `score`, with one annotated cell per (minibatch size, learning rate) pair.
# NOTE(review): this cell indexes `score` as a 2-D array (`score[i, j]`), but no
# cell visible here assigns a notebook-level `score`, and the title says
# "Grid Search". It looks like a leftover from a grid-search notebook - confirm
# before running.
fig, ax = plt.subplots(figsize=(8,8))
im = plt.imshow(score,cmap="RdYlGn") #RdYlGn, PiYG, Accent,Blues,viridis, YlGnBu
fig.colorbar(im,ax=ax,fraction=0.045)

# One x tick per entry of `lr` and one y tick per entry of `mbz`.
m = len(lr)
n = len(mbz)
    
ax.set_title("Result of Grid Search",fontsize=24,pad = 20)
ax.set_xticks(range(0,m))
ax.set_yticks(range(0,n))
ax.set_xlabel("Learning Rate", fontsize = 20)
ax.set_ylabel("Minibatch Size", fontsize = 20)

ax.set_xticklabels(lr,fontsize=16,rotation=45)
ax.set_yticklabels(mbz,fontsize=16)

#setting horizontal axes labeling to top.
ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')


# Turn off all the ticks
ax.tick_params(top=False,left=False)


# Scores below thres[0] or above thres[1] are annotated in white text, the rest in black.
thres = [0.88, 0.96]

for i in range(n):
    for j in range(m):
        ax.text(j, i, "%.4f"%(score[i, j]),
                       ha="center", va="center", color="w" if score[i,j] > thres[1] or score[i,j] < thres[0]  else "black")


fig.tight_layout()
plt.show()


In [None]:
# Confusion matrices for the train, dev and test splits, built from the sampled labels.
# NOTE(review): prediction_train, prediction_dev and prediction_test are not
# assigned at notebook scope in any cell visible here - confirm which predict()
# calls are meant to produce them.
cm_train = confusion_matrix(train_y_sample,prediction_train)

cm_dev = confusion_matrix(dev_y_sample,prediction_dev)

cm_test = confusion_matrix(test_y_sample,prediction_test)

In [None]:
# Metrics computed from the training-set confusion matrix (cm_train from the cell above).
metrics, macro_metrics, acc = model_metrics(cm_train)

In [None]:
# Save the hyper parameters under the file name "hyperParameters".
# NOTE(review): `hyper_params` is not defined in any cell visible here; the
# notebook's variable is `hyperParams` - confirm which object should be saved.
save_model(file_name = "hyperParameters", parameters = hyper_params)

In [None]:
# Load a previously saved object from "final_model_adam_dropout".
# NOTE(review): this is a different file name from the "hyperParameters" file
# saved in the cell above - confirm it really holds hyper parameters.
loaded_hyper_params = load_model(file_name = "final_model_adam_dropout")

In [None]:
# Print every loaded entry as "key: value", one per line.
for key,value in loaded_hyper_params.items():
    print(key + ": " + str(value))