# PART C (Neural Netwroks)

**import:**
* Layer class
* Neural Networks class
* load_datasets() util method

*Exploding gradients were observed when tried to train the model with inappropriate hyperparameters, such as big learning rate and units for hidden layer. In order to avoid runtime warnings that are destined to happen and continue training with other hyperparameters, without interrupting the code, we ignore warnings so that we can find the appropriate  hyperparameters.*

In [1]:
from ipynb.fs.full.NeuralNetworks import *
from ipynb.fs.full.LoadDatasets import *

import warnings
warnings.filterwarnings("ignore")

import gc

In [2]:
X_train, y_train, X_dev, y_dev, X_test, y_test = load_datasets()

**The basic model**

In [3]:
def model(X, y, M=16, learning_rate=0.5, epochs=100, early_stopping=True, B=128, X_dev=None, y_dev=None, use_tqdm=True):
    hidden_layer = Layer(M, X.shape[1], activation='relu')
    output_layer = Layer(1, M, activation='sigmoid')
    
    nn = NeuralNetwork()
    nn.input_layer(X, y.reshape(-1))
    
    nn.add_layer(hidden_layer)
    nn.add_layer(output_layer)
    
    nn.fit(learning_rate=learning_rate, epochs=epochs, early_stopping=early_stopping, X_dev=X_dev, y_dev=y_dev, B=B, use_tqdm=use_tqdm)
    
    return nn

In [4]:
nn1 = model(X_train, y_train, early_stopping=True, B=X_train.shape[0], X_dev=X_dev, y_dev=y_dev)

 27%|█████████████████████▊                                                           | 27/100 [00:00<00:01, 55.06it/s]


In [5]:
print("Test Accuracy: ", f'{nn1.accuracy(X_test, y_test.reshape(-1)):.4f}')
print("Epochs: ", nn1.epochs)

Test Accuracy:  0.9778
Epochs:  28


**Initializes**
* **the learning rates randomly** 
As we want alphas between the range [$10^{-5}$, $0.5$] :
$10^{-5*random}$ should be less or equal than 0.5 and we end up that random >= $\frac{log(2)}{5*log(10)}$


* **the M array (the number of units of hidden layer)**

In [6]:
import random 

random.seed(42)
start = np.log(2)/(5*np.log(10))
alphas = np.array([10**(-5*random.uniform(start,1)) for i in range(10)])
M = [2**i for i in range(1, 10 + 1)]

**Function that tunes the hyperparameters and returns the best:**
* **best_alpha** = Learning rate
* **best_M** = Number of units for the hidden layer
* **best_epochs** = epochs

In [7]:
def tune_model(X_train, y_train, M, alphas, max_epochs=100, early_stopping=False, X_dev=None, y_dev=None, use_tqdm=True, B=128):
    min_cost = 10**10
    best_alpha = 0
    best_M = 0
    best_epochs = 0

    for m in tqdm(M):
        for alpha in alphas:
            nn = model(X_train, y_train, M=m, learning_rate=alpha, epochs = max_epochs,
                       early_stopping=early_stopping, X_dev=X_dev, y_dev=y_dev, use_tqdm=False, B=B)
            
            current_best_cost_index = np.argmin(nn.J)
            if nn.J[current_best_cost_index] < min_cost:
                min_cost=nn.J[current_best_cost_index]
                best_alpha=alpha
                best_M=m
                best_epochs=current_best_cost_index + 1
                
            del nn
            gc.collect()
    
    nn = model(X_train, y_train, M=best_M, learning_rate=best_alpha, epochs = best_epochs,
                   early_stopping=False, X_dev=X_dev, y_dev=y_dev, use_tqdm=False, B=B)
    
    return nn, best_alpha, best_M, best_epochs, min_cost

In [8]:
nn2, best_alpha, best_M, best_epochs, min_cost = tune_model(X_train, y_train, M=M, alphas=alphas, early_stopping=True, X_dev=X_dev, y_dev=y_dev, B=X_train.shape[0])

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [11:53<00:00, 71.38s/it]


In [9]:
print("Test Accuracy: ", f'{nn2.accuracy(X_dev, y_dev.reshape(-1)):.4f}')
print("Best alpha: ", f'{best_alpha:.5f}')
print("Best M: ", best_M)
print("best_epochs: ", best_epochs)

Test Accuracy:  0.9780
Best alpha:  0.19519
Best M:  16
best_epochs:  100


**Initialize the batch sizes**

In [10]:
B = np.array([2**i for i in range(1, 8 + 1)])

**Find best mini_batch size using the basic model**

In [11]:
min_cost = 10**10
best_B = X_train.shape[0]
best_B_epochs=100

for batch_size in tqdm(B):
    nn = model(X_train, y_train, early_stopping=True, B=batch_size, X_dev=X_dev, y_dev=y_dev, use_tqdm=False)

    current_best_cost_index = np.argmin(nn.J)
    if nn.J[current_best_cost_index] < min_cost:
        min_cost=nn.J[current_best_cost_index]
        best_B=batch_size
        best_B_epochs=current_best_cost_index + 1

nn3 = model(X_train, y_train, epochs=best_B_epochs, early_stopping=False, B=best_B, X_dev=X_dev, y_dev=y_dev, use_tqdm=True)

100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:04<00:00,  1.96it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 38.59it/s]


In [12]:
print("Test Accuracy: ", f'{nn3.accuracy(X_test, y_test.reshape(-1)):.4f}')
print("Mini batch size: ", best_B)
print("Epochs: ", nn3.epochs)

Test Accuracy:  0.9654
Mini batch size:  64
Epochs:  14


**Find best mini_batch size, the best learning rate, best epochs and M(neurons of hidden layer)**

In [None]:
min_B_cost = 10**10
best_B = X_train.shape[0]
best_B_epochs=100
best_B_alpha=0
best_B_M = X_train.shape[0]

for batch_size in B:
    nn, best_alpha, best_M, best_epochs, min_cost = tune_model(X_train, y_train, M=M, alphas=alphas, early_stopping=True, X_dev=X_dev, 
                                                      y_dev=y_dev, B=batch_size, use_tqdm=False)

    if min_cost < min_B_cost:
        best_B=batch_size
        best_B_epochs=best_epochs
        best_B_alpha=best_alpha
        best_B_M=best_M

nn4 = model(X_train, y_train, M=best_M, learning_rate=best_alpha, epochs=best_B_epochs,
                   early_stopping=False, X_dev=X_dev, y_dev=y_dev, use_tqdm=True, B=best_B)

In [None]:
print("Test Accuracy: ", f'{nn4.accuracy(X_test, y_test.reshape(-1)):.4f}')
print("Learning rate: ", f'{best_alpha:.5f}')
print("Mini batch size: ", best_B)
print("Number of units: ", best_B)
print("Epochs: ", best_B_epochs)