# PART C (Neural Networks)

Imports:
* the `Layer` class
* the `NeuralNetwork` class
* the `load_datasets()` utility function

In [None]:
from ipynb.fs.full.NeuralNetworks import *
from ipynb.fs.full.LoadDatasets import *

In [None]:
# Unpack the six values returned by load_datasets() into the train / dev / test
# feature and label variables used by every cell below.
X_train, y_train, X_dev, y_dev, X_test, y_test = load_datasets()

**The basic model**

In [None]:
def model(X, y, M=4, learning_rate=0.1, epochs=500, early_stopping=True, B=128, X_dev=None, y_dev=None, use_tqdm=True):
    """Build and fit a network made of one hidden layer and one output layer.

    Args:
        X: Training inputs; ``X.shape[1]`` is passed as the second constructor
            argument of the hidden ``Layer``.
        y: Training targets; flattened with ``reshape(-1)`` before being
            registered on the network.
        M: First constructor argument of the hidden ``Layer`` and second
            argument of the output ``Layer`` (the notebook describes it as the
            number of hidden-layer units).
        learning_rate: Forwarded unchanged to ``NeuralNetwork.fit``.
        epochs: Forwarded unchanged to ``NeuralNetwork.fit``.
        early_stopping: Forwarded unchanged to ``NeuralNetwork.fit``.
        B: Forwarded unchanged to ``NeuralNetwork.fit`` (the notebook uses it
            as the mini-batch size).
        X_dev: Forwarded unchanged to ``NeuralNetwork.fit``.
        y_dev: Forwarded unchanged to ``NeuralNetwork.fit``.
        use_tqdm: Forwarded unchanged to ``NeuralNetwork.fit``.

    Returns:
        The ``NeuralNetwork`` instance after ``fit`` has been called on it.
    """
    # Both layers are constructed before the network itself, hidden layer first.
    layers = (Layer(M, X.shape[1]), Layer(1, M))

    network = NeuralNetwork()
    network.input_layer(X, y.reshape(-1))
    for layer in layers:
        network.add_layer(layer)

    fit_options = dict(
        learning_rate=learning_rate,
        epochs=epochs,
        early_stopping=early_stopping,
        X_dev=X_dev,
        y_dev=y_dev,
        B=B,
        use_tqdm=use_tqdm,
    )
    network.fit(**fit_options)

    return network

In [None]:
# Fit the baseline model with its default M / learning rate / epochs, passing the
# dev split alongside early_stopping=True. B is set to the number of training
# rows, i.e. the whole training set is used as a single batch size.
nn = model(X_train, y_train, early_stopping=True, B=X_train.shape[0], X_dev=X_dev, y_dev=y_dev)

In [None]:
# Report the baseline model's accuracy on the test split (3 decimals) and the
# value of the network's `epochs` attribute.
print("Test Accuracy: ", f'{nn.accuracy(X_test, y_test.reshape(-1)):.3f}')
print("Epochs: ", nn.epochs)

**Initialize:**
* **the candidate learning rates, sampled randomly as 10^(-5u) with u uniform in [0, 1)**
* **the M array (the candidate numbers of units in the hidden layer)**

In [None]:
import random

# Fixed seed so the same learning rates are drawn on every run.
random.seed(42)

# Ten learning rates of the form 10**(-5*u) with u drawn uniformly from [0, 1),
# which spreads them across the orders of magnitude between 1e-5 and 1.
alphas = np.array([10 ** (-5 * random.random()) for _ in range(10)])

# Candidate hidden-layer sizes: the powers of two from 2 up to 1024.
M = [2 ** exponent for exponent in range(1, 11)]

**Function that tunes the hyperparameters and returns the best:**
* **best_alpha** = Learning rate
* **best_M** = Number of units for the hidden layer
* **best_epochs** = Number of training epochs

In [None]:
def tune_model(B=128, max_epochs=500):
    """Grid-search hidden-layer size and learning rate, then refit the winner.

    Every combination of the notebook-level ``M`` (hidden-layer sizes) and
    ``alphas`` (learning rates) is trained through ``model`` with early
    stopping enabled and at most ``max_epochs`` epochs. The combination whose
    cost array ``nn.J`` contains the smallest value is kept, and the 1-based
    position of that minimum is used as the epoch budget of a final refit
    without early stopping (this presumes ``nn.J`` holds one entry per epoch;
    confirm against ``NeuralNetwork.fit``).

    Args:
        B: Forwarded to ``model`` as ``B`` (the notebook uses it as the
            mini-batch size).
        max_epochs: Epoch limit for each candidate run. Defaults to 500, the
            value that was previously hard-coded.

    Returns:
        Tuple ``(nn, best_alpha, best_M, best_epochs)`` where ``nn`` is the
        refitted network and the other three describe the winning
        configuration.

    Raises:
        ValueError: If no candidate produced a cost that compares below
            infinity (for example every run ended with NaN costs), so there
            is no configuration to refit.
    """
    # Start from +inf so any finite cost is accepted, whatever its magnitude.
    min_cost = float("inf")
    best_alpha = None
    best_M = None
    best_epochs = None

    for m in tqdm(M):
        for alpha in alphas:
            candidate = model(X_train, y_train, M=m, learning_rate=alpha, epochs=max_epochs,
                              early_stopping=True, X_dev=X_dev, y_dev=y_dev, use_tqdm=False, B=B)

            best_index = np.argmin(candidate.J)
            candidate_cost = candidate.J[best_index]

            # Strict comparison: ties keep the earlier candidate, NaN never wins.
            if candidate_cost < min_cost:
                min_cost = candidate_cost
                best_alpha = alpha
                best_M = m
                best_epochs = best_index + 1

    # Without this guard the refit below would run with placeholder values
    # instead of a real configuration.
    if best_M is None:
        raise ValueError(
            "tune_model: no (M, alpha) candidate produced a usable cost; "
            "check that M and alphas are non-empty and that training does not diverge"
        )

    nn = model(X_train, y_train, M=best_M, learning_rate=best_alpha, epochs=best_epochs,
               early_stopping=False, X_dev=X_dev, y_dev=y_dev, use_tqdm=False, B=B)

    return nn, best_alpha, best_M, best_epochs

In [None]:
# Run the hyperparameter search with B set to the number of training rows and
# keep the refitted network together with the winning alpha, M and epoch count.
nn, best_alpha, best_M, best_epochs = tune_model(B=X_train.shape[0])

In [None]:
# Report the tuned model. The line is labelled "Test Accuracy", so it is
# evaluated on the test split (X_test / y_test), like the other report cells in
# this notebook, rather than on the dev split that was used during tuning.
print("Test Accuracy: ", f'{nn.accuracy(X_test, y_test.reshape(-1)):.3f}')
print("Best alpha: ", f'{best_alpha:.5f}')
print("Best M: ", best_M)
print("best_epochs: ", best_epochs)

**Initialize the batch sizes**

In [None]:
# Candidate mini-batch sizes: the powers of two 1, 2, 4, ..., 256.
B = 2 ** np.arange(9)

**Find the best mini-batch size using the basic model**

In [None]:
# Search the candidate batch sizes in B with the baseline model settings.
# Fallbacks if no run beats the initial cost bound: full-batch and 500 epochs.
min_cost = 10 ** 10
best_B = X_train.shape[0]
best_B_epochs = 500

for batch_size in tqdm(B):
    nn = model(
        X_train,
        y_train,
        early_stopping=True,
        B=batch_size,
        X_dev=X_dev,
        y_dev=y_dev,
        use_tqdm=False,
    )

    # Position of the smallest entry in this run's cost array.
    current_best_cost_index = np.argmin(nn.J)
    if not nn.J[current_best_cost_index] < min_cost:
        continue

    # New best run: remember its cost, batch size and 1-based minimum position.
    min_cost = nn.J[current_best_cost_index]
    best_B = batch_size
    best_B_epochs = current_best_cost_index + 1

# Refit with the winning batch size for exactly best_B_epochs epochs, without early stopping.
nn = model(
    X_train,
    y_train,
    epochs=best_B_epochs,
    early_stopping=False,
    B=best_B,
    X_dev=X_dev,
    y_dev=y_dev,
    use_tqdm=True,
)

In [None]:
# Report the refitted baseline model: accuracy on the test split (3 decimals),
# the selected batch size, and the network's `epochs` attribute.
print("Test Accuracy: ", f'{nn.accuracy(X_test, y_test.reshape(-1)):.3f}')
print("B: ", best_B)
print("Epochs: ", nn.epochs)

**Find the best mini-batch size using the best learning rate, the best number of epochs and the best M (number of hidden-layer units)**

In [None]:
# Search the candidate batch sizes in B with the tuned M, learning rate and
# epoch count. Early stopping is off, so runs are compared on the last entry of
# their cost array. Fallback if no run beats the initial bound: full-batch.
min_cost = 10 ** 10
best_B = X_train.shape[0]

for batch_size in tqdm(B):
    nn = model(
        X_train,
        y_train,
        M=best_M,
        B=batch_size,
        learning_rate=best_alpha,
        epochs=best_epochs,
        early_stopping=False,
        X_dev=X_dev,
        y_dev=y_dev,
        use_tqdm=False,
    )

    current_cost = nn.J[-1]
    if not current_cost < min_cost:
        continue

    min_cost = current_cost
    best_B = batch_size

# Refit the tuned configuration with the winning batch size.
nn = model(
    X_train,
    y_train,
    M=best_M,
    learning_rate=best_alpha,
    epochs=best_epochs,
    early_stopping=False,
    X_dev=X_dev,
    y_dev=y_dev,
    use_tqdm=True,
    B=best_B,
)

In [None]:
# Report the final tuned model: accuracy on the test split (3 decimals), the
# selected batch size, and the network's `epochs` attribute.
print("Test Accuracy: ", f'{nn.accuracy(X_test, y_test.reshape(-1)):.3f}')
print("B: ", best_B)
print("Epochs: ", nn.epochs)