# Searching for Optimal Hyperparameters

In this section, I try to optimize several factors including:
  - Learning rate
  - Number of hidden layers
  - Number of neurons in each hidden layer
  - Activation function
  - Loss function

To do this, I create a number of networks and train them for 10 epochs each, taking the best of 3 runs for each network.

For each variable, I first tried a wide range of possible values. Then, once I had a good idea of which range of values yields good results, I used a ternary search to quickly converge on the optimal value.

### Utility functions

These functions will be useful for training groups of networks, and visualizing the best results.

In [None]:
from typing import *
from network import *

In [None]:
class NetworkGroup:
    """A set of networks that are trained and ranked side by side.

    The experiments read the module-level ``training_data`` and
    ``testing_data`` names, which must be in scope before any experiment
    is run.
    """

    def __init__(self, networks: Optional[List["Network"]] = None) -> None:
        """Store the networks to experiment on.

        Args:
            networks: Networks to train and compare. When omitted, the
                group starts with an empty list.
        """
        # Fall back to an empty list (not None) so run_experiment can take
        # len() of / iterate over a default-constructed group.
        self.networks = networks if networks is not None else []

    @staticmethod
    def _percent(count, total):
        """Return ``count`` as a percentage of ``total``, rounded to 2 places."""
        return round(count / total * 100, 2)

    def run_experiment(self, n_runs: int = 3, epochs: int = 30):
        """Train every network ``n_runs`` times and print a ranking.

        Before each run the network is reset, trained for ``epochs`` epochs
        on ``training_data`` and scored with ``evaluate(testing_data)``. The
        highest score over the runs is kept for each network, and the
        networks are finally listed from best to worst.

        Args:
            n_runs: Number of independent reset/train/evaluate runs per
                network.
            epochs: Number of epochs passed to ``network.train`` in each run.
        """
        total = len(testing_data)
        scores = []
        for i, network in enumerate(self.networks, 1):
            print(f"Testing network {i}...")
            # presumably evaluate() returns the number of correctly
            # classified test samples -- confirm against Network.evaluate
            best = 0
            for run in range(1, n_runs + 1):
                print(f"Training, run {run}...")
                network.reset()
                network.train(epochs, training_data)
                cur = network.evaluate(testing_data)
                print(f"Run {run}: {cur} / {total} ({self._percent(cur, total)}%)")
                best = max(best, cur)
            scores.append(best)
            print(f"Best run: {best} / {total} ({self._percent(best, total)}%)")
            print()

        print("Experiment complete")
        # Sort (score, 1-based index) pairs so the best network comes first.
        ranking = sorted(
            ((score, index) for index, score in enumerate(scores, 1)),
            reverse=True,
        )
        # Report the epoch count that was actually used, not a fixed number.
        print(f"In order of accuracy after {epochs} epochs:")
        for score, index in ranking:
            print(f"  Network {index}: {score} / {total} ({self._percent(score, total)}%)")

    def wide_search(
        self,
        constructor: Callable,
        values: List[float],
        runs: int = 3,
        epochs: int = 30
    ):
        """Build one network per candidate value and run an experiment.

        Replaces ``self.networks`` with ``constructor(value)`` for each entry
        of ``values`` (in order), then calls ``run_experiment``.

        Args:
            constructor: Callable that takes a single candidate value and
                returns a network.
            values: Candidate values to try.
            runs: Number of runs per network, forwarded as ``n_runs``.
            epochs: Number of training epochs per run.
        """
        self.networks = [constructor(value) for value in values]
        self.run_experiment(runs, epochs)

    def ternary_search(self, constructor: Callable, low, high):
        """Placeholder for a ternary search between ``low`` and ``high``.

        Not implemented yet: the method currently does nothing and returns
        ``None``.
        """
        pass



### One hidden layer

### Learning rate optimization

In [None]:
def learning_rate_constructor(learning_rate: float):
    """Build a network with one 30-unit hidden layer for a given learning rate.

    The layer stack is Dense(784, 30) -> Sigmoid(30) -> Dense(30, 10) ->
    Sigmoid(10). ``mse_prime``, ``learning_rate`` and ``64`` are passed to
    ``Network`` positionally after the layers.

    NOTE(review): the recorded notebook output ends with
    ``NameError: name 'mse_prime' is not defined`` -- confirm that the
    ``network`` module actually exports ``mse_prime``.
    NOTE(review): the trailing ``64`` is presumably the mini-batch size --
    verify against the ``Network`` constructor.
    """
    layers = [Dense(784, 30), Sigmoid(30), Dense(30, 10), Sigmoid(10)]
    return Network(layers, mse_prime, learning_rate, 64)

In [None]:
# Coarse sweep over learning rates spanning several orders of magnitude,
# with a single run per network and wide_search's default epoch count.
Group = NetworkGroup()
Group.wide_search(
    learning_rate_constructor,
    values=[0.01, 0.1, 0.5, 1.0, 3.0, 10.0],
    runs=1,
)

NameError: name 'mse_prime' is not defined