In [30]:
from itertools import product as cartesian_prod
import numpy as np
"""
# calculate_all_hyperparameter_combos 
# 
# Input: N/A
# 
# Output: A list of (gamma, alpha) float tuples, one per hyperparameter combination to test
# 
"""
def calculate_all_hyperparameter_combos():
    """Build the grid of (gamma, alpha) hyperparameter pairs to evaluate.

    Gammas run from 0.01 to 0.99 in steps of 0.01 and alphas run from 0.1 to
    1.0 in steps of 0.1; every value is rounded to two decimals.

    Returns:
        list[tuple[float, float]]: every (gamma, alpha) combination in
        gamma-major order (all alphas for the first gamma, then the next
        gamma, and so on).
    """
    def inclusive_grid(first, last, step):
        # Derive each value from an integer index instead of repeatedly adding
        # `step`. With repeated addition the accumulated floating-point error
        # decides whether the end point is emitted (0.1 plus nine more 0.1s is
        # 0.9999999999999999, not 1.0), which makes the grid size fragile.
        count = int(round((last - first) / step)) + 1
        return [round(first + index * step, 2) for index in range(count)]

    # Gammas are incremented by 0.01
    gamma_values = inclusive_grid(0.01, 0.99, 0.01)
    # Alphas are incremented by 0.1; the upper bound 1.0 is part of the grid
    alpha_values = inclusive_grid(0.1, 1.0, 0.1)
    # Return all possible combinations of hyperparameters using the cartesian product
    return list(cartesian_prod(gamma_values, alpha_values))

In [64]:
###
# After running all the data through the cluster, it is time to determine what the best average hyperparameters are
## 
def determine_best_hyperparameters(file_location:str, specific_value:bool, gamma:float=-1.0, alpha:float=-1.0):
    """Print the mean accuracy of hyperparameter combinations recorded in a results file.

    Every non-blank line of the file is split on commas. Field 1 is parsed as
    alpha, field 2 as gamma and field 5 as the accuracy (0-based indices).
    NOTE(review): this column order is taken from the parsing code only;
    confirm it against whatever writes the results files.
    Accuracies are grouped by (gamma, alpha) and averaged per combination.

    Args:
        file_location: Path of the comma-separated results file.
        specific_value: When True, print the mean accuracy of the requested
            (gamma, alpha) pair. When False, print the pair with the highest
            mean accuracy.
        gamma: Gamma of the pair to report; only used when specific_value is True.
        alpha: Alpha of the pair to report; only used when specific_value is True.

    Returns:
        None. The result is printed as "<key> : <mean accuracy> - <file_location>".

    Raises:
        KeyError: A row's (gamma, alpha) pair is not part of the grid returned
            by calculate_all_hyperparameter_combos().
        ValueError: specific_value is True and the requested (gamma, alpha)
            pair is not part of that grid.
    """
    # One accuracy list per known combination; rows outside the grid raise KeyError
    accuracies_by_combo = {combo: [] for combo in calculate_all_hyperparameter_combos()}

    # Read and store all the accuracy values
    with open(file_location, 'r') as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            fields = line.split(',')
            # Row-local names: these must not overwrite the gamma/alpha
            # arguments, which are needed later when specific_value is True.
            row_alpha = float(fields[1])
            row_gamma = float(fields[2])
            accuracies_by_combo[(row_gamma, row_alpha)].append(float(fields[5]))

    # Average each accuracy list; combinations without any rows are left out
    # so np.average is never called on an empty list.
    mean_by_combo = {
        combo: np.average(accuracies)
        for combo, accuracies in accuracies_by_combo.items()
        if accuracies
    }

    if specific_value:
        key = (gamma, alpha)
        if key not in accuracies_by_combo:
            raise ValueError(
                "(gamma, alpha) = {} is not in the hyperparameter grid".format(key)
            )
        # A grid combination with no recorded rows is reported as nan
        print(key,":",mean_by_combo.get(key, float('nan')), "-", file_location)
    else:
        # Check for largest value; only means strictly above 0 can win
        max_acc = 0
        max_key = None
        for key, accuracy in mean_by_combo.items():
            if accuracy > max_acc:
                max_key = key
                max_acc = accuracy
        print(max_key,":",max_acc, "-", file_location)
    
    

In [63]:
# Report the best-scoring (gamma, alpha) pair for each results file, in order
for results_path in (
    'HyperParameterOptimizations/train_no_weighting_runs.csv',
    'HyperParameterOptimizations/train_weighted_runs.csv',
    'HyperParameterOptimizations/test_weighted_runs.csv',
    'HyperParameterOptimizations/test_no_weighting_runs.csv',
):
    determine_best_hyperparameters(results_path, False)

(0.86, 0.1) : 0.6643585360664213 - HyperParameterOptimizations/train_no_weighting_runs.csv
(0.86, 0.1) : 0.6508344117546555 - HyperParameterOptimizations/train_weighted_runs.csv
(0.84, 0.1) : 0.7048005034990829 - HyperParameterOptimizations/test_weighted_runs.csv
(0.81, 0.1) : 0.7129023248233838 - HyperParameterOptimizations/test_no_weighting_runs.csv
