In [1]:
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm

In [2]:
from layers import CompressionLayer, QuantizationLayer, FeatureSelectionLayer, HardQuantizationLayer, HardQuantizationThresholdLayer
from models import MultiLayerPerceptron
from datasets import get_dataloader
from training_utils import train_model, eval_val, eval_quantization

In [3]:
from joblib import Parallel, delayed

## Load California Housing

In [4]:
# Dataset identifier; it is also interpolated into the results/<dataset>/ paths
# used when the search results are written to CSV.
dataset = 'California_Housing'

# get_dataloader hands back three loaders, unpacked as train / val / test.
loaders = get_dataloader(dataset=dataset)
train_loader, val_loader, test_loader = loaders

In [54]:
def estimate_quantile(train_loader, quantiles):
    """Estimate quantiles of all inputs yielded by ``train_loader``.

    The inputs of every batch are concatenated along dimension 0 and the
    quantiles are computed along that dimension.

    Parameters
    ----------
    train_loader : iterable
        Yields ``(inputs, targets)`` pairs; only ``inputs`` is used.
    quantiles : torch.Tensor or sequence of float
        Quantile levels forwarded to ``torch.quantile``. They are converted to
        the dtype and device of the collected inputs, so a float32 tensor or a
        plain Python list also works with float64 (or GPU-resident) data.

    Returns
    -------
    torch.Tensor
        Result of ``torch.quantile(..., dim=0)`` with its first two dimensions
        swapped; for 2-D inputs this is ``(n_columns, n_quantiles)``.

    Raises
    ------
    ValueError
        If ``train_loader`` yields no batches.
    """
    # Collect the input part of every batch.
    input_batches = [inputs for inputs, _ in train_loader]
    if not input_batches:
        raise ValueError("train_loader yielded no batches; cannot estimate quantiles")

    # Concatenate all data along the first dimension.
    all_data = torch.cat(input_batches, dim=0)

    # torch.quantile only accepts floating point input.
    if not all_data.is_floating_point():
        all_data = all_data.float()

    # torch.quantile requires q to share dtype (and device) with the input.
    quantiles = torch.as_tensor(quantiles, dtype=all_data.dtype, device=all_data.device)

    quantile_values = torch.quantile(all_data, quantiles, dim=0).transpose(0, 1)
    return quantile_values

def get_quantization_thresholds(train_loader, n_bits):
    """Return quantile-based quantization thresholds for ``n_bits`` bits.

    Builds the ``2 ** n_bits - 1`` evenly spaced quantile levels
    ``k / 2 ** n_bits`` (``k = 1 .. 2 ** n_bits - 1``) and returns whatever
    ``estimate_quantile`` computes for them on ``train_loader``.
    """
    n_levels = 2 ** n_bits
    step = 1 / n_levels
    # Levels start at `step` and stop before 1, so both 0 and 1 are excluded.
    quantile_levels = torch.arange(step, 1, step)
    return estimate_quantile(train_loader, quantile_levels)


In [64]:
# Stand-alone quantization layer with 2-bit quantile thresholds
# (2 ** 2 - 1 = 3 quantile levels), used for the smoke test in the next cell.
thresholds = get_quantization_thresholds(train_loader, n_bits=2)
model = HardQuantizationThresholdLayer(thresholds=thresholds)

# Layer widths of a reference network; not consumed by the layer above.
architecture = [8, 128, 128, 128, 1]
            

In [65]:
# Smoke test: push a single training batch through the quantization layer.
inputs, targets = data = next(iter(train_loader))
output = model(inputs)


### Random Search for DNN (over a grid of candidate architectures)

In [45]:
hidden_neurons = [128, 256, 512, 1024]
max_hidden_layers = 6


def _has_allowed_shape(widths):
    """Return True if ``widths`` is an admissible sequence of layer widths.

    A sequence is kept when
    * the sign of its slope never increases (it may rise, then plateau, then
      fall, but never rise again after a plateau or a fall),
    * that sign changes at most twice, and
    * it uses at most three distinct widths.
    """
    widths_arr = np.array(widths)
    slope_sign_changes = np.diff(np.sign(np.diff(widths_arr)))
    never_turns_upward = (slope_sign_changes <= 0).all()
    few_sign_changes = np.count_nonzero(slope_sign_changes) <= 2
    few_distinct_widths = len(np.unique(widths_arr)) <= 3
    return bool(never_turns_upward and few_sign_changes and few_distinct_widths)


# Maps a number of hidden layers to every admissible width list of that depth.
neuron_combination_dict = {1: [[width] for width in hidden_neurons]}

# Candidates handed to the search: only networks with more than 3 hidden layers.
neuron_combinations = []
for depth in range(2, max_hidden_layers + 1):
    # Grow every admissible list of the previous depth by one more layer.
    extended = [
        shallower + [width]
        for shallower in neuron_combination_dict.get(depth - 1)
        for width in hidden_neurons
        if _has_allowed_shape(shallower + [width])
    ]
    neuron_combination_dict.update({depth: extended})
    if depth > 3:
        neuron_combinations += extended

print(len(neuron_combinations))

170


In [46]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without a usable CUDA device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [91]:
def random_search_hard_quantization_threshold(n_steps = 10, n_bits =8, optimize_dict = {}, device = 'cpu', when = 'pre'):
    """Random hyperparameter search for an MLP combined with a hard quantization layer.

    Quantization thresholds are computed once from the notebook-level
    ``train_loader`` via ``get_quantization_thresholds``. Every step samples
    hyperparameters, builds a fresh ``HardQuantizationThresholdLayer`` and
    ``MultiLayerPerceptron``, trains with Adam and MSE loss, and records one
    loss value.

    Parameters
    ----------
    n_steps : int
        Number of random configurations to train.
    n_bits : int
        Bit width forwarded to ``get_quantization_thresholds``.
    optimize_dict : dict
        Maps a hyperparameter name to the list of candidate values sampled
        with ``random.choice``. Allowed keys: ``'weight_decay'``,
        ``'learning_rate'``, ``'neuron_combination'``, ``'num_epochs'`` and
        ``'add_noise'``. Hyperparameters that are not listed keep their
        defaults (0, 0.001, [256, 256], 30, False). The dict is only read,
        never modified.
    device : str
        Device the models are moved to; also forwarded to ``train_model`` and
        ``eval_val``.
    when : {'pre', 'post'}
        ``'pre'``: the quantization layer is part of the trained model and the
        recorded loss is the value returned by ``train_model``.
        ``'post'``: only the MLP is trained and the recorded loss is
        ``eval_val`` of quantization layer + MLP.

    Returns
    -------
    pandas.DataFrame
        One row per step with the columns ``Architecture``, ``Loss``,
        ``Weight Decay``, ``Learning Rate``, ``Num Epochs`` and ``Add Noise``,
        sorted by ascending ``Loss``.

    Raises
    ------
    ValueError
        If ``when`` is neither ``'pre'`` nor ``'post'``, or if
        ``optimize_dict`` contains an unknown hyperparameter name.
    """
    # Fail fast: a bad argument should be reported before the pass over the
    # training data, not as an unbound `training_model` inside the loop.
    if when not in ('pre', 'post'):
        raise ValueError(f"Unknown value for when: {when!r} (expected 'pre' or 'post')")

    # Default hyperparameters; entries named in optimize_dict are re-sampled
    # at every step, the others stay fixed.
    hyperparameters = {
        'weight_decay': 0,
        'learning_rate': 0.001,
        'neuron_combination': [256, 256],
        'num_epochs': 30,
        'add_noise': False,
    }
    for key in optimize_dict:
        if key not in hyperparameters:
            raise ValueError(f"Unknown hyperparameter: {key}")

    thresholds = get_quantization_thresholds(train_loader, n_bits)

    # One (sampled hyperparameters, loss) pair per search step.
    trials = []

    # Perform random search
    for _ in tqdm(range(n_steps)):
        # Draw in optimize_dict's iteration order, one random.choice per key.
        for key, candidates in optimize_dict.items():
            hyperparameters[key] = random.choice(candidates)

        # Input width 8 and output width 1 are hard-coded around the hidden layers.
        architecture = [8] + hyperparameters['neuron_combination'] + [1]

        quantization_model = HardQuantizationThresholdLayer(thresholds=thresholds)
        mlp = MultiLayerPerceptron(architecture)

        # 'pre' trains through the quantization layer, 'post' trains the bare MLP.
        if when == 'pre':
            training_model = nn.Sequential(quantization_model, mlp)
        else:
            training_model = mlp

        # Evaluation always sees quantized inputs; it shares both modules with
        # the training model.
        eval_model = nn.Sequential(quantization_model, mlp)

        training_model.to(device)
        eval_model.to(device)
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(
            training_model.parameters(),
            lr=hyperparameters['learning_rate'],
            weight_decay=hyperparameters['weight_decay'],
        )

        # NOTE(review): model selection uses the notebook-level `test_loader`
        # both here and in eval_val, while `val_loader` is never used - confirm
        # that tuning on the test split is intended.
        # NOTE(review): has_quantization_layer is False even when the
        # quantization layer is part of the trained model ('pre') - confirm.
        best_val_loss = train_model(training_model, num_epochs=hyperparameters['num_epochs'],
                    train_loader=train_loader, test_loader=test_loader,
                    optimizer=optimizer, criterion=criterion, has_quantization_layer=False,
                    train_quantization_layer=False, print_result=False,
                    add_noise=hyperparameters['add_noise'], device=device)

        val_loss = eval_val(model=eval_model,
                val_dataloader=test_loader,
                criterion=criterion, device = device)

        loss = best_val_loss if when == 'pre' else val_loss
        # Shallow snapshot: later steps overwrite the entries of `hyperparameters`.
        trials.append((dict(hyperparameters), loss))

    # Create DataFrame with results
    results_df = pd.DataFrame({
        'Architecture': [params['neuron_combination'] for params, _ in trials],
        'Loss': [loss for _, loss in trials],
        'Weight Decay': [params['weight_decay'] for params, _ in trials],
        'Learning Rate': [params['learning_rate'] for params, _ in trials],
        'Num Epochs': [params['num_epochs'] for params, _ in trials],
        'Add Noise': [params['add_noise'] for params, _ in trials],
    })
    results_df = results_df.sort_values('Loss')  # Sort by loss ascending
    return results_df

In [81]:
# Search space for the 'pre' run (quantization layer included during training).
# The key order is kept as-is: the search draws one random.choice per key in
# the dict's iteration order.
search_space_pre = {
    'weight_decay': [0, 0.0001],
    'learning_rate': [0.001, 0.0001],
    'add_noise': [False, True],
    'neuron_combination': neuron_combinations,
    'num_epochs': [30],
}

results_df_pre = random_search_hard_quantization_threshold(
    n_steps=50,
    n_bits=8,
    optimize_dict=search_space_pre,
    device=device,
    when='pre',
)

100%|██████████| 50/50 [16:58<00:00, 20.37s/it]


In [87]:
# First five rows of the 'pre' search results.
results_df_pre.head(n=5)

Unnamed: 0,Architecture,Loss,Weight Decay,Learning Rate,Num Epochs,Add Noise
32,"[128, 512, 512, 512, 512, 128]",0.320809,0.0,0.001,30,False
2,"[128, 128, 128, 128, 128, 128]",0.322091,0.0001,0.001,30,False
22,"[128, 256, 1024, 1024, 1024, 1024]",0.32555,0.0001,0.001,30,False
43,"[128, 256, 256, 256, 128]",0.331088,0.0001,0.001,30,True
42,"[512, 512, 512, 512, 128]",0.336274,0.0,0.001,30,False


In [83]:
# DataFrame.to_csv does not create missing directories, so make sure
# results/<dataset>/ exists before writing; otherwise the finished search
# results would be lost to an OSError.
pre_results_path = Path(f'results/{dataset}/random_search_results_hard_quantization_quantiles_pre.csv')
pre_results_path.parent.mkdir(parents=True, exist_ok=True)
results_df_pre.to_csv(pre_results_path, index=False)

In [None]:
# Search space for the 'post' run (MLP trained alone, quantization applied
# only for evaluation). The key order is kept as-is: the search draws one
# random.choice per key in the dict's iteration order.
search_space_post = {
    'weight_decay': [0, 0.0001],
    'learning_rate': [0.001, 0.0001],
    'add_noise': [False, True],
    'neuron_combination': neuron_combinations,
    'num_epochs': [30],
}

results_df_post = random_search_hard_quantization_threshold(
    n_steps=50,
    n_bits=8,
    optimize_dict=search_space_post,
    device=device,
    when='post',
)

  0%|          | 0/50 [00:00<?, ?it/s]

In [88]:
# First five rows of the 'post' search results.
results_df_post.head(n=5)

Unnamed: 0,Architecture,Loss,Weight Decay,Learning Rate,Num Epochs,Add Noise
30,"[1024, 1024, 1024, 1024, 1024, 1024]",3.018074,0.0001,0.001,30,False
2,"[256, 512, 1024, 1024, 1024, 512]",3.307789,0.0,0.001,30,False
5,"[128, 256, 1024, 1024, 1024, 1024]",5.495318,0.0001,0.001,30,False
27,"[128, 512, 512, 512, 256]",5.691167,0.0001,0.001,30,False
10,"[256, 512, 512, 512, 128]",7.217902,0.0001,0.001,30,False


In [86]:
# DataFrame.to_csv does not create missing directories, so make sure
# results/<dataset>/ exists before writing; otherwise the finished search
# results would be lost to an OSError.
post_results_path = Path(f'results/{dataset}/random_search_results_hard_quantization_quantiles_post.csv')
post_results_path.parent.mkdir(parents=True, exist_ok=True)
results_df_post.to_csv(post_results_path, index=False)