# Dataset Information
This notebook computes the average expected value that the network is trained to predict. That average gives context for how well or poorly the network is doing at energy regression. The network's loss is mean squared error, so its typical (root-mean-square) error relative to the correct value is sqrt(loss).

In [1]:
#import the necessary tools
from utils import load_torch_datasets, load_torch_datasets_quant
from math import sqrt
from statistics import mean
from processing import FINN_FACILE_Preproc as preproc
from processing import FINN_FACILE_Postproc as postproc

In [2]:
# Loss value to compare against; it is a mean squared error, so its
# square root is on the same scale as the predicted values.
loss = 26.65
avg_error = sqrt(loss)

# Show the typical error implied by that loss
print(avg_error)

5.162363799656123


In [2]:
# Load the three datasets plus the shape value returned by the loader,
# and keep the datasets together in one list so later cells can loop over them.
split_a, split_b, split_c, shape = load_torch_datasets_quant()
datasets = [split_a, split_b, split_c]

X_train shape: (229538, 14)
X_val shape: (12752, 14)
X_test shape: (12752, 14)
Y_train shape: (229538, 1)
Y_val shape: (12752, 1)
Y_test shape: (12752, 1)
Using saved split data


In [3]:
# Gather, across every dataset, the scalar stored at position [0] of the
# last element of each sample into one flat list of Python numbers.
expected_vals = [
    sample[-1][0].item()
    for split in datasets
    for sample in split
]

In [4]:
# Arithmetic mean of all collected expected values
avg_expected = mean(expected_vals)
print(avg_expected)

0.04999951474704269


In [5]:
#find the mins and maxes of each column of dataset
# Slots 0..NUM_INPUT_COLS-1 track the entries read from the first element of
# each sample; the final slot tracks the scalar at [0] of the last element.
NUM_INPUT_COLS = 14
TARGET_SLOT = NUM_INPUT_COLS

# Seed the running extremes with -inf / +inf. Fixed seeds such as 0 and 9999
# give wrong answers for a column that is entirely negative (max stuck at 0)
# or entirely above 9999 (min stuck at 9999).
maxes = [float("-inf")] * (NUM_INPUT_COLS + 1)
mins = [float("inf")] * (NUM_INPUT_COLS + 1)

# The loop variable is deliberately not called `tensor`, so it cannot shadow
# torch's `tensor` constructor that another cell imports.
for split in datasets:
    for sample in split:
        inputs = sample[0]
        for col in range(NUM_INPUT_COLS):
            value = inputs[col].item()
            # Current extreme goes first so a NaN value never replaces it
            maxes[col] = max(maxes[col], value)
            mins[col] = min(mins[col], value)
        target = sample[-1][0].item()
        maxes[TARGET_SLOT] = max(maxes[TARGET_SLOT], target)
        mins[TARGET_SLOT] = min(mins[TARGET_SLOT], target)
print(maxes)
print(mins)

[1.0, 1.0, 0.9999998106554049, 1.0, 1.0, 0.999964471813329, 1.0, 0.9999999905851952, 0.9999999956577791, 1.0, 1.0, 0.9999999880338537, 0.9999999998399088, 0.9999999997741191, 0.9999998547861165]
[0.0, 0.0, 0.0, 0.0, 0.0, 3.223272948584546e-08, 2.5906125469281093e-09, 4.1812929975117144e-09, 2.5313367335872137e-09, -2.2611500966998698e-10, 1.6545647370087432e-09, -1.5658824307049203e-09, -7.204106391371633e-09, 1.2961479300496014e-08, 0.0]


In [4]:
# Imported in this cell because earlier cells reuse the name `tensor` as a
# loop variable; re-importing restores the torch constructor before use.
from torch import tensor, float64
#compute the increment for each column with a given bitwidth
BITWIDTH = 4

# Number of steps between a column's min and max: 2**BITWIDTH levels
# leave 2**BITWIDTH - 1 gaps between them.
increments = 2**BITWIDTH - 1

incr_vals = []
for idx, (low, high) in enumerate(zip(mins, maxes)):
    # Step size that spreads [low, high] evenly over `increments` steps
    incr = (high - low)/increments
    # Rebuild the top of the range from the step size as a sanity check
    calc_high = low + incr * increments
    incr_vals.append(incr)
    print(f"column: {idx}\tmin: {low}\tincr: {incr}\thigh: {high}\tcalculated_high: {calc_high}")

# The first 14 slots parameterise the pre-processing tensors; slot 14
# parameterises the post-processing scalars.
min_tensor_pre = tensor(mins[:14], dtype=float64)
incr_tensor_pre = tensor(incr_vals[:14], dtype=float64)
min_tensor_post = tensor(mins[14], dtype=float64)
incr_tensor_post = tensor(incr_vals[14], dtype=float64)

column: 0	min: 31.0	incr: 5.466666666666667	high: 113.0	calculated_high: 113.0
column: 1	min: 1.0	incr: 0.4	high: 7.0	calculated_high: 7.0
column: 2	min: 0.0	incr: 33.00859375	high: 495.12890625	calculated_high: 495.12890625000006
column: 3	min: -29.0	incr: 3.8666666666666667	high: 29.0	calculated_high: 29.0
column: 4	min: 1.0	incr: 4.733333333333333	high: 72.0	calculated_high: 72.0
column: 5	min: 7.618940435349941e-05	incr: 0.0008362909934173028	high: 0.012620554305613041	calculated_high: 0.012620554305613041
column: 6	min: 17.317169189453125	incr: 4353.9111470540365	high: 65325.984375	calculated_high: 65325.984375
column: 7	min: 33.44824981689453	incr: 3983.0883458455405	high: 59779.7734375	calculated_high: 59779.7734375
column: 8	min: 17.927364349365234	incr: 9595.703800710042	high: 143953.484375	calculated_high: 143953.484375
column: 9	min: 55.530860900878906	incr: 41011.318775939944	high: 615225.3125	calculated_high: 615225.3125
column: 10	min: 55.538421630859375	incr: 16988.59118

In [5]:
# Display the min / increment tensors built for pre- and post-processing,
# one per print call, in the same order as they were created.
for quant_param in (min_tensor_pre, incr_tensor_pre, min_tensor_post, incr_tensor_post):
    print(quant_param)

tensor([ 3.1000e+01,  1.0000e+00,  0.0000e+00, -2.9000e+01,  1.0000e+00,
         7.6189e-05,  1.7317e+01,  3.3448e+01,  1.7927e+01,  5.5531e+01,
         5.5538e+01,  3.6765e+01,  3.6762e+01,  3.6761e+01],
       dtype=torch.float64)
tensor([5.4667e+00, 4.0000e-01, 3.3009e+01, 3.8667e+00, 4.7333e+00, 8.3629e-04,
        4.3539e+03, 3.9831e+03, 9.5957e+03, 4.1011e+04, 1.6989e+04, 5.2231e+03,
        2.6027e+03, 2.0291e+03], dtype=torch.float64)
tensor(0., dtype=torch.float64)
tensor(33.8267, dtype=torch.float64)


In [23]:
#create new, quantized datasets based on min and incr vals above
quant_datasets = [None, None, None]
for set_idx in range(3):
    curr_set = datasets[set_idx]
    for row_idx in range(len(curr_set)):
        for col_idx in range(14):
            curr_val = curr_set[row_idx][0][col_idx].item()
            quant_val = int((curr_val - mins[col_idx])/incr_vals[col_idx])
            curr_set[row_idx][0][col_idx] = quant_val
    quant_datasets[set_idx] = curr_set

tensor([[5.8000e+01, 2.0000e+00, 3.6937e+00,  ..., 1.6452e+03, 1.5683e+03,
         9.7300e+02],
        [7.3000e+01, 3.0000e+00, 1.6363e+00,  ..., 1.0946e+03, 8.9427e+02,
         7.3725e+02],
        [7.4000e+01, 2.0000e+00, 1.1591e+01,  ..., 4.5392e+03, 1.4678e+03,
         7.6628e+02],
        ...,
        [6.4000e+01, 2.0000e+00, 9.2186e+00,  ..., 1.7354e+03, 1.1922e+03,
         6.7194e+02],
        [7.1000e+01, 2.0000e+00, 7.6534e-01,  ..., 8.9196e+02, 1.6237e+03,
         8.8821e+02],
        [9.1000e+01, 2.0000e+00, 8.1764e+00,  ..., 2.9541e+03, 1.7474e+03,
         8.3617e+02]], dtype=torch.float64)


In [6]:
# Show everything the first dataset returns for a full slice
first_dataset = datasets[0]
print(first_dataset[:])

(tensor([[5.8000e+01, 2.0000e+00, 3.6937e+00,  ..., 1.6452e+03, 1.5683e+03,
         9.7300e+02],
        [7.3000e+01, 3.0000e+00, 1.6363e+00,  ..., 1.0946e+03, 8.9427e+02,
         7.3725e+02],
        [7.4000e+01, 2.0000e+00, 1.1591e+01,  ..., 4.5392e+03, 1.4678e+03,
         7.6628e+02],
        ...,
        [6.4000e+01, 2.0000e+00, 9.2186e+00,  ..., 1.7354e+03, 1.1922e+03,
         6.7194e+02],
        [7.1000e+01, 2.0000e+00, 7.6534e-01,  ..., 8.9196e+02, 1.6237e+03,
         8.8821e+02],
        [9.1000e+01, 2.0000e+00, 8.1764e+00,  ..., 2.9541e+03, 1.7474e+03,
         8.3617e+02]], dtype=torch.float64), tensor([[28.0091],
        [ 0.0000],
        [ 0.0000],
        ...,
        [17.4846],
        [ 0.0000],
        [48.8326]], dtype=torch.float64))


In [9]:
# Build the pre-/post-processing objects from the min and increment tensors
# computed earlier in the notebook.
post = postproc(min_tensor_post, incr_tensor_post)
pre = preproc(min_tensor_pre, incr_tensor_pre)

# Take the first element of a full slice of the first dataset as a sample
# input, show it, then show what the pre-processing step produces from it.
first_dataset_slice = datasets[0][:]
test_tensor = first_dataset_slice[0]
print(test_tensor)
result = pre(test_tensor)
print(result)

tensor([[5.8000e+01, 2.0000e+00, 3.6937e+00,  ..., 1.6452e+03, 1.5683e+03,
         9.7300e+02],
        [7.3000e+01, 3.0000e+00, 1.6363e+00,  ..., 1.0946e+03, 8.9427e+02,
         7.3725e+02],
        [7.4000e+01, 2.0000e+00, 1.1591e+01,  ..., 4.5392e+03, 1.4678e+03,
         7.6628e+02],
        ...,
        [6.4000e+01, 2.0000e+00, 9.2186e+00,  ..., 1.7354e+03, 1.1922e+03,
         6.7194e+02],
        [7.1000e+01, 2.0000e+00, 7.6534e-01,  ..., 8.9196e+02, 1.6237e+03,
         8.8821e+02],
        [9.1000e+01, 2.0000e+00, 8.1764e+00,  ..., 2.9541e+03, 1.7474e+03,
         8.3617e+02]], dtype=torch.float64)
tensor([[ 4,  2,  0,  ...,  0,  0,  0],
        [ 7,  5,  0,  ...,  0,  0,  0],
        [ 7,  2,  0,  ...,  0,  0,  0],
        ...,
        [ 6,  2,  0,  ...,  0,  0,  0],
        [ 7,  2,  0,  ...,  0,  0,  0],
        [10,  2,  0,  ...,  0,  0,  0]], dtype=torch.int8)
