In [1]:
# Basics
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

# TensorFlow/Keras (Keras layers and optimizers below)
from tensorflow.keras.utils import to_categorical
from tensorflow import keras

# sklearn
from sklearn.model_selection import KFold

# Custom
import sys
sys.path.insert(0, '../src/utils')
# from trace_handler import TraceHandler
from data_loader import DataLoader
import constants
import aes
# from single_byte_evaluator import SingleByteEvaluator
sys.path.insert(0, '../src/modeling')
from network import Network
from evaluator import Evaluator
import ge

# Suppress TensorFlow messages
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # 1 for INFO, 2 for INFO & WARNINGs, 3 for INFO & WARNINGs & ERRORs

In [2]:
BYTE_IDX = 0
# N_CLASSES = 256 ############# look at to_categorical of DataLoader if something goes wrong

train_dl = DataLoader('/prj/side_channel/PinataTraces/datasets/SBOX_OUT/D1-K1.json', BYTE_IDX)
x_train, y_train, pltxt_train = train_dl.gen_set(train=True) # Default 80% train (40,000 train traces)
true_key_byte_train = train_dl.get_true_key_byte()

Loading the dataset... 
Dataset successfully loaded (55.64 seconds).


In [3]:
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization

HP_CHOICES = {#'kernel_initializer': ['random_normal', 'random_uniform', 
              #                       'truncated_normal', 
              #                       'zeros', 'ones', 
              #                       'glorot_normal', 'glorot_uniform',
              #                       'he_normal', 'he_uniform',
              #                       'identity', 'orthogonal', 'constant', 'variance_scaling'],
              'kernel_initializer': ['random_normal', 'random_uniform', 'truncated_normal', 'he_normal', 'he_uniform'],  
              'activation':         ['relu', 'tanh'],
              'hidden_layers':      [1, 2, 3, 4, 5, 6],
              'hidden_neurons':     [100, 200, 300, 400, 500, 600],
              'dropout_rate':       [0.0, 0.2, 0.4],
              'optimizer':          [SGD, Adam, RMSprop],
              'learning_rate':      [1e-3, 1e-4, 1e-5, 1e-6],
              'batch_size':         [50, 100, 200, 500, 1000]}    

In [4]:
N_MODELS = 10

hps = []
for _ in range(N_MODELS):
    hps.append({k: random.choice(HP_CHOICES[k]) for k in HP_CHOICES})

In [10]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

callbacks = []
callbacks.append(EarlyStopping(monitor='val_loss', 
                               patience=12))
callbacks.append(ReduceLROnPlateau(monitor='val_loss',
                                   factor=0.2,
                                   patience=5,
                                   min_lr=1e-7))

In [11]:
def ge_score(ge, n=10):

    iszero = ge==0
    
    if len(iszero) == 0:
        return len(ge)

    tmp = np.concatenate(([0], iszero, [0])) # generate a vector with 1s where ge is 0 (all other elements are 0)
    start_end_zeros = np.abs(np.diff(tmp)) # generate a vector with 1s only at start_idx, end_idx of a 0-sequence in ge
    
    zero_slices = np.where(start_end_zeros==1)[0].reshape(-1, 2)
    for el in zero_slices:
        diff = el[1] - el[0]
        print(f'---> diff = {diff} <---')
        if diff >= n:
            return el[0]
        
    return len(ge)

In [12]:
N_EXP = 10
EPOCHS = 300

METRIC = 'ACC'

kf = KFold(n_splits=N_EXP)

results = [] # list of tuples (model_id, model_avg_val_loss, model_history)
for i, hp in enumerate(hps):

    net_metric = []
    for e, (train_indices, val_indices) in enumerate(kf.split(x_train)):
        print(f'----- HPs {i+1}/{N_MODELS} ||| EXP {e+1}/{N_EXP} -----')
        
        x_t = x_train[train_indices]
        y_t = y_train[train_indices]
        
        x_v = x_train[val_indices]
        y_v = y_train[val_indices]
        pltxt_v = pltxt_train[val_indices]
        
        key_bytes = np.array([aes.key_from_labels(pb, 'SBOX_OUT')
                              for pb in pltxt_v])
        
        print(':::: Computing SCORE ::::')
        net_ge = ge.guessing_entropy(model_type='MLP',
                                        n_exp=10,
                                        hp=hp,
                                        x_train=x_t,
                                        y_train=y_t,
                                        x_test=x_v,
                                        epochs=100,
                                        key_bytes=key_bytes,
                                        true_key_byte=true_key_byte_train)
        net_score = ge_score(net_ge)
        print(f':::: SCORE computed: {net_score} ::::')

        net_metric.append(net_score)
    
    net_metric = np.array(net_metric)
    net_avg_metric = np.mean(net_metric)
    
    results.append((i, net_avg_metric))
    
    print(f'Avg SCORE: {net_avg_metric}')
    print()

Model successfully trained.
---> diff = 1 <---
---> diff = 1 <---
---> diff = 1 <---
---> diff = 3911 <---
:::: SCORE computed: 89 ::::
Avg SCORE: 76.9

----- HPs 9/10 ||| EXP 1/10 -----
:::: Computing SCORE ::::
Training the model...
Model successfully trained.
:::: SCORE computed: 4000 ::::
----- HPs 9/10 ||| EXP 2/10 -----
:::: Computing SCORE ::::
Training the model...
Model successfully trained.
:::: SCORE computed: 4000 ::::
----- HPs 9/10 ||| EXP 3/10 -----
:::: Computing SCORE ::::
Training the model...
Model successfully trained.
:::: SCORE computed: 4000 ::::
----- HPs 9/10 ||| EXP 4/10 -----
:::: Computing SCORE ::::
Training the model...
Model successfully trained.
:::: SCORE computed: 4000 ::::
----- HPs 9/10 ||| EXP 5/10 -----
:::: Computing SCORE ::::
Training the model...
Model successfully trained.
:::: SCORE computed: 4000 ::::
----- HPs 9/10 ||| EXP 6/10 -----
:::: Computing SCORE ::::
Training the model...
Model successfully trained.
:::: SCORE computed: 4000 ::::
-

In [15]:
results.sort(key=lambda x: x[1])

print(f'K-Fold Crossvalidation Results: {[(idx, score) for idx, score in results]}')

K-Fold Crossvalidation Results: [(5, 27.4), (7, 76.9), (4, 261.5), (3, 461.5), (9, 3389.2), (6, 3853.9), (0, 4000.0), (1, 4000.0), (2, 4000.0), (8, 4000.0)]


In [17]:
best_net_hp = hps[results[0][0]]
best_net_hp

{'kernel_initializer': 'truncated_normal',
 'activation': 'tanh',
 'hidden_layers': 6,
 'hidden_neurons': 100,
 'dropout_rate': 0.2,
 'optimizer': keras.optimizer_v2.rmsprop.RMSprop,
 'learning_rate': 1e-05,
 'batch_size': 50}

In [18]:
from evaluator import Evaluator

ges_single = []
ges_diff = []

for i, device in enumerate(constants.DEVICES):
    for j, key in enumerate(constants.KEYS):
        
        print(f'----- {device}-{key} -----')
        
        if device == 'D1' and key == 'K1':
            path = '/prj/side_channel/PinataTraces/datasets/SBOX_OUT/D1-K1.json'
        else:
            path = f'/prj/side_channel/PinataTraces/datasets/SBOX_OUT/{device}-{key}.json'

        test_dl = DataLoader(path, BYTE_IDX)
        x_test, y_test, pltxt_test = test_dl.gen_test()
        true_key_byte = test_dl.get_true_key_byte()
        
        evaluator = Evaluator(x_test, pltxt_test, true_key_byte, 'MLP')

        ge_single_model = evaluator.guessing_entropy(n_exp=10,
                                                     hp=best_net_hp,
                                                     x_train=x_train,
                                                     y_train=y_train,
                                                     epochs=200,
                                                     single_model=True)
        
        print()
        
        ge_diff_models = evaluator.guessing_entropy(n_exp=10,
                                                    hp=best_net_hp,
                                                    x_train=x_train,
                                                    y_train=y_train,
                                                    epochs=200,
                                                    single_model=False)
        
        ges_single.append(ge_single_model)
        ges_diff.append(ge_diff_models)
        
        print()

ges_single = np.array(ges_single)
ges_diff = np.array(ges_diff)

----- D1-K1 -----
Loading the dataset... 
Dataset successfully loaded (54.91 seconds).


Recovering key-bytes: 100%|██████████| 10000/10000 [00:00<00:00, 10257.43it/s]


Training the model...


KeyboardInterrupt: 

In [None]:
N = 30

f, ax = plt.subplots(2, 1, figsize=(20,15))

device = 1
for i in range(len(ges_single)):
    
    key = i % 3
    
    ax[0].plot(ges_single[i][:N], marker='o', label=f'D{device}-K{key+1}')
    ax[1].plot(ges_diff[i][:N], marker='o', label=f'D{device}-K{key+1}')
    
    if key == 2:
        device += 1

ax[0].plot(np.zeros(N), color='r', ls='--')
ax[0].set_title('GE SINGLE MODEL: D1-K1 for training')
ax[0].set_xlabel('Number of Traces')
ax[0].set_ylabel('Guessing Entropy')
ax[0].set_xticks(range(N))
ax[0].grid()
ax[0].legend()

ax[1].plot(np.zeros(N), color='r', ls='--')
ax[1].set_title('GE INDEPENDENT MODELS: D1-K1 for training')
ax[1].set_xlabel('Number of Traces')
ax[1].set_ylabel('Guessing Entropy')
ax[1].set_xticks(range(N))
ax[1].grid()
ax[1].legend()

plt.show()

In [None]:
import matplotlib.colors as colors

N = 30

f, ax = plt.subplots(len(constants.DEVICES), len(constants.KEYS), figsize=(30,15))

row = 0
for i in range(len(ges_single)):
    
    col = i % 3
    
    ax[row, col].plot(ges_single[i][:N], marker='o', color=list(colors.TABLEAU_COLORS.keys())[i])
    ax[row, col].plot(np.zeros(N), color='r', ls='--')
    ax[row, col].set_title(f'GE SINGLE MODEL: D{row+1}-K{col+1}')
    ax[row, col].set_xlabel('Number of Traces')
    ax[row, col].set_ylabel('Guessing Entropy')
    ax[row, col].grid()
    ax[row, col].set_xticks(range(N))
    
    if col == 2:
        row += 1

plt.show()

In [None]:
import matplotlib.colors as colors

N = 30

f, ax = plt.subplots(len(constants.DEVICES), len(constants.KEYS), figsize=(30,15))

row = 0
for i in range(len(ges_diff)):
    
    col = i % 3
    
    ax[row, col].plot(ges_diff[i][:N], marker='o', color=list(colors.TABLEAU_COLORS.keys())[i])
    ax[row, col].plot(np.zeros(N), color='r', ls='--')
    ax[row, col].set_title(f'GE INDEPENDENT MODELS: D{row+1}-K{col+1}')
    ax[row, col].set_xlabel('Number of Traces')
    ax[row, col].set_ylabel('Guessing Entropy')
    ax[row, col].grid()
    ax[row, col].set_xticks(range(N))
    
    if col == 2:
        row += 1

plt.show()

In [None]:
f, ax = plt.subplots(2, 1, figsize=(20,15))

device = 1
for i in range(len(ges_single)):
    
    key = i % 3
    
    ax[0].plot(ges_single[i][:2000], label=f'D{device}-K{key+1}')
    ax[1].plot(ges_diff[i][:2000], label=f'D{device}-K{key+1}')
    
    if key == 2:
        device += 1

ax[0].set_title('GE SINGLE MODEL: D1-K1 for training')
ax[0].set_xlabel('Number of Traces')
ax[0].set_ylabel('Guessing Entropy')
ax[0].grid()
ax[0].legend()

ax[1].set_title('GE INDEPENDENT MODELS: D1-K1 for training')
ax[1].set_xlabel('Number of Traces')
ax[1].set_ylabel('Guessing Entropy')
ax[1].grid()
ax[1].legend()

plt.show()