In [None]:
!pip install scikit-learn
!pip install pandas
!pip install da4ml
# For da4ml, also required: !conda install conda-forge::verilator -y

In [None]:
import os
import random
import numpy as np
import torch.nn.functional as F
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

os.environ["KERAS_BACKEND"] = "torch"
import keras
keras.backend.set_image_data_format("channels_first")
from pquant.layers import PQDense
from pquant.activations import PQActivation
from pquant import get_ebops
from da4ml.trace.ops import quantize, relu
from da4ml.trace import comb_trace, FixedVariableArrayInput, FixedVariableArray
from da4ml.codegen import VerilogModel
import random

def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed passed to ``random.seed``, ``np.random.seed``,
            ``torch.manual_seed`` and the CUDA seeding functions.
    """
    # The surrounding cell only executes `import torch.nn.functional as F`
    # and `from torch.utils.data import ...`; neither binds the bare name
    # `torch`, so it is imported here to avoid a NameError on a fresh kernel.
    import torch

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
set_seed(42)

In [None]:


import pickle as pkl
from pathlib import Path

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


def get_data(data_path: Path, seed=42):
    """Load the ``hls4ml_lhc_jets_hlf`` OpenML dataset and split/scale it.

    The raw dataset is cached as a pickle at ``data_path``. If the ``zstd``
    module is importable the cache is compressed on write and decompressed on
    read; otherwise the raw pickle bytes are used.

    Args:
        data_path: Location of the cache file. Its parent directory is created
            if it does not exist yet.
        seed: ``random_state`` passed to ``train_test_split``.

    Returns:
        Tuple ``(X_train_val, X_test, y_train_val, y_test)``. Features are
        standardised with a ``StandardScaler`` fitted on the training split
        only. ``X_train_val`` and ``y_train_val`` are cast to ``float32``;
        ``y_test`` holds the integer class indices produced by the label map.
    """
    try:
        import zstd
    except ImportError:
        zstd = None

    data_path = Path(data_path)
    if not data_path.exists():
        print('Downloading data...')
        data = fetch_openml('hls4ml_lhc_jets_hlf')
        buf = pkl.dumps(data)
        if zstd is not None:
            buf = zstd.compress(buf)
        # Create the cache directory *before* writing; the original only did
        # this on the branch where the file already existed.
        data_path.parent.mkdir(parents=True, exist_ok=True)
        with open(data_path, 'wb') as f:
            f.write(buf)
    else:
        with open(data_path, 'rb') as f:
            buf = f.read()
        # NOTE(review): whether the cache is decompressed depends on zstd being
        # importable *now*, not on how the file was written; a cache written
        # with/without zstd cannot be read in the opposite environment.
        if zstd is not None:
            buf = zstd.decompress(buf)
        # SECURITY: unpickling executes arbitrary code; only use a cache file
        # that this notebook wrote itself.
        data = pkl.loads(buf)

    X, y = data['data'], data['target']
    # Map the single-letter class labels to integer class indices.
    codecs = {'g': 0, 'q': 1, 't': 4, 'w': 2, 'z': 3}
    y = np.array([codecs[i] for i in y])

    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=seed)

    X_train_val = X_train_val.astype(np.float32)
    X_test = X_test.astype(np.float32)

    # Fit the scaler on the training split only and reuse it for the test split.
    scaler = StandardScaler()
    X_train_val = scaler.fit_transform(X_train_val)
    X_test = scaler.transform(X_test)

    X_train_val = X_train_val.astype(np.float32)
    y_train_val = y_train_val.astype(np.float32)

    return X_train_val, X_test, y_train_val, y_test


# Load the cached dataset (downloading it on first use), then reset the
# Python and NumPy global RNGs to a known state for the cells that follow.
X_train, X_test, y_train, y_test = get_data(data_path=Path('/tmp/inp_data.zst'))
random.seed(42)
np.random.seed(42)

In [None]:
import torch.nn.functional as F
import torch
from torch.utils.data import TensorDataset, DataLoader

# Features as float tensors.
X_train_t, X_test_t = (torch.from_numpy(features).float() for features in (X_train, X_test))

# Labels as integer class indices ...
y_train_idx, y_test_idx = (torch.from_numpy(targets).long() for targets in (y_train, y_test))

# ... and as one-hot float vectors over the 5 classes.
y_train_oh, y_test_oh = (
    F.one_hot(indices, num_classes=5).float() for indices in (y_train_idx, y_test_idx)
)

train_ds = TensorDataset(X_train_t, y_train_oh)
test_ds = TensorDataset(X_test_t, y_test_oh)

# Only the training loader shuffles; both use the same batch size and workers.
train_loader = DataLoader(dataset=train_ds, batch_size=33200, shuffle=True, num_workers=4)
test_loader = DataLoader(dataset=test_ds, batch_size=33200, shuffle=False, num_workers=4)



In [None]:
from pquant import cs_config, dst_config

def build_model(config):
    """Create the dense classifier used in this notebook.

    Args:
        config: pquant configuration object forwarded to every ``PQDense`` and
            ``PQActivation`` layer.

    Returns:
        A ``torch.nn.Module`` made of four ``PQDense`` layers with a shared
        ``PQActivation`` ("relu") applied after each of the first three.
    """
    class Model(torch.nn.Module):
        """Four PQDense layers; ReLU after every layer except the last."""

        def __init__(self, config):
            super().__init__()
            self.dense1 = PQDense(config, 16, 64, in_quant_bits=(1, 3, 3))
            self.relu = PQActivation(config, "relu")
            self.dense2 = PQDense(config, 64, 32)
            self.dense3 = PQDense(config, 32, 32)
            self.dense4 = PQDense(config, 32, 5, quantize_output=True, out_quant_bits=(1, 3, 3))

        def forward(self, x):
            # Hidden layers share the same activation module.
            for hidden in (self.dense1, self.dense2, self.dense3):
                x = self.relu(hidden(x))
            # The output layer is not followed by an activation.
            return self.dense4(x)

    return Model(config)

# Start from the defaults returned by dst_config() and override the
# quantization bit-widths and the number of training epochs.
config = dst_config()
config.quantization_parameters.use_relu_multiplier = False
config.quantization_parameters.default_weight_fractional_bits = 3.0
config.quantization_parameters.default_data_fractional_bits = 2.0
config.quantization_parameters.default_data_integer_bits = 3.0
config.training_parameters.epochs = 1000

# Module.to() returns the module itself, so build and move in one step.
model = build_model(config).to("cuda")
model(torch.rand(1, 16).to("cuda"))  # Call once to build Keras layers

In [None]:
model

In [None]:
# Cross-entropy loss, Adam at lr=0.01, and a step schedule that scales the
# learning rate by 0.1 once the scheduler has been stepped 600 and 800 times.
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizer, gamma=0.1, milestones=[600, 800])

In [None]:
from pquant import get_layer_keep_ratio, get_model_losses
train_accuracies = []

def training_loop(model, trainloader, device, loss_func, optimizer, epoch, scheduler=None, *args, **kwargs):
    """Train ``model`` for one pass over ``trainloader`` and log the accuracy.

    The accuracy appended to the module-level ``train_accuracies`` list is
    computed over every sample seen in this pass (not only the last
    mini-batch). ``nan`` is recorded when the loader yields no batches.

    Args:
        model: Module to train.
        trainloader: Iterable of ``(inputs, one_hot_labels)`` batches.
        device: Device the batches are moved to.
        loss_func: Callable ``loss_func(outputs, labels)`` returning the task loss.
        optimizer: Optimizer stepped once per batch.
        epoch: Counter supplied by the caller; incremented once per batch.
        scheduler: Optional LR scheduler, stepped once after the pass.
        *args, **kwargs: Ignored; accepted so the caller can pass extra arguments.
    """
    # validate_loop() switches the model to eval mode; make sure training
    # happens in train mode (a no-op if the caller already did this).
    model.train()
    n_correct = 0
    n_seen = 0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_func(outputs, labels)
        # Add the model's own extra loss terms reported by pquant on top of the task loss.
        losses = get_model_losses(model, torch.tensor(0.).to(device))
        loss += losses
        loss.backward()
        optimizer.step()
        # NOTE(review): kept from the original. If the caller passes a tensor,
        # this in-place add is visible to the caller - confirm before removing.
        epoch += 1
        hits = torch.argmax(outputs, dim=1) == torch.argmax(labels, dim=1)
        n_correct += int(hits.sum())
        n_seen += hits.numel()
    if scheduler is not None:
        scheduler.step()
    accuracy = torch.tensor(n_correct / n_seen if n_seen else float("nan"))
    train_accuracies.append(accuracy.cpu().numpy())

val_accuracies = []
remaining_weights = []
ebops = []
def validate_loop(model, testloader, device, loss_func, epoch, *args, **kwargs):
    """Evaluate ``model`` on ``testloader`` and log accuracy and compression stats.

    Appends one entry per call to the module-level lists ``val_accuracies``
    (accuracy over all samples in ``testloader``; ``nan`` if it is empty),
    ``remaining_weights`` (``get_layer_keep_ratio(model)``) and ``ebops``
    (``get_ebops(model)``). The model is left in eval mode.

    Args:
        model: Module to evaluate.
        testloader: Iterable of ``(inputs, one_hot_labels)`` batches.
        device: Device the batches are moved to.
        loss_func: Unused; accepted for signature compatibility with the caller.
        epoch: Unused; accepted for signature compatibility with the caller.
        *args, **kwargs: Ignored.
    """
    n_correct = 0
    n_seen = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            hits = torch.argmax(outputs, dim=1) == torch.argmax(labels, dim=1)
            n_correct += int(hits.sum())
            n_seen += hits.numel()
        # Accuracy over the whole loader, not just the final batch.
        accuracy = torch.tensor(n_correct / n_seen if n_seen else float("nan"))
        val_accuracies.append(accuracy.cpu().numpy())
        ratio = get_layer_keep_ratio(model)
        remaining_weights.append(ratio.cpu().numpy())
        ebops.append(get_ebops(model).cpu().numpy())


In [None]:
from pquant import train_model
model.to("cuda")
# train_model drives the run, calling training_loop / validate_loop with the
# keyword arguments given here.
trained_model = train_model(model = model,
                                config = config,
                                train_func = training_loop,
                                valid_func = validate_loop,
                                trainloader = train_loader,
                                device="cuda",
                                testloader = test_loader,
                                loss_func = loss_func,
                                optimizer = optimizer,
                                scheduler=scheduler
                                )
# Summary of the last validation pass (the stray ':' before '%' is removed).
print(f"Remaining weights={remaining_weights[-1] * 100:.2f}%", f"   EBOPs={int(ebops[-1])}", f"   Accuracy={val_accuracies[-1]*100:.2f}%")

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per recorded epoch.
fig, ax = plt.subplots()
ax.plot(train_accuracies, label="Train")
ax.plot(val_accuracies, label="Valid")
ax.set_ylabel("Accuracy")
ax.set_xlabel('Epoch')
ax.set_ylim(0.6, 0.77)
ax.legend()
plt.show()

# Values recorded from get_layer_keep_ratio() at each validation pass.
fig, ax = plt.subplots()
ax.plot(remaining_weights)
ax.set_ylabel('Remaining weights')
ax.set_xlabel('Epoch')
plt.show()

# Values recorded from get_ebops() at each validation pass.
fig, ax = plt.subplots()
ax.plot(ebops)
ax.set_ylabel('EBOPs')
ax.set_xlabel('Epoch')
plt.show()

# da4ml
This part requires Verilator to be installed (`conda install conda-forge::verilator -y`). We extract the weight and bias matrices from the model as NumPy arrays and rebuild the model's forward pass from them using NumPy-style operations, so that da4ml can trace it and generate a Verilog model.

In [None]:
# Copy each dense layer's weight and bias off the device as NumPy arrays,
# in layer order: (w0, b0) is dense1 ... (w3, b3) is dense4.
(w0, b0), (w1, b1), (w2, b2), (w3, b3) = (
    (dense.weight.detach().cpu().numpy(), dense.bias.detach().cpu().numpy())
    for dense in (model.dense1, model.dense2, model.dense3, model.dense4)
)

In [None]:
data_i = int(config.quantization_parameters.default_data_integer_bits)
data_f = int(config.quantization_parameters.default_data_fractional_bits)
# Test inputs clipped to [-2**data_i, 2**data_i - 2**-data_f]. Convert the
# tensor to a NumPy array first: calling np.clip directly on a torch.Tensor
# relies on NumPy's fallback wrapping and may not return an ndarray.
data_np_test = np.clip(X_test_t.cpu().numpy(), -(2**data_i), 2**data_i-2**(-data_f))
import yaml

# Symbolic input vector with 16 entries; (16,) is an explicit 1-D shape.
inp = FixedVariableArrayInput((16,))
# NOTE(review): dense1 is built with in_quant_bits=(1, 3, 3) and dense4 with
# out_quant_bits=(1, 3, 3), while the input/output quantizers below use
# f=data_f. Confirm the bit-widths here match what the model was trained with.
# NOTE(review): w0..w3 / b0..b3 are read straight from the layers' .weight and
# .bias; this assumes they already hold the final quantized/pruned values - TODO confirm.
x = quantize(inp, k=1, i=data_i, f=data_f, overflow_mode="WRAP", round_mode="RND")

# Hidden layers: affine transform followed by an unsigned (k=0) saturating
# quantizer.
x = w0 @ x
x = x + b0
x = quantize(x, k=0, i=data_i, f=data_f, overflow_mode="SAT", round_mode="RND")
x = w1 @ x
x = x + b1
x = quantize(x, k=0, i=data_i, f=data_f, overflow_mode="SAT", round_mode="RND")
x = w2 @ x
x = x + b2
x = quantize(x, k=0, i=data_i, f=data_f, overflow_mode="SAT", round_mode="RND")
# Output layer: signed (k=1) quantizer.
x = w3 @ x
x = x + b3
out = quantize(x, k=1, i=data_i, f=data_f, overflow_mode="SAT", round_mode="RND")


# Trace the input -> output computation, then emit and compile the Verilog model.
comb_logic = comb_trace(inp, out)
verilog_model = VerilogModel(comb_logic, "vmodel", "path_to_model_folder", latency_cutoff=5, clock_uncertainty=0., part_name="xcu250-figd2104-2L-e")
verilog_model.write()
verilog_model.compile(verbose=True)

verilog_model