In [2]:
import sys
import os
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt

def gen_X(num_var: int, sample_size : int):
    data_dictionary = {}
    for i in range(num_var):
        var_name = "x" + f"{i + 1}"
        data_dictionary[var_name] = list(np.random.binomial(1, .5, sample_size))
    return pd.DataFrame(data_dictionary)


In [3]:
sys.path.append('/root/barcode/')
from BarcodeScanner import tree_and_clustering, base_barcode

In [4]:
X = gen_X(3, 500)

In [7]:
y = X.apply(lambda seq: 1+ seq.x1 + seq.x2 + np.random.normal(), axis = 1)

In [9]:
barcode  = base_barcode(X, y)

In [11]:
barcode.L_inv

array([[ 1,  0,  0,  0,  0,  0,  0,  0],
       [-1,  0,  0,  0,  1,  0,  0,  0],
       [-1,  0,  1,  0,  0,  0,  0,  0],
       [-1,  1,  0,  0,  0,  0,  0,  0],
       [ 1,  0, -1,  0, -1,  0,  1,  0],
       [ 1, -1,  0,  0, -1,  1,  0,  0],
       [ 1, -1, -1,  1,  0,  0,  0,  0],
       [-1,  1,  1, -1,  1, -1, -1,  1]], dtype=int8)

In [15]:
import torch
from torch import nn

In [16]:
embedding = nn.Embedding(8, 8)

In [27]:
embedding.weight = nn.Parameter(torch.from_numpy(barcode.L_inv.T), requires_grad = False)

In [30]:
barcode.L_inv

array([[ 1,  0,  0,  0,  0,  0,  0,  0],
       [-1,  0,  0,  0,  1,  0,  0,  0],
       [-1,  0,  1,  0,  0,  0,  0,  0],
       [-1,  1,  0,  0,  0,  0,  0,  0],
       [ 1,  0, -1,  0, -1,  0,  1,  0],
       [ 1, -1,  0,  0, -1,  1,  0,  0],
       [ 1, -1, -1,  1,  0,  0,  0,  0],
       [-1,  1,  1, -1,  1, -1, -1,  1]], dtype=int8)

In [28]:
embedding.weight

Parameter containing:
tensor([[ 1, -1, -1, -1,  1,  1,  1, -1],
        [ 0,  0,  0,  1,  0, -1, -1,  1],
        [ 0,  0,  1,  0, -1,  0, -1,  1],
        [ 0,  0,  0,  0,  0,  0,  1, -1],
        [ 0,  1,  0,  0, -1, -1,  0,  1],
        [ 0,  0,  0,  0,  0,  1,  0, -1],
        [ 0,  0,  0,  0,  1,  0,  0, -1],
        [ 0,  0,  0,  0,  0,  0,  0,  1]], dtype=torch.int8)

In [29]:
embedding(torch.from_numpy(np.array([1])))

tensor([[ 0,  0,  0,  1,  0, -1, -1,  1]], dtype=torch.int8)

In [73]:
class Lasso_Barcode(nn.Module):
    def __init__(self, L_inv, num_variable):
        super().__init__()
        barcode_size = 2**num_variable
        L_inv = torch.from_numpy(L_inv.T)
        self.embedding = nn.Embedding(barcode_size, barcode_size)
        self.embedding.weight = nn.Parameter(L_inv, requires_grad = False)
        self.linear = nn.Linear(barcode_size, 1, bias = False)

    def l1_reg(self):
        with torch.no_grad():
            return self.linear.weight.abs().sum()

    def forward(self, x):
        x = self.embedding(x)
        x = self.linear(x.to(torch.float32))
        return x, self.l1_reg()

In [74]:
input_tensor = torch.from_numpy(barcode.gen_barcode(X)[:10])
input_tensor = input_tensor.to(torch.int32)

In [75]:
lasso = Lasso_Barcode(barcode.L_inv, 3)

In [76]:
lasso(input_tensor)

(tensor([[[-0.1033]],
 
         [[-0.0425]],
 
         [[-0.1992]],
 
         [[-0.1992]],
 
         [[-0.0425]],
 
         [[ 0.1261]],
 
         [[-0.0425]],
 
         [[ 0.1261]],
 
         [[ 0.1756]],
 
         [[ 0.0563]]], grad_fn=<UnsafeViewBackward0>),
 tensor(0.7227))

In [43]:
lasso.penalty()

tensor(1.3666)