In [1]:
import sys
import os
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt

def gen_X(num_var: int, sample_size: int, seed=None):
    """Generate a DataFrame of independent Bernoulli(0.5) indicator columns.

    Args:
        num_var: Number of columns; they are named ``x1`` ... ``x<num_var>``.
        sample_size: Number of rows drawn for every column.
        seed: Optional integer seed. When given, draws come from a private
            ``np.random.RandomState(seed)`` so the frame is reproducible and
            the global NumPy RNG is left untouched. When ``None`` (default)
            the global ``np.random`` state is used, exactly as before.

    Returns:
        pd.DataFrame with ``sample_size`` rows and ``num_var`` columns of 0/1.
    """
    # Both the module and a RandomState expose the same ``binomial`` method.
    rng = np.random if seed is None else np.random.RandomState(seed)
    return pd.DataFrame(
        {f"x{i + 1}": list(rng.binomial(1, .5, sample_size)) for i in range(num_var)}
    )


In [2]:
# Directory holding the project-local BarcodeScanner module. Defaults to the
# original hard-coded location; set the BARCODE_DIR environment variable to
# point elsewhere without editing this cell.
BARCODE_DIR = os.environ.get("BARCODE_DIR", '/root/barcode/')
# Only append once so re-running the cell does not stack duplicate entries.
if BARCODE_DIR not in sys.path:
    sys.path.append(BARCODE_DIR)
from BarcodeScanner import base_barcode

In [118]:
# Seed NumPy's global RNG so the simulated design matrix (and any later draws
# from np.random when cells are run in order) are reproducible on a fresh kernel.
np.random.seed(0)
# 2000 observations of 3 binary covariates x1..x3.
X = gen_X(3, 2000)

In [119]:
# Simulated response: intercept 1, main effects of x1 and x2, an x2*x3
# interaction, plus Gaussian noise with standard deviation 1.4.
# Computed column-wise instead of DataFrame.apply(axis=1), which would run a
# Python lambda once per row; the terms are added in the same order as before.
y = (
    1
    + X["x1"]
    + X["x2"]
    + X["x2"] * X["x3"]
    + np.random.normal(scale=1.4, size=len(X))
)

In [120]:
# Build the project-local barcode object from the covariates X and response y.
# Its `L` attribute is what the Lasso_Barcode model is constructed from later on.
# NOTE(review): base_barcode's internals are not visible in this notebook.
barcode = base_barcode(X, y)

In [121]:
import torch
from torch import nn

In [215]:
class Lasso_Barcode(nn.Module):
    """L1-regularised linear model on top of a fixed barcode embedding.

    Every integer barcode index selects one row of ``L_inv``; a bias-free
    linear layer then maps that row to a scalar prediction. The embedding table
    is frozen, so the linear weights are the only trainable parameters.

    Args:
        L_inv: NumPy array of shape ``(2**num_variable, 2**num_variable)`` that
            is used verbatim (cast to float32) as the embedding table.
        num_variable: Number of binary variables; the number of distinct
            barcodes is taken to be ``2**num_variable``.

    Raises:
        ValueError: If ``L_inv`` does not have the expected square shape.
    """

    def __init__(self, L_inv, num_variable):
        super().__init__()
        barcode_size = 2 ** num_variable
        # Normalise the dtype via float64 first, then store the table as float32.
        table = torch.from_numpy(L_inv.astype(float)).to(torch.float32)
        # Fail here with a clear message instead of with a matmul shape error
        # on the first forward pass.
        if tuple(table.shape) != (barcode_size, barcode_size):
            raise ValueError(
                f"L_inv must have shape ({barcode_size}, {barcode_size}) for "
                f"num_variable={num_variable}, got {tuple(table.shape)}"
            )
        # from_pretrained avoids allocating and randomly initialising a table
        # that would be overwritten immediately; freeze=True keeps it fixed.
        self.embedding = nn.Embedding.from_pretrained(table, freeze=True)
        self.linear = nn.Linear(barcode_size, 1, bias=False, dtype=torch.float32)

    def l1_reg(self):
        """Return the L1 norm (sum of absolute values) of the linear weights."""
        return self.linear.weight.abs().sum()

    def forward(self, x):
        """Predict from barcode indices.

        Args:
            x: Integer tensor of barcode indices into the embedding table.

        Returns:
            Tuple ``(prediction, l1_penalty)``: the linear layer's output for
            the looked-up rows (last dimension of size 1) and the current
            L1 norm of the linear weights.
        """
        x = self.embedding(x)
        x = self.linear(x)
        return x, self.l1_reg()

In [216]:
# Flatten the generated barcodes to a 1-D array and hand them to torch as
# int32 indices for the model's embedding lookup.
input_tensor = torch.from_numpy(
    base_barcode.gen_barcode(X).reshape(-1)
).to(torch.int32)

In [217]:
# Seed torch so the random initialisation of the linear layer is reproducible.
torch.manual_seed(0)
# One barcode bit per column of X (3 here), so the model size follows the data.
# NOTE(review): the constructor's parameter is named L_inv but barcode.L is
# passed — confirm which matrix base_barcode exposes under `L`.
lasso = Lasso_Barcode(barcode.L, X.shape[1])

In [218]:
# Inspect the linear layer's weights as initialised, before the optimizer and
# training cells below are run.
lasso.linear.weight

Parameter containing:
tensor([[-0.1291, -0.2770,  0.2925,  0.0964,  0.0875, -0.1415, -0.2600, -0.0501]],
       requires_grad=True)

In [512]:
import torch.optim as optim

# Adam is handed every parameter registered on the model; the embedding table
# is created with requires_grad=False, so only the linear weights receive
# gradients during training.
optimizer = optim.Adam(params=lasso.parameters(), lr=0.01)
# Squared-error data term; the L1 penalty is added to it in the training loop.
criterion = nn.MSELoss()


In [572]:
# input_tensor = torch.from_numpy(barcode.gen_barcode(X).reshape(-1))
# input_tensor = input_tensor.to(torch.int32)

# Weight of the L1 penalty in the training objective
# (loss = MSE + alpha * l1_reg in the training loop).
alpha = 0.6


In [573]:
# Regression target as a float32 column vector of shape (n_samples, 1), which
# lines up with the model's (n_samples, 1) predictions inside MSELoss.
output_feature = torch.from_numpy(
    y.to_numpy().reshape(-1, 1)
).to(torch.float32)

In [580]:
num_epochs = 1000
for epoch in range(num_epochs):
    # Drop gradients accumulated by the previous step.
    optimizer.zero_grad()

    # The model returns its predictions together with the L1 norm of the
    # linear weights.
    outputs, l1_reg = lasso(input_tensor)

    # Lasso objective on the full data set each epoch.
    loss = (
        criterion(outputs, output_feature)  # data-fit term (MSE)
        + alpha * l1_reg                    # sparsity penalty
    )

    loss.backward()
    optimizer.step()


In [581]:
# L1 norm of the linear weights after the training cell above — the same
# quantity that Lasso_Barcode.l1_reg() penalises.
lasso.linear.weight[0].abs().sum()

tensor(2.6277, grad_fn=<SumBackward0>)

In [523]:
# NOTE(review): duplicate of the previous cell. Its execution count (523)
# predates the training cell (580), so the saved output reflects an earlier
# training state; re-run in order or remove.
lasso.linear.weight[0].abs().sum()

tensor(3.7366, grad_fn=<SumBackward0>)

In [582]:
# Count the weights whose magnitude is below 0.01, i.e. the coefficients
# treated here as effectively zeroed by the L1 penalty.
lasso.linear.weight[0].abs().lt(0.01).sum()

tensor(5)

In [583]:
# Show the individual linear weights, one per barcode embedding dimension.
lasso.linear.weight[0]

tensor([ 1.3022e+00,  4.0022e-01,  9.0928e-01,  5.5994e-03, -1.3209e-04,
        -1.5695e-03,  5.4659e-03,  3.2725e-03], grad_fn=<SelectBackward0>)