In [15]:
import torch
import lightgbm as lgb
from tqdm import tqdm
from loaders.lending_loader import load_data, mono_list
from monotonenorm import SigmaNet, GroupSort
from sklearn.metrics import balanced_accuracy_score, accuracy_score
import numpy as np
import mup
import pandas as pd

# Pick the compute device. The second GPU ("cuda:1") is preferred, as before,
# but only when it actually exists: on a single-GPU host "cuda:1" is an
# invalid ordinal and the first `.to(device)` would fail, so fall back to
# "cuda:0" there. Without CUDA everything runs on the CPU.
if torch.cuda.is_available():
    _gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{_gpu_index}")
else:
    device = torch.device("cpu")

In [56]:
# Load the train/test features and labels from the project loader.
Xtr, Ytr, Xts, Yts = load_data(get_categorical_info=False)

# One flag per feature column: 1 if the column index appears in `mono_list`,
# 0 otherwise.
monotonic_constraints = [
  1 if column in mono_list else 0
  for column in range(Xtr.shape[1])
]

In [57]:
# Per-column summary statistics of the training features (count, mean, std, quantiles).
pd.DataFrame(data=Xtr).describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
count,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,...,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0,418697.0
mean,0.334036,0.011373,0.406938,0.992018,0.018809,0.365412,0.345099,0.304984,0.000225,0.471998,...,0.061161,0.005493,0.021524,0.011414,0.007442,0.057581,0.00075,0.014263,0.00648,0.001595
std,0.224993,0.031971,0.369318,0.00722,0.009673,0.223429,0.193078,0.460401,0.014982,0.499216,...,0.239626,0.073913,0.145123,0.106225,0.085946,0.23295,0.027375,0.118574,0.080235,0.039911
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.166667,0.0,0.0,0.990526,0.012342,0.185751,0.213006,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.333333,0.0,0.4,0.993158,0.018288,0.320611,0.325156,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.5,0.0,0.8,0.995158,0.024775,0.491094,0.46067,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [58]:
# Number of leading training rows to keep. A single constant keeps the feature
# and label slices in lock-step; set it to None to use the whole training split.
N_TRAIN = 10000
Xtr = Xtr[:N_TRAIN]
Ytr = Ytr[:N_TRAIN]
# The test split is left at full size (truncation below is disabled).
#Xts = Xts[:1000]
#Yts = Yts[:1000]

In [59]:
# Gradient-boosted regression baseline using the same per-feature monotonicity
# flags as the neural network.
clf = lgb.LGBMRegressor(
  n_estimators=10000,
  max_depth=5,
  learning_rate=.1,
  monotone_constraint=monotonic_constraints,
)
# Early stopping and log silencing go through callbacks: the
# `early_stopping_rounds=` / `verbose=` keyword arguments of `fit` were
# deprecated in LightGBM 3.3 and removed in 4.0.
# NOTE(review): the test split doubles as the early-stopping validation set,
# so the scores reported for this model are optimistic.
clf.fit(
  Xtr,
  Ytr,
  eval_set=[(Xts, Yts)],
  eval_metric='mse',
  callbacks=[
    lgb.early_stopping(stopping_rounds=200, verbose=False),
    lgb.log_evaluation(period=0),
  ],
)

LGBMRegressor(max_depth=5,
              monotone_constraint=[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              n_estimators=10000)

In [60]:
# Sweep 100 decision thresholds over [0, 1] and report the best plain and
# balanced accuracy of the LightGBM scores on the test split.
# The predictions do not depend on the threshold, so compute them once
# instead of twice per threshold (200 predict calls in total before).
# NOTE(review): the threshold is selected on the test split itself, so both
# numbers are upper bounds rather than unbiased estimates.
test_scores = clf.predict(Xts)
acc, bacc = 0, 0
for threshold in np.linspace(0, 1, 100):
  predicted_labels = test_scores > threshold
  acc = max(acc, accuracy_score(Yts, predicted_labels))
  bacc = max(bacc, balanced_accuracy_score(Yts, predicted_labels))
print(f"Accuracy: {acc*100:.4f}%, Balanced Accuracy: {bacc*100:.4f}%")

Accuracy: 65.0715%, Balanced Accuracy: 65.1389%


In [74]:
# Fix the torch RNG so that layer construction and initialization below are repeatable.
torch.manual_seed(13)

# Passed as `alpha` to every `direct_norm` call in `Model`, and raised to the
# 4th power (one factor per linear layer) for SigmaNet's `sigma`.
per_layer_lip = 2
# Hidden width of the network that is actually trained.
width = 32

class Model(torch.nn.Module):
  """Fully connected network with three hidden layers and a sigmoid output.

  The network reads its input size from the global `Xtr` and the per-layer
  norm bound from the global `per_layer_lip`.

  Args:
    width: number of units in each hidden layer.
    robust: when True, every linear layer is wrapped with
      `monotonenorm.direct_norm` (bounded by `per_layer_lip`) and the
      activations are `GroupSort(2)`; when False the layers are left untouched
      and `ReLU` is used, i.e. an ordinary network.
    sigma: when True, the whole stack is wrapped in `SigmaNet` using the
      global `monotonic_constraints`.
  """

  def __init__(self, width, robust=False, sigma=False):
    super().__init__()

    if robust:
      # Imported lazily so the plain (non-robust) path never needs it.
      from monotonenorm import direct_norm

      def constrain(layer, kind):
        return direct_norm(layer, kind=kind, alpha=per_layer_lip)

      def make_activation():
        return GroupSort(2)
    else:
      def constrain(layer, kind):
        # No normalization: hand the layer back unchanged.
        return layer

      def make_activation():
        return torch.nn.ReLU()

    # Layers are created in input-to-output order so the RNG is consumed in
    # the same sequence regardless of how the list is assembled.
    layers = [
      constrain(torch.nn.Linear(Xtr.shape[1], width), "one-inf"),
      make_activation(),
      constrain(torch.nn.Linear(width, width), "inf"),
      make_activation(),
      constrain(torch.nn.Linear(width, width), "inf"),
      make_activation(),
      constrain(mup.MuReadout(width, 1), "inf"),
      torch.nn.Sigmoid(),
    ]
    network = torch.nn.Sequential(*layers)

    if sigma:
      network = SigmaNet(network, sigma=per_layer_lip**4, monotone_constraints=monotonic_constraints)
    self.nn = network

  def forward(self, x):
    """Run `x` through the wrapped network and return its output."""
    return self.nn(x)

# Width-1 and width-2 copies of the architecture are handed to mup as the
# base/delta shapes for the real, `width`-wide model.
base = Model(1)
delta = Model(2)
model = Model(width)
mup.set_base_shapes(model, base, delta=delta)

# Manual initialization with fixed bounds must go through `mup.init` rather
# than `torch.nn.init` (the same holds for the xavier_*/kaiming_* helpers).
for tensor in model.parameters():
    mup.init.uniform_(tensor, -0.1, 0.1)

model = model.to(device)

optimizer = mup.MuAdam(model.parameters(), lr=1e-3)
n_params = sum(p.numel() for p in model.parameters())
print('params:', n_params)
# Optional learning-rate decay, currently disabled:
#scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=.999)

params: 3073


In [75]:
# Show the module tree of the network that is about to be trained.
print(model)

Model(
  (nn): Sequential(
    (0): Linear(in_features=28, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): ReLU()
    (6): MuReadout(in_features=32, out_features=1, bias=True)
  )
)


In [76]:
# Convert both splits to float32 tensors on the compute device; labels are
# reshaped to a column so they match the (N, 1) model output.
Xtrt = torch.tensor(Xtr, dtype=torch.float32).to(device)
Ytrt = torch.tensor(Ytr, dtype=torch.float32).view(-1, 1).to(device)
Xtst = torch.tensor(Xts, dtype=torch.float32).to(device)
Ytst = torch.tensor(Yts, dtype=torch.float32).view(-1, 1).to(device)

# Alternative scaling (disabled): map features to [-1, 1] with the global
# min/max of the training tensor.
# min_ = Xtrt.min()
# max_ = Xtrt.max()
# Xtrt = (Xtrt - min_) / (max_ - min_) * 2 - 1 
# Xtst = (Xtst - min_) / (max_ - min_) * 2 - 1

# Standardize every feature to zero mean / unit std, using statistics of the
# training tensor only and applying the same transform to the test tensor.
mean = Xtrt.mean(0)
std = Xtrt.std(0)
# A feature that is constant within the training subset has std == 0, and the
# division would then fill that column with NaN/inf and poison the loss.
# Dividing such columns by 1 instead leaves them centered; columns with a
# positive std are scaled exactly as before.
std = torch.where(std > 0, std, torch.ones_like(std))
Xtrt = (Xtrt - mean) / std
Xtst = (Xtst - mean) / std


# Shuffled mini-batches of 512 training rows.
dataloader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(Xtrt, Ytrt), batch_size=int(2**9), shuffle=True)

# Train for up to 1000 epochs with MSE loss; after every epoch evaluate on the
# full test tensor and show train loss, test loss and best-threshold accuracy
# in the progress bar.

# The test labels and the threshold grid never change, so build them once
# rather than converting the labels to numpy for every threshold of every epoch.
y_true_np = Ytst.cpu().numpy()
thresholds = np.linspace(0, 1, 50)

bar = tqdm(range(1000))
for epoch in bar:
  for Xi, yi in dataloader:
    y_pred = model(Xi)
    losstr = torch.nn.functional.mse_loss(y_pred, yi)
    optimizer.zero_grad()
    losstr.backward()
    optimizer.step()
    #scheduler.step()

  with torch.no_grad():
    y_predts = model(Xtst)
    lossts = torch.nn.functional.mse_loss(y_predts, Ytst)
    # One device-to-host copy per epoch, reused for all thresholds.
    scores_np = y_predts.cpu().numpy()
    # Best accuracy over the threshold grid. The loop variable has its own
    # name so it no longer overwrites the epoch counter.
    # NOTE(review): the threshold is picked on the test split, so this is an
    # optimistic estimate.
    acc = max(accuracy_score(y_true_np, scores_np > threshold) for threshold in thresholds)
  # `acc` is formatted directly: `.item()` only exists on numpy scalars and
  # fails when accuracy_score returns a built-in float.
  bar.set_description(f'Loss: {losstr.item():.4f} {lossts.item():.4f}, acc: {acc:.4f}')

Loss: 0.2266 0.2182, acc: 0.6496:  10%|▉         | 95/1000 [00:26<04:16,  3.52it/s]


KeyboardInterrupt: 