Demo of current progress with Dataoob

In [1]:
# Imports
import sklearn.metrics as metrics
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dataoob.dataloader import data_loading, utils
# Prefer Apple's Metal (MPS) backend when this PyTorch build can use it, and
# fall back to the CPU otherwise so the notebook also runs on machines without MPS.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")


def cast(*args):
    """Convert NumPy arrays to float32 tensors placed on ``device``.

    Args:
        *args: Any number of NumPy arrays (each is passed to ``torch.from_numpy``).

    Returns:
        list[torch.Tensor]: One float32 tensor per input, in the same order.
    """
    return [
        torch.from_numpy(arg).to(dtype=torch.float32, device=device) for arg in args
    ]

Loading Data

In [4]:
# Data loading
# Sizes requested for the "train" and "valid" splits
# (presumably sample counts -- confirm against load_tabular_data).
dict_no = {"train": 1000, "valid": 400}

print("Loading Data")
# NOTE(review): the recorded run of this cell ended in a URLError, so this call
# apparently needs network access. The 0.1 argument is presumably a label-noise
# rate, given the returned name `noise_idx` -- confirm.
noise_idx = data_loading.load_tabular_data("adult", dict_no, 0.1)
print("Data Loaded")


Loading Data


URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>

In [3]:
# Read the train/valid/test CSV files and unpack the seven returned values.
# "minmax" is presumably the normalisation scheme -- confirm in data_loading.
x_train, y_train, x_valid, y_valid, x_test, y_test, col_names = (
    data_loading.preprocess_data("minmax", "train.csv", "valid.csv", "test.csv")
)


In [4]:
# Prepare data
# Move the train/valid arrays onto the device as float32 tensors, then
# one-hot encode both label tensors in a single call.
x1, labels_train, x2, labels_valid = cast(x_train, y_train, x_valid, y_valid)
y1, y2 = utils.one_hot_encode(labels_train, labels_valid)

Setting up the models and default arguments

In [5]:
# Imports
from dataoob.model import ann, logistic_regression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from dataoob.model import ClassifierSkLearnWrapper, ClassifierUnweightedSkLearnWrapper

In [6]:
# Candidate prediction models, keyed by a short name.
models = dict(
    # Wrappers for sklearn models; they make the API more cohesive.
    sklogreg=ClassifierSkLearnWrapper(LogisticRegression(), device=device),
    logreg=logistic_regression.LogisticRegression(x_train.shape[1]),
    # NOTE(review): ANN receives a literal 2 while logreg receives
    # x_train.shape[1] -- confirm which dimension ANN's first argument expects.
    ann=ann.ANN(2),
    knn=ClassifierUnweightedSkLearnWrapper(KNeighborsClassifier(2), device=device),
)

Evaluation metrics

In [7]:
def roc(a, b):
    """Return the ROC AUC from ``sklearn.metrics.roc_auc_score(a, b)``.

    Both tensors are detached from the autograd graph and copied to the CPU
    before being handed to scikit-learn.

    Args:
        a: Tensor passed as the first (``y_true``) argument.
        b: Tensor passed as the second (``y_score``) argument.
    """
    return metrics.roc_auc_score(a.detach().cpu(), b.detach().cpu())


def acc(a, b):
    """Return the accuracy between the per-row argmax of ``a`` and of ``b``.

    The argmax is taken along ``dim=1`` so every sample yields its own class
    index. Without ``dim``, ``torch.argmax`` returns a single index into the
    flattened tensor, which is not a per-sample label.

    Args:
        a: 2-D tensor of reference scores, one row per sample
            (assumes one-hot / per-class layout -- confirm for new callers).
        b: 2-D tensor of predicted scores with the same layout as ``a``.
    """
    return metrics.accuracy_score(
        torch.argmax(a, dim=1).detach().cpu(),
        torch.argmax(b, dim=1).detach().cpu(),
    )

Selecting your metrics and model

In [8]:
# Choose the prediction model and the evaluation metric used by the cells below.
model, metric = models["sklogreg"], roc

DVRL

In [10]:
from dataoob.dataval.dvrl.dvrl import DVRL
# Constructor arguments for the DVRL evaluator, gathered in one place so they
# can be inspected or tweaked before the evaluator is built.
dvrl_kwargs = {
    "pred_model": model,
    "metric": metric,
    "x_dim": x_train.shape[1],  # number of input features
    "y_dim": 2,  # presumably the one-hot label width -- confirm
    "hidden_dim": 100,
    "layer_number": 5,
    "comb_dim": 10,
    "act_fn": torch.nn.ReLU(),
    "device": device,
}
dvrl = DVRL(**dvrl_kwargs)


In [11]:
# Training hyper-parameters for the DVRL run.
BATCH_SIZE = 128
RL_EPOCHS = 2000

# Give the evaluator the training (x1, y1) and validation (x2, y2) tensors.
dvrl.input_data(x1, y1, x2, y2)

# NOTE(review): the recorded run below was stopped by a KeyboardInterrupt at
# about 43% of the epochs, so `e` was never assigned in that session.
dvrl.train_data_values(batch_size=BATCH_SIZE, rl_epochs=RL_EPOCHS)
e = dvrl.evaluate_data_values(x1, y1)


  0%|          | 0/2000 [00:00<?, ?it/s]

dvrl_perf=0.8254287941787941
reward_curr=-0.0630197505197505
torch.mean(sel_prob_curr)=tensor(0.6250, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-5.4184, device='mps:0', grad_fn=<AddBackward0>)


  nonzero_finite_vals = torch.masked_select(
  1%|          | 24/2000 [00:02<01:40, 19.65it/s]

dvrl_perf=0.7892736486486487
reward_curr=-0.09917489604989593
torch.mean(sel_prob_curr)=tensor(0.5234, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-8.8080, device='mps:0', grad_fn=<AddBackward0>)


  2%|▏         | 45/2000 [00:03<01:24, 23.24it/s]

dvrl_perf=0.7671842515592515
reward_curr=-0.1212642931392931
torch.mean(sel_prob_curr)=tensor(0.4531, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-10.8721, device='mps:0', grad_fn=<AddBackward0>)


  3%|▎         | 63/2000 [00:03<01:22, 23.37it/s]

dvrl_perf=0.8267931392931394
reward_curr=-0.06165540540540526
torch.mean(sel_prob_curr)=tensor(0.5078, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-5.4981, device='mps:0', grad_fn=<AddBackward0>)


  4%|▍         | 84/2000 [00:04<01:20, 23.89it/s]

dvrl_perf=0.8516112266112267
reward_curr=-0.03683731808731794
torch.mean(sel_prob_curr)=tensor(0.4922, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-3.2902, device='mps:0', grad_fn=<AddBackward0>)


  5%|▌         | 105/2000 [00:05<01:17, 24.36it/s]

dvrl_perf=0.830918659043659
reward_curr=-0.05752988565488559
torch.mean(sel_prob_curr)=tensor(0.5234, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-5.1031, device='mps:0', grad_fn=<AddBackward0>)


  6%|▌         | 123/2000 [00:06<01:19, 23.74it/s]

dvrl_perf=0.8629158004158004
reward_curr=-0.025532744282744213
torch.mean(sel_prob_curr)=tensor(0.5781, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-2.2357, device='mps:0', grad_fn=<AddBackward0>)


  7%|▋         | 144/2000 [00:07<01:16, 24.38it/s]

dvrl_perf=0.8124350311850312
reward_curr=-0.07601351351351338
torch.mean(sel_prob_curr)=tensor(0.5938, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-6.6416, device='mps:0', grad_fn=<AddBackward0>)


  8%|▊         | 165/2000 [00:08<01:15, 24.15it/s]

dvrl_perf=0.8769815488565488
reward_curr=-0.011466995841995842
torch.mean(sel_prob_curr)=tensor(0.5312, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-1.0128, device='mps:0', grad_fn=<AddBackward0>)


  9%|▉         | 183/2000 [00:08<01:18, 23.25it/s]

dvrl_perf=0.7813474532224532
reward_curr=-0.10710109147609137
torch.mean(sel_prob_curr)=tensor(0.5078, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-9.5173, device='mps:0', grad_fn=<AddBackward0>)


 10%|█         | 204/2000 [00:09<01:13, 24.50it/s]

dvrl_perf=0.7943737006237006
reward_curr=-0.09407484407484401
torch.mean(sel_prob_curr)=tensor(0.5391, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-8.3207, device='mps:0', grad_fn=<AddBackward0>)


 11%|█▏        | 225/2000 [00:10<01:11, 24.78it/s]

dvrl_perf=0.8070426195426195
reward_curr=-0.08140592515592515
torch.mean(sel_prob_curr)=tensor(0.4766, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-7.2930, device='mps:0', grad_fn=<AddBackward0>)


 12%|█▏        | 243/2000 [00:11<01:14, 23.54it/s]

dvrl_perf=0.8360511954261954
reward_curr=-0.05239734927234918
torch.mean(sel_prob_curr)=tensor(0.6172, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-4.5459, device='mps:0', grad_fn=<AddBackward0>)


 13%|█▎        | 264/2000 [00:12<01:11, 24.28it/s]

dvrl_perf=0.6323089916839917
reward_curr=-0.25613955301455293
torch.mean(sel_prob_curr)=tensor(0.4609, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-22.9254, device='mps:0', grad_fn=<AddBackward0>)


 14%|█▍        | 285/2000 [00:12<01:11, 23.98it/s]

dvrl_perf=0.7912551975051976
reward_curr=-0.09719334719334705
torch.mean(sel_prob_curr)=tensor(0.5859, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-8.5005, device='mps:0', grad_fn=<AddBackward0>)


 15%|█▌        | 303/2000 [00:13<01:08, 24.81it/s]

dvrl_perf=0.8256886694386694
reward_curr=-0.06275987525987525
torch.mean(sel_prob_curr)=tensor(0.5781, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-5.5028, device='mps:0', grad_fn=<AddBackward0>)


 16%|█▌        | 324/2000 [00:14<01:09, 24.24it/s]

dvrl_perf=0.8628183471933472
reward_curr=-0.02563019750519746
torch.mean(sel_prob_curr)=tensor(0.5703, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-2.2565, device='mps:0', grad_fn=<AddBackward0>)


 17%|█▋        | 345/2000 [00:15<01:07, 24.37it/s]

dvrl_perf=0.8534303534303534
reward_curr=-0.03501819126819117
torch.mean(sel_prob_curr)=tensor(0.5156, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-3.0975, device='mps:0', grad_fn=<AddBackward0>)


 18%|█▊        | 363/2000 [00:16<01:05, 24.91it/s]

dvrl_perf=0.7997011434511435
reward_curr=-0.0887474012474011
torch.mean(sel_prob_curr)=tensor(0.4922, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-7.8813, device='mps:0', grad_fn=<AddBackward0>)


 19%|█▉        | 384/2000 [00:16<01:05, 24.61it/s]

dvrl_perf=0.8616489085239085
reward_curr=-0.0267996361746361
torch.mean(sel_prob_curr)=tensor(0.5703, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-2.3605, device='mps:0', grad_fn=<AddBackward0>)


 20%|██        | 405/2000 [00:17<01:05, 24.35it/s]

dvrl_perf=0.7615319646569647
reward_curr=-0.1269165800415799
torch.mean(sel_prob_curr)=tensor(0.4922, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-11.3649, device='mps:0', grad_fn=<AddBackward0>)


 21%|██        | 423/2000 [00:18<01:05, 23.95it/s]

dvrl_perf=0.868698024948025
reward_curr=-0.01975051975051967
torch.mean(sel_prob_curr)=tensor(0.5625, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-1.7328, device='mps:0', grad_fn=<AddBackward0>)


 22%|██▏       | 444/2000 [00:19<01:04, 24.11it/s]

dvrl_perf=0.8315033783783783
reward_curr=-0.05694516632016633
torch.mean(sel_prob_curr)=tensor(0.5859, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-4.9608, device='mps:0', grad_fn=<AddBackward0>)


 23%|██▎       | 462/2000 [00:20<01:04, 23.85it/s]

dvrl_perf=0.8486551455301454
reward_curr=-0.03979339916839919
torch.mean(sel_prob_curr)=tensor(0.5078, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-3.5199, device='mps:0', grad_fn=<AddBackward0>)


 24%|██▍       | 483/2000 [00:21<01:04, 23.49it/s]

dvrl_perf=0.8327377858627858
reward_curr=-0.05571075883575882
torch.mean(sel_prob_curr)=tensor(0.5703, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-4.9120, device='mps:0', grad_fn=<AddBackward0>)


 25%|██▌       | 504/2000 [00:21<01:00, 24.64it/s]

dvrl_perf=0.8471608627858628
reward_curr=-0.04128768191268184
torch.mean(sel_prob_curr)=tensor(0.5781, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-3.6130, device='mps:0', grad_fn=<AddBackward0>)


 26%|██▋       | 525/2000 [00:22<01:00, 24.37it/s]

dvrl_perf=0.7732263513513513
reward_curr=-0.1152221933471933
torch.mean(sel_prob_curr)=tensor(0.4922, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-10.2912, device='mps:0', grad_fn=<AddBackward0>)


 27%|██▋       | 543/2000 [00:23<00:58, 24.88it/s]

dvrl_perf=0.8122076403326404
reward_curr=-0.07624090436590425
torch.mean(sel_prob_curr)=tensor(0.6328, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-6.4903, device='mps:0', grad_fn=<AddBackward0>)


 28%|██▊       | 564/2000 [00:24<00:58, 24.56it/s]

dvrl_perf=0.7510070166320166
reward_curr=-0.13744152806652798
torch.mean(sel_prob_curr)=tensor(0.5234, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-12.0805, device='mps:0', grad_fn=<AddBackward0>)


 29%|██▉       | 585/2000 [00:25<00:59, 23.78it/s]

dvrl_perf=0.7729989604989604
reward_curr=-0.11544958419958418
torch.mean(sel_prob_curr)=tensor(0.5859, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-9.9894, device='mps:0', grad_fn=<AddBackward0>)


 30%|███       | 603/2000 [00:25<00:56, 24.53it/s]

dvrl_perf=0.8143840956340956
reward_curr=-0.07406444906444898
torch.mean(sel_prob_curr)=tensor(0.5234, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-6.6154, device='mps:0', grad_fn=<AddBackward0>)


 31%|███       | 624/2000 [00:26<00:55, 24.80it/s]

dvrl_perf=0.7105314449064448
reward_curr=-0.1779170997920998
torch.mean(sel_prob_curr)=tensor(0.5156, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-15.8207, device='mps:0', grad_fn=<AddBackward0>)


 32%|███▏      | 642/2000 [00:27<01:13, 18.59it/s]

dvrl_perf=0.7112785862785863
reward_curr=-0.1771699584199583
torch.mean(sel_prob_curr)=tensor(0.4453, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-15.9634, device='mps:0', grad_fn=<AddBackward0>)


 33%|███▎      | 663/2000 [00:28<00:58, 22.86it/s]

dvrl_perf=0.7637733887733887
reward_curr=-0.12467515592515588
torch.mean(sel_prob_curr)=tensor(0.5938, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-10.9079, device='mps:0', grad_fn=<AddBackward0>)


 34%|███▍      | 684/2000 [00:29<00:54, 24.11it/s]

dvrl_perf=0.7532484407484408
reward_curr=-0.13520010395010384
torch.mean(sel_prob_curr)=tensor(0.4844, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-11.9960, device='mps:0', grad_fn=<AddBackward0>)


 35%|███▌      | 702/2000 [00:30<01:07, 19.30it/s]

dvrl_perf=0.7332380457380456
reward_curr=-0.155210498960499
torch.mean(sel_prob_curr)=tensor(0.6094, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-13.4440, device='mps:0', grad_fn=<AddBackward0>)


 36%|███▌      | 723/2000 [00:31<00:57, 22.12it/s]

dvrl_perf=0.8505717255717256
reward_curr=-0.03787681912681906
torch.mean(sel_prob_curr)=tensor(0.5469, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-3.3011, device='mps:0', grad_fn=<AddBackward0>)


 37%|███▋      | 744/2000 [00:32<00:57, 21.87it/s]

dvrl_perf=0.8018451143451144
reward_curr=-0.08660343035343021
torch.mean(sel_prob_curr)=tensor(0.5469, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-7.4516, device='mps:0', grad_fn=<AddBackward0>)


 38%|███▊      | 762/2000 [00:33<01:03, 19.55it/s]

dvrl_perf=0.8055483367983367
reward_curr=-0.08290020790020791
torch.mean(sel_prob_curr)=tensor(0.6016, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-7.1065, device='mps:0', grad_fn=<AddBackward0>)


 39%|███▉      | 783/2000 [00:34<00:53, 22.64it/s]

dvrl_perf=0.7172882016632016
reward_curr=-0.171160343035343
torch.mean(sel_prob_curr)=tensor(0.6406, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-14.6848, device='mps:0', grad_fn=<AddBackward0>)


 40%|████      | 804/2000 [00:35<00:55, 21.55it/s]

dvrl_perf=0.7653976091476091
reward_curr=-0.12305093555093549
torch.mean(sel_prob_curr)=tensor(0.5078, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-10.9089, device='mps:0', grad_fn=<AddBackward0>)


 41%|████      | 822/2000 [00:36<01:11, 16.53it/s]

dvrl_perf=0.6844139812889813
reward_curr=-0.20403456340956327
torch.mean(sel_prob_curr)=tensor(0.5391, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-17.7400, device='mps:0', grad_fn=<AddBackward0>)


 42%|████▏     | 843/2000 [00:37<01:05, 17.77it/s]

dvrl_perf=0.8293269230769231
reward_curr=-0.05912162162162149
torch.mean(sel_prob_curr)=tensor(0.5938, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-4.9829, device='mps:0', grad_fn=<AddBackward0>)


 43%|████▎     | 862/2000 [00:38<01:02, 18.12it/s]

dvrl_perf=0.8483952702702702
reward_curr=-0.040053274428274444
torch.mean(sel_prob_curr)=tensor(0.6250, device='mps:0', grad_fn=<MeanBackward0>)
loss=tensor(-3.2765, device='mps:0', grad_fn=<AddBackward0>)


 43%|████▎     | 864/2000 [00:38<00:50, 22.47it/s]


KeyboardInterrupt: 

Data Shapley

In [25]:
from dataoob.dataval.shap.shap import ShapEvaluator
# Build a ShapEvaluator around the selected model and metric.
# NOTE(review): the recorded output of this cell is a TypeError reporting a
# missing positional argument 'pred_model', although it is passed by keyword
# here. The cause is not visible in this notebook -- check
# ShapEvaluator.__init__ in dataoob before relying on this cell.
# GR_threshold is presumably a convergence threshold -- confirm in the library.
ShapEvaluator(
    pred_model=model,
    metric=metric,
    GR_threshold=1.01
)

TypeError: ShapEvaluator.__init__() missing 1 required positional argument: 'pred_model'

KNN