Demo of current progress with Dataoob

In [1]:
# Imports
import sklearn.metrics as metrics
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dataoob.dataloader import data_loader
# Pick the best available accelerator instead of hard-coding Apple's MPS
# backend, so the notebook also runs on machines without it.
# Preference order: MPS (the original choice) -> CUDA -> CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

Loading Data

In [3]:
# Number of samples requested for each split.
train_count, valid_count, test_count = 1000, 400, 100

# Fetch the three (x, y) splits on the selected device.
# NOTE(review): 'adult' is presumably the dataset name, and the meaning of the
# positional `False` is not visible from this notebook — confirm against
# data_loader.DataLoader's signature.
(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = data_loader.DataLoader(
    'adult',
    False,
    train_count,
    valid_count,
    test_count,
    categorical=True,
    device=device,
)


Setting up the models and default arguments

In [4]:
# Imports
from dataoob.model import ann, logistic_regression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

from dataoob.model import ClassifierSkLearnWrapper, ClassifierUnweightedSkLearnWrapper

In [5]:
# Registry of candidate prediction models, keyed by a short name.
models = {}

# Wrappers for sklearn models, makes the API more cohesive
models['sklogreg'] = ClassifierSkLearnWrapper(LogisticRegression(), device=device)

# Project-native models; the logistic regression is sized from the feature count.
models['logreg'] = logistic_regression.LogisticRegression(x_train.shape[1])
# NOTE(review): what the `2` passed to ANN means is not visible here — confirm.
models['ann'] = ann.ANN(2)

# KNeighborsClassifier(2) -> n_neighbors=2, wrapped with the "unweighted" adapter.
models['knn'] = ClassifierUnweightedSkLearnWrapper(KNeighborsClassifier(2), device=device)

Some metrics

In [7]:
def roc(a, b):
    """ROC AUC between two tensors, computed with scikit-learn.

    Both tensors are detached and moved to the CPU before being handed to
    ``sklearn.metrics.roc_auc_score``; ``a`` is forwarded as ``y_true`` and
    ``b`` as ``y_score``.
    """
    return metrics.roc_auc_score(a.detach().cpu(), b.detach().cpu())


def acc(a, b):
    """Accuracy between two tensors of per-class scores / one-hot labels.

    Each tensor is reduced to class indices with ``argmax`` along dim 1 and the
    resulting index vectors are compared with ``sklearn.metrics.accuracy_score``
    (``a`` forwarded as ``y_true``, ``b`` as ``y_pred``).

    Assumes both inputs are 2-D ``(n_samples, n_classes)`` — TODO confirm
    against the data loader's label encoding.
    """
    # argmax needs an explicit dim: without it torch flattens the tensor and
    # returns a single index, which is not a per-sample label vector.
    return metrics.accuracy_score(
        torch.argmax(a, dim=1).detach().cpu(),
        torch.argmax(b, dim=1).detach().cpu(),
    )

Selecting your metric and model

In [8]:
# Choose the prediction model (by registry key) and the evaluation metric
# that the data valuation methods below will use.
model, metric = models['sklogreg'], roc

DVRL

In [9]:
from dataoob.dataval.dvrl.dvrl import DVRL

# Keyword arguments for the DVRL evaluator, gathered in one place.
dvrl_config = {
    # Model and metric picked in the selection cell.
    'pred_model': model,
    'metric': metric,
    # Input width is taken from the training features; y_dim is fixed at 2.
    'x_dim': x_train.shape[1],
    'y_dim': 2,
    # Sizes and activation forwarded to DVRL as-is.
    'hidden_dim': 100,
    'layer_number': 5,
    'comb_dim': 10,
    'act_fn': torch.nn.ReLU(),
    'device': device,
}
dvrl = DVRL(**dvrl_config)


In [10]:
# Hand the training and validation splits to the DVRL evaluator.
dvrl.input_data(x_train, y_train, x_valid, y_valid)

# NOTE(review): the recorded run of this cell ended with
# "TypeError: ConcatDataset.__init__() takes 2 positional arguments but 4 were given".
# The traceback is not shown, so which of these calls raised it needs confirming.
dvrl.train_data_values(batch_size=128, rl_epochs=2000)
# Presumably yields one value per training point — TODO confirm the return type.
e = dvrl.evaluate_data_values(x_train, y_train)


TypeError: ConcatDataset.__init__() takes 2 positional arguments but 4 were given

Data Shapley

In [None]:
from dataoob.dataval.shap.shap import ShapEvaluator

# Keep a handle on the evaluator so later cells can use it, the same way the
# DVRL cell binds `dvrl`; previously the instance was built and then discarded.
# NOTE(review): GR_threshold is presumably a Gelman-Rubin convergence
# threshold — confirm against ShapEvaluator's documentation.
shap_evaluator = ShapEvaluator(
    pred_model=model,
    metric=metric,
    GR_threshold=1.01,
)
# Last expression: still displayed as the cell output, as before.
shap_evaluator

KNN