In [1]:
import numpy as np
import pandas as pd

from hulearn.datasets import load_titanic
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the Titanic data as a DataFrame, then separate the 'survived' target
# column from the remaining feature columns.
df = load_titanic(as_frame=True)
y = df['survived']
X = df.drop(columns=['survived'])

In [3]:
# Preview the first rows of the Titanic frame loaded above.
df.head()

Unnamed: 0,survived,pclass,name,sex,age,fare,sibsp,parch
0,0,3,"Braund, Mr. Owen Harris",male,22.0,7.25,1,0
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,71.2833,1,0
2,1,3,"Heikkinen, Miss. Laina",female,26.0,7.925,0,0
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,53.1,1,0
4,0,3,"Allen, Mr. William Henry",male,35.0,8.05,0,0


In [4]:
def fare_based(dataf, threshold=10):
    """Label each row 1 when its fare is strictly above ``threshold``, else 0.

    Parameters
    ----------
    dataf : mapping-like with a ``'fare'`` entry (e.g. a pandas DataFrame)
        Only the ``'fare'`` column is read.
    threshold : number, default 10
        Cut-off; a fare exactly equal to it is labelled 0.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 labels, one per row of ``dataf``.
    """
    above_cutoff = dataf['fare'] > threshold
    # Convert the boolean mask to a plain numpy array of integer labels.
    return np.asarray(above_cutoff).astype(int)

In [5]:
from hulearn.classification import FunctionClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score, make_scorer

# Wrap fare_based in a FunctionClassifier. `threshold` is passed as a keyword
# argument; it is the parameter that the grid search below varies.
mod = FunctionClassifier(fare_based, threshold=10)

In [6]:
# Try 30 evenly spaced fare thresholds between 0 and 100 with 2-fold CV,
# recording accuracy, precision and recall for each one. With several
# scorers, `refit` names the metric used to pick the final estimator.
grid = GridSearchCV(
    estimator=mod,
    param_grid={'threshold': np.linspace(0, 100, 30)},
    scoring={
        'accuracy': make_scorer(accuracy_score),
        'precision': make_scorer(precision_score),
        'recall': make_scorer(recall_score),
    },
    refit='accuracy',
    cv=2,
)
# The trailing semicolon suppresses the estimator repr in the cell output.
grid.fit(X, y);


In [7]:
# Put the grid-search results in a frame indexed by the threshold that was tried.
score_df = pd.DataFrame(grid.cv_results_).set_index('param_threshold')
# Keep only the mean test score columns of the three metrics.
score_df = score_df[['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall']]

# One line per metric, plotted against the threshold index.
score_df.plot(title="scores vs. fare-threshold", figsize=(12, 5));

In [9]:
from sklego.datasets import load_penguins
from hulearn.experimental.interactive import InteractiveCharts
from hulearn.classification import InteractiveClassifier

# Load the penguins data and drop every row that contains a missing value.
# NOTE: `df` is rebound here; it no longer refers to the Titanic frame.
penguins_raw = load_penguins(as_frame=True)
df = penguins_raw.dropna()

# InteractiveCharts object built on the cleaned frame, using the "species"
# column as labels. Despite the name `clf`, this is not the classifier; the
# classifier is created later from clf.data().
clf = InteractiveCharts(df, labels="species")


In [3]:
# Preview the penguins frame. Rows with missing values were dropped when it
# was loaded, which is why the displayed index has a gap.
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,female
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,female
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,male


In [14]:
# Add a chart with bill length on the x axis and bill depth on the y axis.
# NOTE(review): the polygons reported by clf.data() appear to come from manual
# drawing on this chart, so downstream results are presumably not reproducible
# from code alone. Confirm, and consider saving the drawn description.
clf.add_chart(x="bill_length_mm", y="bill_depth_mm")

In [15]:
# Show the chart description (chart id, axis columns and per-label polygon
# coordinates). The same value is later passed to InteractiveClassifier as json_desc.
clf.data()

[{'chart_id': '2f06469a-5',
  'x': 'bill_length_mm',
  'y': 'bill_depth_mm',
  'polygons': {'Adelie': {'bill_length_mm': [[39.90552540269009,
      32.78270563524823,
      34.62936261199241,
      41.57631028641102,
      47.90770563524823]],
    'bill_depth_mm': [[21.55919946289063,
      19.06655481359751,
      15.317920994470832,
      16.51429348993679,
      21.572093499538134]]},
   'Gentoo': {'bill_length_mm': [[51.33721144920172,
      44.91788005385288,
      41.048694007341254,
      41.048694007341254,
      46.50072889106218,
      52.744188193387764,
      61.06448250904186,
      61.25677903842059]],
    'bill_depth_mm': [[17.50468121199311,
      15.880459858224512,
      14.602384038865617,
      13.192424469354648,
      13.084669003376927,
      13.842878382334556,
      17.09719943200551,
      17.09719943200551]]},
   'Chinstrap': {'bill_length_mm': [[55.479670677258454,
      51.563847445935686,
      44.85100762081094,
      41.494587708248574,
      46.15628203

In [16]:
# Build the classifier from the chart description returned by clf.data().
model = InteractiveClassifier(json_desc=clf.data())

# NOTE: X and y are rebound here to the penguins features and 'species' target.
y = df['species']
X = df.drop(columns=['species'])

# Fit and predict on the same rows (there is no hold-out split).
preds = (
    model
    .fit(X, y)
    .predict_proba(X)
)

In [21]:
# Inspect the predict_proba output for the row at position 13 (all columns).
preds[13,:]

array([9.98005982e-01, 9.97008973e-04, 9.97008973e-04])