In the previous guide we saw the merits of the `FunctionClassifier`. In this guide we'll make the approach even more powerful by using the `CaseWhen` object.

In [1]:
%load_ext autoreload
%autoreload 2

In [10]:
import numpy as np
import pandas as pd 

from hulearn.case_when import CaseWhen
from hulearn.datasets import load_titanic
from hulearn.classification import FunctionClassifier
from sklearn.model_selection import GridSearchCV

df = load_titanic(as_frame=True)
X, y = df.drop(columns=['survived']), df['survived']

In [19]:
from hulearn.underscore import _ 

func = CaseWhen(default=0).when(_['sex'] =='male', 1)
mod = FunctionClassifier(func)

In [33]:
def gender_based(X, thres_f=10, thres_m=10):
    func = (CaseWhen(default=0)
            .when((_['sex'] == 'female') & (_['fare'] > thres_f), 1)
            .when((_['sex'] == 'male') & (_['fare'] > thres_m), 1))
    return func(X)

mod = FunctionClassifier(gender_based, thres_f=10, thres_m=10)

In [34]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_score, recall_score, accuracy_score, make_scorer

grid = GridSearchCV(mod, 
                    cv=2, 
                    param_grid={'thres_f': np.linspace(0, 100, 30), 
                                'thres_m': np.linspace(0, 100, 30)},
                    scoring={'accuracy': make_scorer(accuracy_score), 
                             'precision': make_scorer(precision_score),
                             'recall': make_scorer(recall_score)},
                    refit='accuracy'
                )
grid.fit(X, y)

score_df = (pd.DataFrame(grid.cv_results_)
  .set_index('param_threshold')
  [['mean_test_accuracy', 'mean_test_precision', 'mean_test_recall']])

score_df.plot(figsize=(12, 5), title="scores vs. fare-threshold");

Traceback (most recent call last):
  File "/Users/vincent/Development/human-learn/venv/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/vincent/Development/human-learn/hulearn/classification/functionclassifier.py", line 52, in fit
    predictions = self.func(X, **self.kwargs)
  File "<ipython-input-33-30c224e375ed>", line 3, in gender_based
    .when((_['sex'] == 'female') & (_['fare'] > thres_f), 1)
TypeError: unsupported operand type(s) for &: '_underscore' and '_underscore'



TypeError: unsupported operand type(s) for &: '_underscore' and '_underscore'