In [3]:
from scikit_classifiers import main
from scikit_classifiers import read_dataset
from scikit_classifiers import VALID_DATASET_NAMES as _DATASETS
from scikit_classifiers import CLASSIFIERS
import plotly.graph_objects as go
from sklearn.tree import plot_tree
from matplotlib.pyplot import figure
%load_ext autoreload
%autoreload 2

In [4]:
# Registry keys in insertion order; later cells pick a model by its position here.
_MODELS = list(CLASSIFIERS.keys())

# Plain loops rather than list comprehensions: the comprehensions were only run
# for their print() side effect, built throwaway lists of None, and one of them
# was bound to `_`, which shadows IPython's "last output" variable.
print("Models")
for index, label in enumerate(_MODELS):
    print(f"{index}: {label}")
print("Datasets")
for index, label in enumerate(_DATASETS):
    print(f"{index}: {label}")

Models
0: MLP
1: RF
2: Decision Tree
3: LR_L1
4: LR_L2
Datasets
0: heart-statlog
1: cervical-cancer


In [88]:
# Dataset used by the model cells; index 0 is "heart-statlog" in the listing printed above.
_DATASET_NAME = _DATASETS[0]
# Only the first returned value is kept; presumably the column names given head=True —
# TODO confirm against read_dataset in scikit_classifiers.
all_feature_names, _ = read_dataset(_DATASET_NAME, head=True)
# Forwarded unchanged to main(); None presumably means "use every feature" — verify in scikit_classifiers.
selected_feature_indexes = None

In [114]:
def _hidden_layer_pair(sizes):
    """Split one hidden-layer setting into ``(first, second)`` layer sizes, using 0 for a missing layer."""
    if isinstance(sizes, (tuple, list)):
        first = sizes[0] if len(sizes) > 0 else 0
        second = sizes[1] if len(sizes) > 1 else 0
        return first, second
    # A bare scalar describes a single hidden layer.
    return sizes, 0


def _plot_grid_search(cv_results, model_name):
    """Scatter-plot a grid search: one marker per parameter combination, coloured by score.

    Args:
        cv_results: Mapping with a ``"params"`` entry (sequence of dicts, one per
            searched combination) and a ``"mean_test_score"`` entry of the same
            length. The notebook passes a fitted search object's ``cv_results_``.
        model_name: Label inserted into the figure title.

    Returns:
        A ``plotly.graph_objects.Figure`` holding a single marker trace.

    Raises:
        ValueError: If ``cv_results["params"]`` is empty, or the grid does not
            have exactly one or two parameters (only those can be laid out on
            two axes here).
    """
    params = cv_results["params"]
    if len(params) == 0:
        raise ValueError("cv_results['params'] is empty; there is nothing to plot")

    param_names = list(params[0].keys())
    if len(param_names) == 2:
        # Two searched parameters map directly onto the two axes.
        x_title, y_title = param_names
        x = [p[x_title] for p in params]
        y = [p[y_title] for p in params]
    elif len(param_names) == 1:
        # A single parameter is treated as a hidden-layer spec: the axes show the
        # first and second layer sizes. The second size must come from index 1
        # (previously index 0 was read twice, so y always duplicated x).
        x_title = "1st hidden layer"
        y_title = "2nd hidden layer"
        pairs = [_hidden_layer_pair(p[param_names[0]]) for p in params]
        x = [first for first, _second in pairs]
        y = [second for _first, second in pairs]
    else:
        raise ValueError(
            f"_plot_grid_search supports grids with 1 or 2 parameters, got {len(param_names)}: {param_names}"
        )

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=x,
        y=y,
        hovertext=cv_results["mean_test_score"],
        marker=dict(
            size=10,
            color=cv_results["mean_test_score"],
            colorbar=dict(
                title="mean_test_score"
            ),
            colorscale="Magma"
        ),
        mode="markers")
    )
    fig.update_layout(
        title=f"Grid Search for {model_name}",
        xaxis_title=x_title,
        # Plain string, matching the x axis; repr() used to add stray quotes to the label.
        yaxis_title=y_title,
    )

    return fig

## Random Forest

In [97]:
# Index 1 is "RF" in the model listing printed earlier in the notebook.
_MODEL_NAME = _MODELS[1]
# main() returns an object exposing cv_results_ (read on the next line); what it
# trains and prints is defined in scikit_classifiers, not in this notebook.
model = main(dataset_name=_DATASET_NAME, model_name=_MODEL_NAME, selected_feature_indexes=selected_feature_indexes)
# One marker per searched parameter combination, coloured by mean test score.
fig = _plot_grid_search(model.cv_results_, _MODEL_NAME)
fig.show()

>>> RF
valid score = 0.8563424947145876
test score = 0.8518518518518519
{'max_depth': 4, 'n_estimators': 50}


## Logistic Regression

In [98]:
# Index 3 is "LR_L1" in the model listing printed earlier in the notebook.
# NOTE(review): the recorded output for this cell reports 'penalty': 'l2' as the best
# setting even though the model is named LR_L1 — confirm the grid in scikit_classifiers.
_MODEL_NAME = _MODELS[3]
# main() returns an object exposing cv_results_ (read on the next line).
model_l1 = main(dataset_name=_DATASET_NAME, model_name=_MODEL_NAME, selected_feature_indexes=selected_feature_indexes)
# One marker per searched parameter combination, coloured by mean test score.
fig = _plot_grid_search(model_l1.cv_results_, _MODEL_NAME)
fig.show()

>>> LR_L1
valid score = 0.8609936575052854
test score = 0.8518518518518519
{'C': 0.5, 'penalty': 'l2'}


## MLP

In [117]:
# Index 0 is "MLP" in the model listing printed earlier in the notebook.
_MODEL_NAME = _MODELS[0]
# main() returns an object exposing cv_results_ (read on the next line).
model_MLP = main(dataset_name=_DATASET_NAME, model_name=_MODEL_NAME, selected_feature_indexes=selected_feature_indexes)
# The recorded best params show a single searched key (hidden_layer_sizes), so this
# presumably exercises the one-parameter branch of _plot_grid_search — verify the grid.
fig = _plot_grid_search(model_MLP.cv_results_, _MODEL_NAME)
fig.show()

>>> MLP
valid score = 0.6157505285412264
test score = 0.6296296296296297
{'hidden_layer_sizes': 100}
