In [None]:
from scikit_classifiers import main
from scikit_classifiers import read_dataset
from scikit_classifiers import DATASETS
from scikit_classifiers import CLASSIFIERS
from scikit_classifiers import SELECTED_FEATURES
from helpers import print_datasets
from helpers import print_models
import plotly.graph_objects as go
from sklearn.tree import plot_tree
from matplotlib.pyplot import figure

# Reload imported modules automatically before each cell executes, so edits to
# the project modules imported above are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### AVAILABLE MODELS

In [None]:
# Snapshot the classifier names as a list; later cells select a model by its
# position in this list, so the order of CLASSIFIERS matters.
_MODELS = [*CLASSIFIERS.keys()]

# Helper from `helpers`; presumably prints the available models (not shown here).
print_models()

### AVAILABLE DATASETS

In [None]:
# Snapshot the dataset names as a list; the dataset used by the notebook is
# selected by its position in this list.
_DATASETS = [*DATASETS.keys()]

# Helper from `helpers`; presumably prints the available datasets (not shown here).
print_datasets()

### SELECTED DATASET

In [None]:
# Dataset used by every model cell below, chosen by its position in _DATASETS.
_DATASET_NAME = _DATASETS[1]
# Forwarded to main(balance=...) by the model cells.
_BALANCE = True

# The message is assembled from adjacent string literals rather than a
# backslash continuation inside the string: the continuation pulled the next
# line's indentation into the output ("Selected dataset:     <name>").
print(
    "Selected dataset: "
    f"{_DATASET_NAME}{' (balanced)' if _BALANCE else ''}"
)

### FEATURE SELECTION

In [None]:
# Toggle forwarded to read_dataset(...) here and to main(...) in the model cells.
_SELECT_FEATURES = False

# With head=True the first returned value is used as the list of feature names;
# the second value is not needed here.
feature_names, _ = read_dataset(
    _DATASET_NAME,
    head=True,
    select_features=_SELECT_FEATURES,
)

# List every feature with its index, followed by the total count.
for idx, feature in enumerate(feature_names):
    print(f"{idx}: {feature}")
print(f"\nnum features: {len(feature_names)}")

## Random Forest

In [None]:
# Model at position 1 of _MODELS; per the section heading this is expected to be
# the random forest, but that depends on the ordering of CLASSIFIERS.
_MODEL_NAME = _MODELS[1]
model = main(
    balance=_BALANCE,
    select_features=_SELECT_FEATURES,
    model_name=_MODEL_NAME,
    dataset_name=_DATASET_NAME,
)

# main(...) is used here as returning a fitted search object that exposes
# `best_estimator_`.
feature_importances = model.best_estimator_.feature_importances_

# One bar per feature; `feature_names` comes from the feature-selection cell.
fig = go.Figure(data=[go.Bar(x=feature_names, y=feature_importances)])
fig.update_layout(
    title_text=f"Feature importance for {_DATASET_NAME} given a {_MODEL_NAME} model"
)
fig.show()

## Decision Tree

In [None]:
# Model at position 2 of _MODELS; per the section heading this is expected to be
# the decision tree, but that depends on the ordering of CLASSIFIERS.
_MODEL_NAME = _MODELS[2]
model = main(
    balance=_BALANCE,
    select_features=_SELECT_FEATURES,
    model_name=_MODEL_NAME,
    dataset_name=_DATASET_NAME,
)

# Open a wide matplotlib figure first so the tree is drawn onto it.
figure(figsize=(17, 8))

# NOTE(review): plot_tree applies class_names in ascending order of the class
# labels - confirm that "Y" / "F" match the dataset's label encoding.
plot_tree(
    model.best_estimator_,
    class_names=["Y", "F"],
    feature_names=feature_names,
    fontsize=10,
)

## Logistic Regression

In [None]:
# Models at positions 3 and 4 of _MODELS; the variable and trace names suggest
# these are the L1- and L2-regularised logistic regressions, but that depends on
# the ordering of CLASSIFIERS.
_MODEL_NAME = _MODELS[3]
model_l1 = main(
    dataset_name=_DATASET_NAME,
    model_name=_MODEL_NAME,
    select_features=_SELECT_FEATURES,
    balance=_BALANCE,
)

_MODEL_NAME = _MODELS[4]
model_l2 = main(
    dataset_name=_DATASET_NAME,
    model_name=_MODEL_NAME,
    select_features=_SELECT_FEATURES,
    balance=_BALANCE,
)

# Only the first row of coef_ is plotted.
# NOTE(review): this assumes a binary problem (a single coefficient row) - confirm.
feature_importances_l1 = model_l1.best_estimator_.coef_[0]
feature_importances_l2 = model_l2.best_estimator_.coef_[0]

# Read the feature names with the same select_features flag the models were
# trained with; otherwise the x-axis labels stop matching the coefficient
# vectors as soon as _SELECT_FEATURES is enabled.
feature_names, _ = read_dataset(
    _DATASET_NAME,
    head=True,
    select_features=_SELECT_FEATURES,
)

# One bar trace per model so both coefficient sets can be compared per feature.
fig = go.Figure(
    data=[
        go.Bar(x=feature_names, y=feature_importances_l1, name="L1"),
        go.Bar(x=feature_names, y=feature_importances_l2, name="L2"),
    ],
    layout_title_text=f"Feature importance for {_DATASET_NAME} given a Logistic Regression model",
)
fig.show()