In [1]:
import numpy as np
import pandas as pd

from skrough.dataprep import prepare_factorized_data

# Toy weather dataset used throughout the notebook: each inner list is one
# observation, with values given in the same order as `attribute_names`.
attribute_names = ["Outlook", "Temperature", "Humidity", "Wind", "Play"]
weather_rows = [
    ["sunny", "hot", "high", "weak", "no"],
    ["sunny", "hot", "high", "strong", "no"],
    ["overcast", "hot", "high", "weak", "yes"],
    ["rain", "mild", "high", "weak", "yes"],
    ["rain", "cool", "normal", "weak", "yes"],
    ["rain", "cool", "normal", "strong", "no"],
    ["overcast", "cool", "normal", "strong", "yes"],
    ["sunny", "mild", "high", "weak", "no"],
    ["sunny", "cool", "normal", "weak", "yes"],
    ["rain", "mild", "normal", "weak", "yes"],
    ["sunny", "mild", "normal", "strong", "yes"],
    ["overcast", "mild", "high", "strong", "yes"],
    ["overcast", "hot", "normal", "weak", "yes"],
    ["rain", "mild", "high", "strong", "no"],
]

# Go through an object-dtype ndarray so the values are stored as Python objects.
weather_values = np.array(weather_rows, dtype=object)
df = pd.DataFrame(weather_values, columns=attribute_names)
# Name of the target column. It is defined once and passed to the call below,
# so the constant and the call cannot drift apart.
TARGET_COLUMN = "Play"

# The four returned values are reused unchanged by the later cells.
x, x_counts, y, y_count = prepare_factorized_data(df=df, target_attr=TARGET_COLUMN)

In [2]:
from skrough.algorithms.bireducts import get_bireduct_daab_heuristic
from skrough.chaos_measures import entropy
from skrough.feature_importance import (
    compute_objs_attrs_score_gains,
    get_feature_importance_for_objs_attrs,
)

# Settings for the bireduct search, gathered in one place so they are easy to
# find and tune.
# NOTE(review): no seed is passed and `allowed_randomness` is non-zero, so the
# result shown below is presumably not reproducible across runs — confirm
# whether the function accepts a seed argument.
bireduct_search_params = dict(
    chaos_measure=entropy,
    epsilon=0.3,
    candidates_count=1,
    allowed_randomness=0.9,
    n_bireducts=10,
)

bireducts = get_bireduct_daab_heuristic(x, y, **bireduct_search_params)
bireducts

[ObjsAttrsSubset(objs=[0, 1, 4, 6, 7, 8, 9, 10, 12, 13], attrs=[2]),
 ObjsAttrsSubset(objs=[0, 1, 5, 7, 13], attrs=[]),
 ObjsAttrsSubset(objs=[2, 3, 4, 6, 8, 9, 10, 11, 12], attrs=[]),
 ObjsAttrsSubset(objs=[0, 1, 2, 3, 5, 6, 7, 8, 10, 11, 12], attrs=[2, 0]),
 ObjsAttrsSubset(objs=[2, 3, 4, 6, 8, 9, 10, 11, 12], attrs=[]),
 ObjsAttrsSubset(objs=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], attrs=[0, 3, 1]),
 ObjsAttrsSubset(objs=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], attrs=[0, 1, 3]),
 ObjsAttrsSubset(objs=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], attrs=[0, 3, 1]),
 ObjsAttrsSubset(objs=[0, 1, 5, 7, 13], attrs=[]),
 ObjsAttrsSubset(objs=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], attrs=[3, 1, 0])]

In [3]:
# Feature names are every column of `df` except the target. Dropping the
# target by name (rather than slicing off the last column) keeps this correct
# even if the target column is not the last one.
feature_names = df.columns.drop(TARGET_COLUMN)

# Feature-importance table computed over the whole collection of bireducts
# found above; the last expression is displayed as the cell output.
get_feature_importance_for_objs_attrs(
    x=x,
    x_counts=x_counts,
    y=y,
    y_count=y_count,
    column_names=feature_names,
    objs_attrs_collection=bireducts,
    chaos_fun=entropy,
)

Unnamed: 0,column,count,global_gain,avg_global_gain,global_gain_cover_weighted,avg_global_gain_cover_weighted,local_gain,avg_local_gain,local_gain_cover_weighted,avg_local_gain_cover_weighted
0,Outlook,5.0,3.381576,0.676315,3.285402,0.65708,3.806364,0.761273,3.619164,0.723833
1,Temperature,4.0,1.358539,0.339635,1.358539,0.339635,1.358539,0.339635,1.358539,0.339635
2,Humidity,2.0,0.505737,0.252868,0.386519,0.19326,1.59411,0.797055,1.183161,0.591581
3,Wind,4.0,1.929968,0.482492,1.929968,0.482492,1.929968,0.482492,1.929968,0.482492
