# Rough Set check functions

In [5]:
import numpy as np
import pandas as pd

from skrough.checks import (
    check_if_consistent_table,
    check_if_functional_dependency,
    check_if_reduct,
)
from skrough.dataprep import prepare_factorized_data

## Dataset

Let's prepare a sample data set - "Play Golf Dataset".

In [6]:
# The classic 14-object "Play Golf" decision table: four conditional
# attributes followed by the decision attribute "Play".
column_names = ["Outlook", "Temperature", "Humidity", "Wind", "Play"]
golf_records = [
    ("sunny", "hot", "high", "weak", "no"),
    ("sunny", "hot", "high", "strong", "no"),
    ("overcast", "hot", "high", "weak", "yes"),
    ("rain", "mild", "high", "weak", "yes"),
    ("rain", "cool", "normal", "weak", "yes"),
    ("rain", "cool", "normal", "strong", "no"),
    ("overcast", "cool", "normal", "strong", "yes"),
    ("sunny", "mild", "high", "weak", "no"),
    ("sunny", "cool", "normal", "weak", "yes"),
    ("rain", "mild", "normal", "weak", "yes"),
    ("sunny", "mild", "normal", "strong", "yes"),
    ("overcast", "mild", "high", "strong", "yes"),
    ("overcast", "hot", "normal", "weak", "yes"),
    ("rain", "mild", "high", "strong", "no"),
]
# Go through an object-dtype array so every cell stays a plain Python string.
golf_values = np.array(golf_records, dtype=object)
df = pd.DataFrame(golf_values, columns=column_names)
# Name of the decision (target) column; the remaining columns are the
# conditional attributes.
TARGET_COLUMN = "Play"
# Split the table into conditional data `x` and decision values `y`, together
# with their domain sizes.
# NOTE(review): presumably `x` is a 2-D integer-coded array whose columns keep
# the order of `df` (0 = Outlook, 1 = Temperature, 2 = Humidity, 3 = Wind) and
# `y` is the coded "Play" column - confirm against the skrough documentation.
x, x_domain_sizes, y, y_domain_size = prepare_factorized_data(df, TARGET_COLUMN)

## Data table consistency

Let's check if the data table is consistent:

- check whole table
- check using a given subset of attributes

In [9]:
# check the whole table: all columns of `x` against the decision `y`
check_if_consistent_table(x, y)

True

In [14]:
# check using only the first two columns (attrs `0, 1`); objects 4 and 5
# (0-based) are both "rain", "cool" but have different "Play" values
check_if_consistent_table(x[:, 0:2], y)

False

## Check functional dependency

In [18]:
# check functional dependency on all objects (`objs` left at its default, `None`)
# and on all attributes (`attrs` left at its default, `None`)
check_if_functional_dependency(x, y)

True

In [21]:
# check on all objects (`objs` left at its default, `None`) and on attrs
# `0, 2, 3` only, i.e. with attribute `1` left out
check_if_functional_dependency(x, y, attrs=[0, 2, 3])

True

In [22]:
# check on all objects (`objs` left at its default, `None`) and on attrs `0, 1`;
# objects 4 and 5 (0-based) agree on both attributes ("rain", "cool") but have
# different "Play" values, so these two attributes do not determine the decision
check_if_functional_dependency(x, y, attrs=[0, 1])

False

In [24]:
# check on objects `0, 2, 5` and on attrs `0, 1`; restricted to these three
# objects the attribute pairs are all different ("sunny"/"hot",
# "overcast"/"hot", "rain"/"cool"), so no two objects can conflict
check_if_functional_dependency(x, y, objs=[0, 2, 5], attrs=[0, 1])

True

## Check reducts

For "Play Golf Dataset" there are only two reducts:

- "Outlook", "Temperature", "Wind" - `attrs == [0, 1, 3]`
- "Outlook", "Humidity", "Wind" - `attrs == [0, 2, 3]`

In [31]:
# reduct "Outlook", "Temperature", "Wind"; the other reduct, `[0, 2, 3]`, is
# checked in the next cell.
# Note that `[0, 1, 2]` is not a reduct: objects 4 and 5 (0-based) differ only
# in "Wind" and yet have different "Play" values.
check_if_reduct(x, y, attrs=[0, 1, 3])

True

In [30]:
# reduct "Outlook", "Humidity", "Wind"
check_if_reduct(x, y, attrs=[0, 2, 3])

True

In [32]:
# too few attributes ~ no functional dependency: objects 4 and 5 (0-based)
# agree on attrs `0, 1` but have different "Play" values
check_if_reduct(x, y, attrs=[0, 1])

False

In [33]:
# too many attributes ~ some of them can be removed: attrs `0, 2, 3` alone
# already pass the functional dependency check, so attribute `1` is redundant
check_if_reduct(x, y, attrs=[0, 1, 2, 3])

False