In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

## Install libraries

```bash
conda create -n edu4 python=3.11 jupyter matplotlib
```

```bash
pip install -U -r requirements.txt
```

```bash
pip install -U numpy
pip install -U scikit-learn
```

## Update repository

In [None]:
! git pull

## Add import path

In [None]:
import os
import sys
import gc

In [None]:
# Resolve the parent of the working directory to an absolute path and register
# it on sys.path exactly once, so the later `from src....` imports can resolve.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
# Remove the temporary name from the notebook namespace; sys.path keeps its own copy of the string.
del module_path

## Organize imports

In [None]:
import multiprocessing
from pathlib import Path

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

In [None]:
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
import plotly.express as px

In [None]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis, 
    QuadraticDiscriminantAnalysis
)
from sklearn.tree import DecisionTreeClassifier, _tree
from sklearn.ensemble import RandomForestClassifier 
from sklearn.datasets import (
    load_iris,
    load_wine,
    load_breast_cancer,
    make_classification,
)
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import (
    MaxAbsScaler,
    MinMaxScaler,
    StandardScaler,
    LabelEncoder, 
    OneHotEncoder,
)
from sklearn.metrics import (
    precision_score, 
    recall_score, 
    f1_score,
    classification_report,
    confusion_matrix
)
from sklearn.compose import ColumnTransformer, make_column_transformer

In [None]:
from scipy import stats
from scipy.interpolate import interp1d

In [None]:
from src.lattmc.fca.utils import *
from src.lattmc.fca.data_utils import *
from src.lattmc.fca.image_utils import *
from src.lattmc.fca.models import *
from src.lattmc.fca.fca_utils import *
from src.lattmc.fca.image_gens import *

#### Number of CPU cores

In [None]:
# CPU count as reported by multiprocessing; the bare name on the last line displays it as the cell output.
workers = multiprocessing.cpu_count()
workers

In [None]:
# Shared random seed: pass it as `random_state=SEED` to stochastic estimators and splitters for reproducible runs.
SEED = 2024

## Initialize Path

In [None]:
# Data locations, all relative to the notebook's working directory.
PATH = Path('data')

# Image directory; created together with any missing parents, no error if it already exists.
images_path = PATH.joinpath('images')
images_path.mkdir(parents=True, exist_ok=True)

# Dataset files located directly under PATH.
pumpkin_path = PATH.joinpath('Pumpkin_Seeds_Dataset.xlsx')
ad_click_path = PATH.joinpath('advertising.csv')

## Prepare the model

#### Visualize model

In [None]:
# Step 1: Create a synthetic dataset with two informative features
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=0, n_clusters_per_class=1)

# Hold out 20% of the samples; the plot below still shows every point in X
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Train a shallow decision tree classifier.
# random_state is pinned because scikit-learn permutes the candidate features at
# every split, so tied splits could otherwise be resolved differently between runs.
clf = DecisionTreeClassifier(max_depth=3, random_state=SEED)
clf.fit(X_train, y_train)

# Step 3: Create a mesh grid (step 0.1, one unit of padding) for plotting decision regions
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))

# Step 4: Predict class labels for each point in the mesh,
# then fold the flat predictions back into the grid shape
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Step 5: Plot the decision regions with the samples drawn on top
plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', marker='o')
plt.title("Decision Tree Classifier Decision Regions")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()

In [None]:
# Load the Iris dataset
iris = load_iris()
X = iris.data[:, [2, 3]]  # Using only the two most important features: petal length and petal width
y = iris.target

# Hold out 20% of the samples; the plot below still shows every point in X
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a shallow decision tree classifier.
# random_state is pinned because scikit-learn permutes the candidate features at
# every split, so tied splits could otherwise be resolved differently between runs.
clf = DecisionTreeClassifier(max_depth=3, random_state=SEED)
clf.fit(X_train, y_train)

# Create a mesh grid (step 0.01, half a unit of padding) for plotting decision regions
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
                     np.arange(y_min, y_max, 0.01))

# Predict class labels for each point in the mesh,
# then fold the flat predictions back into the grid shape
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Plot the decision regions with the samples drawn on top
plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', marker='o', s=100)
plt.title("Decision Tree Classifier on IRIS Dataset (Petal Length & Width)")
plt.xlabel("Petal Length (cm)")
plt.ylabel("Petal Width (cm)")
plt.show()


In [None]:
from sklearn.tree import _tree

# Function to list all decision rules
def list_decision_regions(tree, feature_names):
    """
    Print the decision rules of a fitted decision tree as nested if/else pseudo-code.

    Parameters:
    - tree: Fitted tree estimator exposing ``tree_`` (e.g. DecisionTreeClassifier).
    - feature_names: Sequence of names, indexed by the feature index stored at each split.

    Returns:
    - None. One line is written to stdout per split branch and per leaf.

    For a single-output classifier each leaf prints the predicted class label
    (looked up in ``tree.classes_``). When ``classes_`` is unavailable, or the
    estimator is multi-output, the raw ``tree_.value`` entry of the leaf is printed.
    """
    tree_ = tree.tree_
    classes = getattr(tree, "classes_", None)
    single_output = getattr(tree, "n_outputs_", 1) == 1

    def leaf_summary(node):
        # Without a usable label table, fall back to the raw stored value.
        if classes is None or not single_output:
            return tree_.value[node]
        # The argmax position in the leaf's value row indexes into classes_.
        label = classes[tree_.value[node].argmax()]
        return label.item() if hasattr(label, "item") else label

    def recurse(node, depth):
        indent = "  " * depth
        left = tree_.children_left[node]
        right = tree_.children_right[node]
        if left != right:
            # Split node: only leaves have identical left/right child ids.
            # This public-array check avoids the private sklearn.tree._tree sentinel.
            name = feature_names[tree_.feature[node]]
            threshold = tree_.threshold[node]
            print(f"{indent}if {name} <= {threshold:.2f}:")
            recurse(left, depth + 1)
            print(f"{indent}else:  # if {name} > {threshold:.2f}")
            recurse(right, depth + 1)
        else:
            print(f"{indent}return class {leaf_summary(node)}")

    # Start one level deep so the top-level rule is indented like a function body.
    recurse(0, 1)

# Print the rules of the current `clf`, naming the features with iris feature columns 2 and 3
list_decision_regions(clf, iris.feature_names[2:4])


In [None]:
from sklearn.tree import _tree

def get_tree_regions(tree):
    """
    Returns all decision regions of a trained decision tree classifier.

    Parameters:
    - tree: Trained DecisionTreeClassifier instance.

    Returns:
    - regions: List of (conditions, predicted_class) tuples, one per leaf, in
               left-first depth-first order. ``conditions`` is a list of
               (feature_index, "<=" or ">", threshold) tuples along the path from
               the root. ``predicted_class`` is the class label taken from
               ``tree.classes_`` for a single-output classifier; if ``classes_``
               is unavailable (or the estimator is multi-output) it is the argmax
               position within the leaf's value array.
    """
    tree_ = tree.tree_
    classes = getattr(tree, "classes_", None)
    use_labels = classes is not None and getattr(tree, "n_outputs_", 1) == 1
    regions = []

    def recurse(node, region):
        left = tree_.children_left[node]
        right = tree_.children_right[node]
        if left != right:
            # Split node: only leaves have identical left/right child ids.
            # This public-array check avoids the private sklearn.tree._tree sentinel.
            feature = int(tree_.feature[node])
            threshold = float(tree_.threshold[node])

            # Each branch extends a fresh copy of the path, so siblings never share state.
            recurse(left, region + [(feature, "<=", threshold)])
            recurse(right, region + [(feature, ">", threshold)])
        else:
            # Leaf: the argmax position indexes classes_, which holds the real labels.
            position = tree_.value[node].argmax().item()
            if use_labels:
                label = classes[position]
                predicted_class = label.item() if hasattr(label, "item") else label
            else:
                predicted_class = position
            regions.append((region, predicted_class))

    # Initialize recursion from root
    recurse(0, [])

    return regions

In [None]:
# Refit on all of X, y with no depth limit.
# random_state is pinned because scikit-learn permutes the candidate features at
# every split, so tied splits could otherwise be resolved differently between runs.
clf = DecisionTreeClassifier(random_state=SEED).fit(X, y)

# Get regions
regions = get_tree_regions(clf)
print(regions)

In [None]:
# One region per paragraph: the tuple, then an empty line.
for region in regions:
    print(region, end="\n\n")

In [None]:
from sklearn.tree import _tree

def get_tree_regions_with_and(tree):
    """
    Returns all decision regions of a trained decision tree classifier with explicit AND conditions.

    Parameters:
    - tree: Trained DecisionTreeClassifier instance.

    Returns:
    - regions: List of (description, predicted_class) tuples, one per leaf, in
               left-first depth-first order. ``description`` joins the path
               conditions, e.g. "(feature_0 <= 2.5) AND (feature_1 > 1.75)", and is
               the empty string when the root itself is a leaf. ``predicted_class``
               is the class label taken from ``tree.classes_`` for a single-output
               classifier; if ``classes_`` is unavailable (or the estimator is
               multi-output) it is the argmax position within the leaf's value
               array. Either way it is a plain Python value, not a numpy scalar.
    """
    tree_ = tree.tree_
    classes = getattr(tree, "classes_", None)
    use_labels = classes is not None and getattr(tree, "n_outputs_", 1) == 1
    regions = []

    def recurse(node, conditions):
        left = tree_.children_left[node]
        right = tree_.children_right[node]
        if left != right:
            # Split node: only leaves have identical left/right child ids.
            # This public-array check avoids the private sklearn.tree._tree sentinel.
            feature = tree_.feature[node]
            threshold = tree_.threshold[node]

            # Each branch extends a fresh copy of the path, so siblings never share state.
            recurse(left, conditions + [f"(feature_{feature} <= {threshold})"])
            recurse(right, conditions + [f"(feature_{feature} > {threshold})"])
        else:
            # Leaf: the argmax position indexes classes_, which holds the real labels.
            position = tree_.value[node].argmax().item()
            if use_labels:
                label = classes[position]
                predicted_class = label.item() if hasattr(label, "item") else label
            else:
                predicted_class = position
            regions.append((" AND ".join(conditions), predicted_class))

    # Initialize recursion from root
    recurse(0, [])
    return regions


In [None]:
# Get regions with AND conditions
# Collect one (AND-joined conditions, predicted class) pair per leaf and report each on its own line
regions_with_and = get_tree_regions_with_and(clf)
for region, prediction in regions_with_and:
    print("Region: {}, Predicted class: {}".format(region, prediction))

In [None]:
# Show the per-node split feature indices of the fitted tree; the functions above
# treat entries equal to _tree.TREE_UNDEFINED as leaves.
clf.tree_.feature