In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from typing import List, Dict, Tuple, Any

In [2]:
from sklearn.metrics import classification_report, accuracy_score, recall_score, f1_score, precision_score

In [3]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

In [4]:
X_train = pd.read_parquet('train_binarized.parquet')
X_val = pd.read_parquet('val_binarized.parquet')

In [5]:
y_train = pd.read_parquet('y_train.parquet')
y_val = pd.read_parquet('y_val.parquet')

In [6]:
X_train

Unnamed: 0,pregnancies_ge_1,pregnancies_le_1,pregnancies_ge_3,pregnancies_le_3,pregnancies_ge_7,pregnancies_le_7,pregnancies_ge_9,pregnancies_le_9,glucose_ge_79,glucose_le_79,...,age_ge_24,age_le_24,age_ge_29,age_le_29,age_ge_40,age_le_40,age_ge_50,age_le_50,age_ge_60,age_le_60
357,1,0,1,0,1,0,1,0,1,0,...,1,0,1,0,1,0,0,1,0,1
73,1,0,1,0,0,1,0,1,1,0,...,0,1,0,1,0,1,0,1,0,1
352,1,0,1,1,0,1,0,1,0,1,...,1,0,1,0,1,0,0,1,0,1
497,1,0,0,1,0,1,0,1,1,0,...,1,0,0,1,0,1,0,1,0,1
145,0,1,0,1,0,1,0,1,1,0,...,0,1,0,1,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,1,0,1,0,0,1,0,1,1,0,...,1,0,0,1,0,1,0,1,0,1
106,1,1,0,1,0,1,0,1,1,0,...,1,0,0,1,0,1,0,1,0,1
270,1,0,1,0,1,0,1,0,1,0,...,1,0,1,0,0,1,0,1,0,1
435,0,1,0,1,0,1,0,1,1,0,...,1,0,1,1,0,1,0,1,0,1


In [7]:
y_train

Unnamed: 0,outcome
357,1
73,0
352,0
497,0
145,0
...,...
71,0
106,0
270,1
435,1


In [8]:
X_train_pos = X_train[y_train['outcome'] == 1]
X_train_neg = X_train[y_train['outcome'] == 0]

In [9]:
X_train_pos.shape, X_train_neg.shape

((199, 78), (377, 78))

In [10]:
y_val_pred = pd.DataFrame(np.zeros_like(y_val), columns=['prediction'], index=y_val.index)
y_val_pred

Unnamed: 0,prediction
668,0
324,0
624,0
690,0
473,0
...,...
554,0
319,0
594,0
6,0


## Baseline

In [48]:
class LazyClassifierFCA:
    def __init__(self):
        self.X_train = None
        self.y_train = None

    def fit(self, X_train: pd.DataFrame, y_train: pd.Series) -> None:
        self.X_train = X_train
        self.y_train = y_train
        
    def classify_sample(self, sample: pd.Series) -> Any:
        # Split X_train into positive and negative classes
        X_train_positive = self.X_train[y_train['outcome'] == 1]
        X_train_negative = self.X_train[y_train['outcome'] == 0]
        
        positive_classifiers = 0
        negative_classifiers = 0
        
        # Function to check if intersection with a train sample is a positive classifier
        def is_positive_classifier(intersection):
            # Find samples in X_train_positive that contain the intersection
            num_positive = ((X_train_positive | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            num_negative = ((X_train_negative | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            return num_negative == 0 and num_positive > 1
        
        # Function to check if intersection is a negative classifier
        def is_negative_classifier(intersection):
            # Find samples in X_train_negative that contain the intersection
            num_positive = ((X_train_positive | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            num_negative = ((X_train_negative | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            return num_positive == 0 and num_negative > 1
        
        # Check for positive classifiers by intersecting sample with each positive object
        for _, pos_sample in X_train_positive.iterrows():
            intersection = sample & pos_sample
            if is_positive_classifier(intersection):
                positive_classifiers += 1

        # Check for negative classifiers by intersecting sample with each negative object
        for _, neg_sample in X_train_negative.iterrows():
            intersection = sample & neg_sample
            if is_negative_classifier(intersection):
                negative_classifiers += 1

        # Determine the class based on the number of classifiers
        if positive_classifiers > negative_classifiers:
            # print(f"sample {sample.name} is classified as 1, {positive_classifiers=}, {negative_classifiers=}")
            return 1, positive_classifiers  # Predict positive
            
        elif negative_classifiers > positive_classifiers:
            # print(f"sample {sample.name} is classified as 0, {positive_classifiers=}, {negative_classifiers=}")
            return 0, negative_classifiers  # Predict negative

        else:
            # If equal, you can decide on a rule, like defaulting to 0 or 1, or returning 'undetermined'
            # print(f"sample {sample.name} is classified as 1, default, {positive_classifiers=}, {negative_classifiers=}")
        
            return 1, positive_classifiers  # or 0, depending on the choice


    def predict(self, X_test: pd.DataFrame) -> List[Any]:
        # List to store predictions for each test sample
        predictions = []
        classifiers = []
        
        # Iterate through each sample in X_test
        for _, sample in X_test.iterrows():
            # Classify the sample and append the result to predictions
            prediction, n_clfs = self.classify_sample(sample)
            predictions.append(prediction)
            classifiers.append(n_clfs)
        
        self.avg_n_clfs = np.mean(classifiers)
        
        return predictions

In [49]:
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)

In [50]:
def evaluate_binary_classification(y_val, y_pred):
    """
    Evaluate a binary classification model's performance using sklearn metrics.
    
    Parameters:
    y_val (array-like): Ground truth binary labels (0 or 1).
    y_pred (array-like): Predicted binary labels (0 or 1).
    
    Prints:
    - Confusion Matrix components (TP, TN, FP, FN)
    - Specificity (True Negative Rate)
    - Negative Predictive Value (NPV)
    - False Positive Rate (FPR)
    - False Discovery Rate (FDR)
    - Accuracy
    - Precision
    - Recall (True Positive Rate)
    - F1 Score
    """
    # Confusion matrix components
    tn, fp, fn, tp = confusion_matrix(y_val, y_pred).ravel()

    # Metrics calculations
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0  # True Negative Rate
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0  # Negative Predictive Value
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0  # False Positive Rate
    fdr = fp / (fp + tp) if (fp + tp) > 0 else 0  # False Discovery Rate
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)

    # Print results
    print(f"True Positive (TP): {tp}")
    print(f"True Negative (TN): {tn}")
    print(f"False Positive (FP): {fp}")
    print(f"False Negative (FN): {fn}")
    print(f"True Negative Rate (Specificity): {specificity:.4f}")
    print(f"Negative Predictive Value (NPV): {npv:.4f}")
    print(f"False Positive Rate (FPR): {fpr:.4f}")
    print(f"False Discovery Rate (FDR): {fdr:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall (True Positive Rate): {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

In [51]:
classifier = LazyClassifierFCA()
classifier.fit(X_train, y_train)

In [52]:
%%time
y_pred = classifier.predict(X_val)

CPU times: total: 2min 3s
Wall time: 3min 56s


In [53]:
evaluate_binary_classification(y_val, y_pred)

True Positive (TP): 26
True Negative (TN): 112
False Positive (FP): 11
False Negative (FN): 43
True Negative Rate (Specificity): 0.9106
Negative Predictive Value (NPV): 0.7226
False Positive Rate (FPR): 0.0894
False Discovery Rate (FDR): 0.2973
Accuracy: 0.7188
Precision: 0.7027
Recall (True Positive Rate): 0.3768
F1 Score: 0.4906


In [54]:
print(classification_report(y_val, y_pred))

              precision    recall  f1-score   support

           0       0.72      0.91      0.81       123
           1       0.70      0.38      0.49        69

    accuracy                           0.72       192
   macro avg       0.71      0.64      0.65       192
weighted avg       0.72      0.72      0.69       192



In [56]:
classifier.avg_n_clfs

111.10416666666667

In [57]:
class LazyClassifierFCA2:
    def __init__(self):
        self.X_train = None
        self.y_train = None
        # self.max_counter_examples = max_counter_examples

    def fit(self, X_train: pd.DataFrame, y_train: pd.Series) -> None:
        self.X_train = X_train
        self.y_train = y_train
        
    def classify_sample(self, sample: pd.Series) -> Any:
        # Split X_train into positive and negative classes
        X_train_positive = self.X_train[y_train['outcome'] == 1]
        X_train_negative = self.X_train[y_train['outcome'] == 0]
        
        positive_classifiers = 0
        negative_classifiers = 0
        
        # Function to check if intersection with a train sample is a positive classifier
        def is_positive_classifier(intersection):
            # Find samples in X_train_positive that contain the intersection
            num_positive = ((X_train_positive | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            num_negative = ((X_train_negative | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            return num_negative == 0 and num_positive > 1
        
        # Function to check if intersection is a negative classifier
        def is_negative_classifier(intersection):
            # Find samples in X_train_negative that contain the intersection
            num_positive = ((X_train_positive | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            num_negative = ((X_train_negative | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            return num_positive == 0 and num_negative > 1
        
        # Check for positive classifiers by intersecting sample with each positive object
        for _, pos_sample in X_train_positive.iterrows():
            intersection = sample & pos_sample
            if is_positive_classifier(intersection):
                positive_classifiers += 1

        # Check for negative classifiers by intersecting sample with each negative object
        for _, neg_sample in X_train_negative.iterrows():
            intersection = sample & neg_sample
            if is_negative_classifier(intersection):
                negative_classifiers += 1

        # Determine the class based on the number of classifiers
        if positive_classifiers / X_train_positive.shape[0] > negative_classifiers / X_train_negative.shape[0]:
            # print(f"sample {sample.name} is classified as 1, {positive_classifiers=}, {negative_classifiers=}")
            return 1, positive_classifiers  # Predict positive
            
        elif negative_classifiers / X_train_negative.shape[0] > positive_classifiers / X_train_positive.shape[0]:
            # print(f"sample {sample.name} is classified as 0, {positive_classifiers=}, {negative_classifiers=}")
            return 0, negative_classifiers  # Predict negative

        else:
            # If equal, you can decide on a rule, like defaulting to 0 or 1, or returning 'undetermined'
            # print(f"sample {sample.name} is classified as 1, default, {positive_classifiers=}, {negative_classifiers=}")
        
            return 1, positive_classifiers  # or 0, depending on the choice


    def predict(self, X_test: pd.DataFrame) -> List[Any]:
        # List to store predictions for each test sample
        predictions = []
        classifiers = []
        
        # Iterate through each sample in X_test
        for _, sample in X_test.iterrows():
            # Classify the sample and append the result to predictions
            prediction, n_clfs = self.classify_sample(sample)
            predictions.append(prediction)
            classifiers.append(n_clfs)
        
        self.avg_n_clfs = np.mean(classifiers)
        
        return predictions

In [58]:
classifier2 = LazyClassifierFCA2()
classifier2.fit(X_train, y_train)

In [59]:
%%time
y_pred2 = classifier2.predict(X_val)

CPU times: total: 54.3 s
Wall time: 3min 38s


In [60]:
evaluate_binary_classification(y_val, y_pred2)

True Positive (TP): 40
True Negative (TN): 103
False Positive (FP): 20
False Negative (FN): 29
True Negative Rate (Specificity): 0.8374
Negative Predictive Value (NPV): 0.7803
False Positive Rate (FPR): 0.1626
False Discovery Rate (FDR): 0.3333
Accuracy: 0.7448
Precision: 0.6667
Recall (True Positive Rate): 0.5797
F1 Score: 0.6202


In [61]:
classifier2.avg_n_clfs

109.234375

In [44]:
precision_score(y_val, y_pred2)

0.6666666666666666

In [45]:
print(classification_report(y_val, y_pred2))

              precision    recall  f1-score   support

           0       0.78      0.84      0.81       123
           1       0.67      0.58      0.62        69

    accuracy                           0.74       192
   macro avg       0.72      0.71      0.71       192
weighted avg       0.74      0.74      0.74       192



In [14]:
class LazyClassifierFCA3:
    def __init__(self, max_counter_examples=5, min_cardinality=25):
        self.X_train = None
        self.y_train = None
        self.max_counter_examples = max_counter_examples
        self.min_cardinality = min_cardinality

    def fit(self, X_train: pd.DataFrame, y_train: pd.Series) -> None:
        self.X_train = X_train
        self.y_train = y_train
        
    def classify_sample(self, sample: pd.Series) -> Any:
        # Split X_train into positive and negative classes
        X_train_positive = self.X_train[y_train['outcome'] == 1]
        X_train_negative = self.X_train[y_train['outcome'] == 0]
        
        positive_classifiers = 0
        negative_classifiers = 0
        
        # Function to check if intersection with a train sample is a positive classifier
        def is_positive_classifier(intersection):
            # Find samples in X_train_positive that contain the intersection
            num_positive = ((X_train_positive | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            num_negative = ((X_train_negative | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            return num_negative < self.max_counter_examples and num_positive > 1
        
        # Function to check if intersection is a negative classifier
        def is_negative_classifier(intersection):
            # Find samples in X_train_negative that contain the intersection
            num_positive = ((X_train_positive | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            num_negative = ((X_train_negative | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            return num_positive < self.max_counter_examples and num_negative > 1
        
        # Check for positive classifiers by intersecting sample with each positive object
        for _, pos_sample in X_train_positive.iterrows():
            intersection = sample & pos_sample
            if is_positive_classifier(intersection) and intersection.sum() >= self.min_cardinality:
                positive_classifiers += 1

        # Check for negative classifiers by intersecting sample with each negative object
        for _, neg_sample in X_train_negative.iterrows():
            intersection = sample & neg_sample
            if is_negative_classifier(intersection) and intersection.sum() >= self.min_cardinality:
                negative_classifiers += 1

        # Determine the class based on the number of classifiers
        if positive_classifiers / X_train_positive.shape[0] > negative_classifiers / X_train_negative.shape[0]:
            # print(f"sample {sample.name} is classified as 1, {positive_classifiers=}, {negative_classifiers=}")
            return 1, positive_classifiers  # Predict positive
            
        elif negative_classifiers / X_train_negative.shape[0] > positive_classifiers / X_train_positive.shape[0]:
            # print(f"sample {sample.name} is classified as 0, {positive_classifiers=}, {negative_classifiers=}")
            return 0, negative_classifiers  # Predict negative

        else:
            # If equal, you can decide on a rule, like defaulting to 0 or 1, or returning 'undetermined'
            # print(f"sample {sample.name} is classified as 1, default, {positive_classifiers=}, {negative_classifiers=}")
        
            return 1, positive_classifiers  # or 0, depending on the choice


    def predict(self, X_test: pd.DataFrame) -> List[Any]:
        # List to store predictions for each test sample
        predictions = []
        classifiers = []
        
        # Iterate through each sample in X_test
        for _, sample in X_test.iterrows():
            # Classify the sample and append the result to predictions
            prediction, n_clfs = self.classify_sample(sample)
            predictions.append(prediction)
            classifiers.append(n_clfs)
        
        self.avg_n_clfs = np.mean(classifiers)
        
        return predictions

In [15]:
min_cardinalities = [20, 22, 24, 26, 28, 30]
f1_scores = []

In [17]:
min_cardinalities2 = [10, 12, 14, 16, 18, 20]

In [18]:
for min_cardinality in min_cardinalities2:
    classifier3 = LazyClassifierFCA3(max_counter_examples=1, min_cardinality=min_cardinality)
    classifier3.fit(X_train, y_train)
    y_pred3 = classifier3.predict(X_val)
    print(f"Metrics with min_cardinality = {min_cardinality}")
    evaluate_binary_classification(y_val, y_pred3)
    print("\n")

Metrics with min_cardinality = 10
True Positive (TP): 40
True Negative (TN): 103
False Positive (FP): 20
False Negative (FN): 29
True Negative Rate (Specificity): 0.8374
Negative Predictive Value (NPV): 0.7803
False Positive Rate (FPR): 0.1626
False Discovery Rate (FDR): 0.3333
Accuracy: 0.7448
Precision: 0.6667
Recall (True Positive Rate): 0.5797
F1 Score: 0.6202


Metrics with min_cardinality = 12
True Positive (TP): 40
True Negative (TN): 103
False Positive (FP): 20
False Negative (FN): 29
True Negative Rate (Specificity): 0.8374
Negative Predictive Value (NPV): 0.7803
False Positive Rate (FPR): 0.1626
False Discovery Rate (FDR): 0.3333
Accuracy: 0.7448
Precision: 0.6667
Recall (True Positive Rate): 0.5797
F1 Score: 0.6202


Metrics with min_cardinality = 14
True Positive (TP): 40
True Negative (TN): 103
False Positive (FP): 20
False Negative (FN): 29
True Negative Rate (Specificity): 0.8374
Negative Predictive Value (NPV): 0.7803
False Positive Rate (FPR): 0.1626
False Discovery Ra

KeyboardInterrupt: 

In [16]:
for min_cardinality in min_cardinalities:
    classifier3 = LazyClassifierFCA3(max_counter_examples=1, min_cardinality=min_cardinality)
    classifier3.fit(X_train, y_train)
    y_pred3 = classifier3.predict(X_val)
    print(f"Metrics with min_cardinality = {min_cardinality}")
    evaluate_binary_classification(y_val, y_pred3)
    print("\n")

Metrics with min_cardinality = 20
True Positive (TP): 40
True Negative (TN): 103
False Positive (FP): 20
False Negative (FN): 29
True Negative Rate (Specificity): 0.8374
Negative Predictive Value (NPV): 0.7803
False Positive Rate (FPR): 0.1626
False Discovery Rate (FDR): 0.3333
Accuracy: 0.7448
Precision: 0.6667
Recall (True Positive Rate): 0.5797
F1 Score: 0.6202


Metrics with min_cardinality = 22
True Positive (TP): 40
True Negative (TN): 102
False Positive (FP): 21
False Negative (FN): 29
True Negative Rate (Specificity): 0.8293
Negative Predictive Value (NPV): 0.7786
False Positive Rate (FPR): 0.1707
False Discovery Rate (FDR): 0.3443
Accuracy: 0.7396
Precision: 0.6557
Recall (True Positive Rate): 0.5797
F1 Score: 0.6154


Metrics with min_cardinality = 24
True Positive (TP): 39
True Negative (TN): 101
False Positive (FP): 22
False Negative (FN): 30
True Negative Rate (Specificity): 0.8211
Negative Predictive Value (NPV): 0.7710
False Positive Rate (FPR): 0.1789
False Discovery Ra

In [50]:
classifier3 = LazyClassifierFCA3(max_counter_examples=1, min_cardinality=30)
classifier3.fit(X_train, y_train)

In [51]:
%%time
y_pred3 = classifier3.predict(X_val)

CPU times: total: 51.8 s
Wall time: 2min 53s


In [52]:
evaluate_binary_classification(y_val, y_pred3)

True Positive (TP): 39
True Negative (TN): 101
False Positive (FP): 22
False Negative (FN): 30
True Negative Rate (Specificity): 0.8211
Negative Predictive Value (NPV): 0.7710
False Positive Rate (FPR): 0.1789
False Discovery Rate (FDR): 0.3607
Accuracy: 0.7292
Precision: 0.6393
Recall (True Positive Rate): 0.5652
F1 Score: 0.6000


In [25]:
# class LazyClassifierFCA4:
#     def __init__(self):
#         self.X_train = None
#         self.y_train = None

#     def fit(self, X_train: pd.DataFrame, y_train: pd.Series) -> None:
#         self.X_train = X_train
#         self.y_train = y_train
        
#     def classify_sample(self, sample: pd.Series) -> Any:
#         X_train_positive = self.X_train[y_train['outcome'] == 1]
#         X_train_negative = self.X_train[y_train['outcome'] == 0]
        
#         positive_classifiers = 0
#         negative_classifiers = 0
        
#         max_pos_intersection = 0
#         n_max_pos_intersections = 0

#         max_neg_intersection = 0
#         n_max_neg_intersections = 0

#         for _, pos_sample in X_train_positive.iterrows():
#             intersection = sample & pos_sample
#             if intersection.sum() > max_pos_intersection:
#                 max_pos_intersection = intersection.sum()
#                 n_max_pos_intersections = 1
#             elif intersection.sum() == max_pos_intersection:
#                 n_max_pos_intersections += 1

#         # Check for negative classifiers by intersecting sample with each negative object
#         for _, neg_sample in X_train_negative.iterrows():
#             intersection = sample & neg_sample
#             if intersection.sum() > max_neg_intersection:
#                 max_neg_intersection = intersection.sum()
#                 n_max_neg_intersections = 1
#             elif intersection.sum() == max_neg_intersection:
#                 n_max_neg_intersections += 1

#         if max_pos_intersection > max_neg_intersection:
#             return 1
#         elif max_neg_intersection > max_pos_intersection:
#             return 0
#         else:
#             if n_max_pos_intersections > n_max_neg_intersections:
#                 return 1
#             elif n_max_neg_intersections > n_max_pos_intersections:
#                 return 0
#             else:
#                 return 1


#     def predict(self, X_test: pd.DataFrame) -> List[Any]:
#         # List to store predictions for each test sample
#         predictions = []
        
#         # Iterate through each sample in X_test
#         for _, sample in X_test.iterrows():
#             # Classify the sample and append the result to predictions
#             prediction= self.classify_sample(sample)
#             predictions.append(prediction)
        
#         return predictions

In [71]:
class LazyClassifierFCA4:
    def __init__(self):
        self.X_train = None
        self.y_train = None
        # self.max_counter_examples = max_counter_examples

    def fit(self, X_train: pd.DataFrame, y_train: pd.Series) -> None:
        self.X_train = X_train
        self.y_train = y_train
        
    def classify_sample(self, sample: pd.Series) -> Any:
        # Split X_train into positive and negative classes
        X_train_positive = self.X_train[y_train['outcome'] == 1]
        X_train_negative = self.X_train[y_train['outcome'] == 0]
        
        positive_classifiers = 0
        negative_classifiers = 0
        pos = 0
        neg = 0
        
        # Function to check if intersection with a train sample is a positive classifier
        def is_positive_classifier(intersection):
            # Find samples in X_train_positive that contain the intersection
            num_positive = ((X_train_positive | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            num_negative = ((X_train_negative | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            return num_negative < 1 and num_positive > 1
        
        # Function to check if intersection is a negative classifier
        def is_negative_classifier(intersection):
            # Find samples in X_train_negative that contain the intersection
            num_positive = ((X_train_positive | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            num_negative = ((X_train_negative | (~intersection.astype(bool)).astype(np.int32)) == True).all(axis=1).sum()
            return num_positive < 1 and num_negative > 1
        
        # Check for positive classifiers by intersecting sample with each positive object
        for _, pos_sample in X_train_positive.iterrows():
            intersection = sample & pos_sample
            if is_positive_classifier(intersection):
                pos += intersection.sum() / sample.shape[0]
                positive_classifiers += 1

        # Check for negative classifiers by intersecting sample with each negative object
        for _, neg_sample in X_train_negative.iterrows():
            intersection = sample & neg_sample
            if is_negative_classifier(intersection):
                neg += intersection.sum() / sample.shape[0]
                negative_classifiers += 1

        # Determine the class based on the number of classifiers
        if pos / X_train_positive.shape[0] > neg / X_train_negative.shape[0]:
            # print(f"sample {sample.name} is classified as 1, {positive_classifiers=}, {negative_classifiers=}")
            return 1, positive_classifiers  # Predict positive
            
        elif neg / X_train_negative.shape[0] > pos / X_train_positive.shape[0]:
            # print(f"sample {sample.name} is classified as 0, {positive_classifiers=}, {negative_classifiers=}")
            return 0, negative_classifiers  # Predict negative

        else:
            # If equal, you can decide on a rule, like defaulting to 0 or 1, or returning 'undetermined'
            # print(f"sample {sample.name} is classified as 1, default, {positive_classifiers=}, {negative_classifiers=}")
        
            return 1, positive_classifiers  # or 0, depending on the choice


    def predict(self, X_test: pd.DataFrame) -> List[Any]:
        # List to store predictions for each test sample
        predictions = []
        classifiers = []
        
        # Iterate through each sample in X_test
        for _, sample in X_test.iterrows():
            # Classify the sample and append the result to predictions
            prediction, n_clfs = self.classify_sample(sample)
            predictions.append(prediction)
            classifiers.append(n_clfs)
        
        self.avg_n_clfs = np.mean(classifiers)
        
        return predictions

In [72]:
classifier4 = LazyClassifierFCA4()

In [73]:
classifier4.fit(X_train, y_train)

In [74]:
%%time
y_pred4 = classifier4.predict(X_val)

CPU times: total: 56.3 s
Wall time: 2min 40s


In [75]:
classifier4.avg_n_clfs

109.04166666666667

In [76]:
evaluate_binary_classification(y_val, y_pred4)

True Positive (TP): 40
True Negative (TN): 102
False Positive (FP): 21
False Negative (FN): 29
True Negative Rate (Specificity): 0.8293
Negative Predictive Value (NPV): 0.7786
False Positive Rate (FPR): 0.1707
False Discovery Rate (FDR): 0.3443
Accuracy: 0.7396
Precision: 0.6557
Recall (True Positive Rate): 0.5797
F1 Score: 0.6154



True Positive (TP): 40

True Negative (TN): 102

False Positive (FP): 21

False Negative (FN): 29

True Negative Rate (Specificity): 0.8293

Negative Predictive Value (NPV): 0.7786

False Positive Rate (FPR): 0.1707

False Discovery Rate (FDR): 0.

Accuracy: 0.7396

Precision: 0.6557

Recall (True Positive Rate): 0.5797

F1 Score: 0.6154