In [None]:
import numpy as np
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import load_wine, load_iris
from sklearn.preprocessing import KBinsDiscretizer

In [1]:
def apriori(data, min_support=0.5):
    # 1. Count single items
    single_counts = {}
    for row in data:
        for item in row:
            if item in single_counts:
                single_counts[item] += 1
            else:
                single_counts[item] = 1

    # Filter items by support
    items = [item for item, count in single_counts.items() if count / len(data) >= min_support]
    itemsets = [{item} for item in items]

    final_itemsets = []
    while itemsets:
        new_itemsets = []
        for i in range(len(itemsets)):
            for j in range(i+1, len(itemsets)):
                # Form new potential itemset
                union = itemsets[i].union(itemsets[j])
                if len(union) == len(itemsets[i]) + 1:
                    # Count the support of the new itemset
                    count = sum(1 for row in data if union.issubset(row))
                    if count / len(data) >= min_support and union not in new_itemsets:
                        new_itemsets.append(union)

        if not new_itemsets:
            break

        final_itemsets.extend(new_itemsets)
        itemsets = new_itemsets

    return final_itemsets

In [None]:
def discretize_data(data):
    discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
    data_discrete = discretizer.fit_transform(data)
    transactions = []
    for row in data_discrete:
        transactions.append(set(np.where(row == 1)[0]))
    return transactions

def create_features(transactions, itemsets):
    features = []
    for transaction in transactions:
        row_features = [int(itemset.issubset(transaction)) for itemset in itemsets]
        features.append(row_features)
    return features

In [None]:
# Load datasets
wine_data = load_wine().data
wine_true_labels = load_wine().target
iris_data = load_iris().data
iris_true_labels = load_iris().target

In [None]:
# Step 1: Discretize the datasets using 2 bins
wine_transactions = discretize_data(wine_data)
iris_transactions = discretize_data(iris_data)

In [None]:
# Step 2: Use apriori to get frequent itemsets
wine_itemsets = apriori(wine_transactions, min_support=0.2)
iris_itemsets = apriori(iris_transactions, min_support=0.2)

In [None]:
# Step 3: Convert itemsets to binary features
wine_features = create_features(wine_transactions, wine_itemsets)
iris_features = create_features(iris_transactions, iris_itemsets)

In [None]:
# Step 4: Use sklearn for clustering
kmeans_wine = KMeans(n_clusters=3, random_state=0).fit(wine_features)
kmeans_iris = KMeans(n_clusters=3, random_state=0).fit(iris_features)
wine_kmeans_ari = adjusted_rand_score(wine_true_labels, kmeans_wine.labels_)
iris_kmeans_ari = adjusted_rand_score(iris_true_labels, kmeans_iris.labels_)

agglo_wine = AgglomerativeClustering(n_clusters=3).fit(wine_features)
agglo_iris = AgglomerativeClustering(n_clusters=3).fit(iris_features)
wine_agglo_ari = adjusted_rand_score(wine_true_labels, agglo_wine.labels_)
iris_agglo_ari = adjusted_rand_score(iris_true_labels, agglo_iris.labels_)

In [4]:
print(f"Wine K-Means ARI with Apriori Features:\t{wine_kmeans_ari:0.3f}")
print(f"Iris K-Means ARI with Apriori Features\t{iris_kmeans_ari:0.3f}")

Wine K-Means ARI with Apriori Features: 0.25665226660424906
Iris K-Means ARI with Apriori Features: 0.28315560728971084

Wine Hierarchical ARI with Apriori Features: 0.30799712653768524
Iris Hierarchical ARI with Apriori Features: 0.29358053234609816


In [None]:
print(f"Wine Hierarchical ARI with Apriori Features:\t{wine_agglo_ari:0.3f}")
print(f"Iris Hierarchical ARI with Apriori Features:\t{iris_agglo_ari:0.3f}")