Apriori 

In [5]:
pip install mlxtend

Defaulting to user installation because normal site-packages is not writeable
Collecting mlxtend
  Downloading mlxtend-0.23.1-py3-none-any.whl (1.4 MB)
[K     |████████████████████████████████| 1.4 MB 3.2 MB/s eta 0:00:01
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.1
Note: you may need to restart the kernel to use updated packages.


In [2]:
from itertools import combinations
class Apriori:
    def __init__(self, min_support, min_confidence):
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.itemsets = {}
        self.transactions = []
    def load_data(self, dataset):
        self.transactions = dataset
    def get_itemsets(self, k):
        if k == 1:
            itemsets = {}
            for transaction in self.transactions:
                for item in transaction:
                    itemsets[item] = itemsets.get(item, 0) + 1
            return {frozenset([item]): count for item, count in itemsets.items() if count >= self.min_support}
        else:
            itemsets = {}
            prev_itemsets = self.itemsets[k - 1]
            for itemset1, support1 in prev_itemsets.items():
                for itemset2, support2 in prev_itemsets.items():
                    if len(itemset1.union(itemset2)) == k and len(itemset1.intersection(itemset2)) == k - 2:
                        merged_itemset = itemset1.union(itemset2)
                        if merged_itemset not in itemsets:
                            itemsets[merged_itemset] = 0
                            for transaction in self.transactions:
                                if merged_itemset.issubset(transaction):
                                    itemsets[merged_itemset] += 1
            return {itemset: support for itemset, support in itemsets.items() if support >= self.min_support}
    def generate_rules(self):
        rules = []
        for k, itemsets in self.itemsets.items():
            if k > 1:
                for itemset in itemsets.keys():
                    for i in range(1, k):
                        for antecedent in combinations(itemset, i):
                            antecedent = frozenset(antecedent)
                            consequent = itemset.difference(antecedent)
                            confidence = itemsets[itemset] / self.itemsets[len(antecedent)][antecedent]
                            if confidence >= self.min_confidence:
                                rules.append((antecedent, consequent, confidence))
        return rules
    def fit(self):
        k = 1
        self.itemsets[k] = self.get_itemsets(k)
        while self.itemsets[k]:
            k += 1
            self.itemsets[k] = self.get_itemsets(k)

    def print_itemsets(self):
        for k, itemsets in self.itemsets.items():
            print(f"Itemsets of size {k}:")
            for itemset, support in itemsets.items():
                print(f"{list(itemset)} - support: {support}")
    def print_rules(self):
        rules = self.generate_rules()
        for antecedent, consequent, confidence in rules:
            print(f"{list(antecedent)} => {list(consequent)} - confidence: {confidence}")
# Example usage:
if __name__ == "__main__":
    # Example dataset
    dataset = [
        {"bread", "milk"},
        {"bread", "diaper", "beer", "egg"},
        {"milk", "diaper", "beer", "cola"},
        {"bread", "milk", "diaper", "beer"},
        {"bread", "milk", "diaper", "cola"}
    ]
    apriori = Apriori(min_support=0.5, min_confidence=0.5)
    apriori.load_data(dataset)
    apriori.fit()
    print("Frequent Itemsets:")
    apriori.print_itemsets()
    print("\nAssociation Rules:")
    apriori.print_rules()

Frequent Itemsets:
Itemsets of size 1:
['milk'] - support: 4
['bread'] - support: 4
['beer'] - support: 3
['diaper'] - support: 4
['egg'] - support: 1
['cola'] - support: 2
Itemsets of size 2:
['milk', 'bread'] - support: 3
['beer', 'milk'] - support: 2
['milk', 'diaper'] - support: 3
['milk', 'cola'] - support: 2
['beer', 'bread'] - support: 2
['diaper', 'bread'] - support: 3
['egg', 'bread'] - support: 1
['bread', 'cola'] - support: 1
['beer', 'diaper'] - support: 3
['beer', 'egg'] - support: 1
['beer', 'cola'] - support: 1
['diaper', 'egg'] - support: 1
['diaper', 'cola'] - support: 2
Itemsets of size 3:
['beer', 'milk', 'bread'] - support: 1
['milk', 'diaper', 'bread'] - support: 2
['milk', 'bread', 'cola'] - support: 1
['beer', 'milk', 'diaper'] - support: 2
['beer', 'milk', 'cola'] - support: 1
['milk', 'diaper', 'cola'] - support: 2
['beer', 'diaper', 'bread'] - support: 2
['beer', 'egg', 'bread'] - support: 1
['diaper', 'bread', 'egg'] - support: 1
['diaper', 'bread', 'cola'] -

In [6]:
class Eclat:
    def __init__(self, min_support):
        self.min_support = min_support
        self.itemsets = {}

    def load_data(self, dataset):
        self.transactions = dataset

    def get_itemsets(self, items, prefix, support):
        for i, item in enumerate(sorted(items)):
            new_prefix = prefix + [item]
            new_support = support
            for transaction in self.transactions:
                if set(new_prefix).issubset(transaction):
                    new_support += 1
            if new_support >= self.min_support:
                self.itemsets[frozenset(new_prefix)] = new_support
                new_items = items[i+1:]
                self.get_itemsets(new_items, new_prefix, new_support)

    def fit(self):
        self.itemsets = {}
        items = set()
        for transaction in self.transactions:
            items |= set(transaction)
        self.get_itemsets(list(items), [], 0)

    def print_itemsets(self):
        for itemset, support in self.itemsets.items():
            print(f"{list(itemset)} - support: {support}")


# Example usage:
if __name__ == "__main__":
    # Example dataset
    dataset = [
        {"bread", "milk"},
        {"bread", "diaper", "beer", "egg"},
        {"milk", "diaper", "beer", "cola"},
        {"bread", "milk", "diaper", "beer"},
        {"bread", "milk", "diaper", "cola"}
    ]

    eclat = Eclat(min_support=2)
    eclat.load_data(dataset)
    eclat.fit()

    print("Frequent Itemsets:")
    eclat.print_itemsets()


Frequent Itemsets:
['beer'] - support: 18
['cola', 'beer'] - support: 5
['egg', 'beer'] - support: 5
['milk', 'beer'] - support: 5
['bread', 'beer'] - support: 12
['bread', 'cola', 'beer'] - support: 5
['bread', 'egg', 'beer'] - support: 6
['bread'] - support: 4
['cola', 'bread'] - support: 6
['egg', 'bread'] - support: 5
['milk', 'bread'] - support: 7
['cola'] - support: 4
['egg', 'cola'] - support: 2
['diaper'] - support: 4
['diaper', 'beer'] - support: 10
['cola', 'diaper'] - support: 6
['milk'] - support: 4


In [16]:
from mlxtend.frequent_patterns import fpgrowth
import pandas as pd

# Load the dataset
data = pd.read_csv("Sample.csv", header=None)

# Convert the dataset to a one-hot encoded format
encoded_data = pd.get_dummies(data)

# Convert the one-hot encoded DataFrame to boolean type
boolean_encoded_data = encoded_data.astype(bool)

# Find frequent itemsets using FP-Growth
frequent_itemsets = fpgrowth(boolean_encoded_data, min_support= 0.1, use_colnames=True)


print("Frequent Itemsets:")
print(frequent_itemsets)


Frequent Itemsets:
    support                                itemsets
0  0.181818                   (1_beer,potato chips)
1  0.181818  (1_egg,flour,butter,beer,potato chips)


In [15]:
from mlxtend.frequent_patterns import fpgrowth, association_rules
import pandas as pd

# Load the dataset
data = pd.read_csv("Sample.csv", header=None)

# Convert the dataset to a one-hot encoded format
encoded_data = pd.get_dummies(data)

# Convert the one-hot encoded DataFrame to boolean type
boolean_encoded_data = encoded_data.astype(bool)

# Find frequent itemsets using FP-Growth
frequent_itemsets = fpgrowth(boolean_encoded_data, min_support=0.1, use_colnames=True)

print("Frequent Itemsets:")
print(frequent_itemsets)

# Find association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=1.5)

# Reset index
rules.reset_index(drop=True, inplace=False)

print("Association Rulesac:")
print(rules)


Frequent Itemsets:
    support                                itemsets
0  0.181818                   (1_beer,potato chips)
1  0.181818  (1_egg,flour,butter,beer,potato chips)
Association Rules:
Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction, zhangs_metric]
Index: []


In [16]:
from mlxtend.frequent_patterns import fpgrowth, association_rules
import pandas as pd

# Load the dataset
data = pd.read_csv("Sample.csv", header=None)

# Convert the dataset to a one-hot encoded format
encoded_data = pd.get_dummies(data)

# Convert the one-hot encoded DataFrame to boolean type
boolean_encoded_data = encoded_data.astype(bool)

# Find frequent itemsets using FP-Growth
frequent_itemsets = fpgrowth(boolean_encoded_data, min_support=0.05, use_colnames=True)

print("Frequent Itemsets:")
print(frequent_itemsets)

# Find association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.2)

print("Association Rules:")
print(rules)


Frequent Itemsets:
     support                                     itemsets
0   0.090909                              (0_transaction)
1   0.090909                                  (1_Product)
2   0.181818                        (1_beer,potato chips)
3   0.090909                                        (0_1)
4   0.090909                                        (0_2)
5   0.090909                  (1_egg,flour,butter,cheese)
6   0.181818       (1_egg,flour,butter,beer,potato chips)
7   0.090909                                        (0_3)
8   0.090909                              (1_wine,cheese)
9   0.090909                                        (0_4)
10  0.090909                                        (0_5)
11  0.090909                             (1_potato chips)
12  0.090909                                        (0_6)
13  0.090909             (1_egg,flour,butter,wine,cheese)
14  0.090909                                        (0_7)
15  0.090909                                        (