<a href="https://colab.research.google.com/github/rahmani3101/Machine-Learning-/blob/main/Apriori.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


Apriori



In [None]:
# Example dataset: each inner list is one transaction, i.e. the collection of
# items that occur together. This list is the input passed to apriori().
transactions = [
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Butter'],
    ['Milk', 'Bread'],
    ['Milk', 'Bread', 'Butter'],
    ['Bread', 'Butter', 'Jam']
]

In [None]:
# Thresholds for the run below; both are fractions in the range 0..1.
min_support = 0.4     # An itemset is kept as frequent when its support (share of transactions containing it) is >= 0.4
min_confidence = 0.7  # A rule is kept when support(itemset) / support(antecedent) is >= 0.7

In [None]:
def get_unique_items(transactions):
    """Return every distinct item found in ``transactions``, in sorted order.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A sorted list in which each distinct item appears exactly once.
    """
    # The set comprehension flattens all transactions and drops duplicates.
    distinct = {entry for basket in transactions for entry in basket}
    return sorted(distinct)

In [None]:
def get_support(itemset, transactions):
    """Return the fraction of transactions that contain every item of ``itemset``.

    Args:
        itemset: Iterable of items that must all be present in a transaction
            for that transaction to count. An empty itemset matches every
            transaction.
        transactions: Sized collection of transactions; each transaction must
            support the ``in`` operator.

    Returns:
        ``matching_transactions / len(transactions)`` as a float, or ``0.0``
        when ``transactions`` is empty (instead of raising
        ``ZeroDivisionError``).
    """
    total = len(transactions)
    if total == 0:
        return 0.0

    # Materialize once: a one-shot iterator would be exhausted after the first
    # transaction and make every later transaction match vacuously.
    required = list(itemset)

    matches = sum(
        1
        for transaction in transactions
        if all(item in transaction for item in required)
    )
    return matches / total

In [None]:
# Function to generate new candidate itemsets
def generate_candidates(prev_frequent, k):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are joined when their first ``k - 2`` items, taken in sorted
    order, are identical; the resulting candidate is the sorted union of the
    two. No subset-based pruning is performed here, so callers are expected to
    filter the candidates (for example by support) themselves.

    Args:
        prev_frequent: Sequence of itemsets, each an iterable of hashable,
            mutually comparable items.
        k: Size of the candidates being built.

    Returns:
        A list of candidates, each a sorted list, without duplicates, in the
        order in which they were first generated.
    """
    prefix_len = k - 2

    # Sort every itemset BEFORE taking the prefix so the join does not depend
    # on the order in which the caller happened to store the items.
    ordered = [sorted(itemset) for itemset in prev_frequent]

    candidates = []
    seen = set()  # tuple form of each emitted candidate, for O(1) duplicate checks
    count = len(ordered)
    for i in range(count):
        first = ordered[i]
        for j in range(i + 1, count):
            second = ordered[j]
            if first[:prefix_len] != second[:prefix_len]:
                continue
            candidate = sorted(set(first) | set(second))
            key = tuple(candidate)
            if key not in seen:
                seen.add(key)
                candidates.append(candidate)
    return candidates

In [None]:

def apriori(transactions, min_support):
    """Find all frequent itemsets in ``transactions`` level by level.

    Starting from 1-itemsets, each level keeps the candidates whose support is
    at least ``min_support`` and joins them into the next level's candidates
    via ``generate_candidates``. The search stops when a level has no
    candidates or no frequent itemsets.

    Args:
        transactions: Collection of transactions (each a collection of items).
        min_support: Minimum support, as a fraction of transactions, for an
            itemset to be considered frequent.

    Returns:
        A ``(frequent_itemsets, support_data)`` tuple where
        ``frequent_itemsets`` is a list of itemsets (lists of items) ordered
        by level, and ``support_data`` maps ``tuple(sorted(itemset))`` to the
        support of each frequent itemset.
    """
    # Step 1: build the initial candidates as 1-item LISTS. get_unique_items
    # returns bare items; passing a bare string on would make get_support
    # iterate over its characters and report a support of 0 for every item.
    itemsets = [[item] for item in get_unique_items(transactions)]
    frequent_itemsets = []
    support_data = {}  # shared across all levels so later rule generation can look up any frequent itemset
    k = 1

    while itemsets:
        # Step 2: keep only the candidates that meet the support threshold.
        current_frequent = []
        for itemset in itemsets:
            support = get_support(itemset, transactions)
            if support >= min_support:
                current_frequent.append(itemset)
                # Only frequent itemsets are recorded; the key is order-independent.
                support_data[tuple(sorted(itemset))] = support

        if not current_frequent:
            break

        frequent_itemsets.extend(current_frequent)

        # Step 3: generate the next level's candidates from this level's survivors.
        k += 1
        itemsets = generate_candidates(current_frequent, k)

    return frequent_itemsets, support_data

In [None]:
def generate_rules(frequent_itemsets, support_data, min_confidence):
    """Derive single-consequent association rules from frequent itemsets.

    For every itemset with at least two items, each item in turn becomes the
    consequent and the remaining items form the antecedent. The confidence of
    a rule is ``support(itemset) / support(antecedent)``; an antecedent whose
    support is missing from ``support_data`` (or is zero) yields confidence 0.

    Args:
        frequent_itemsets: Sliceable itemsets (e.g. lists of items).
        support_data: Mapping from ``tuple(sorted(itemset))`` to support.
        min_confidence: Minimum confidence for a rule to be kept.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples in the order
        they were generated; ``consequent`` is a one-element list.
    """
    accepted = []
    for itemset in frequent_itemsets:
        if len(itemset) < 2:
            continue  # a rule needs at least one item on each side

        # The itemset's own support is the same for every rule derived from it.
        full_support = support_data[tuple(sorted(itemset))]

        for position, item in enumerate(itemset):
            antecedent = itemset[:position] + itemset[position + 1:]
            if not antecedent:
                continue

            antecedent_support = support_data.get(tuple(sorted(antecedent)), 0)
            if antecedent_support > 0:
                confidence = full_support / antecedent_support
            else:
                confidence = 0

            if confidence >= min_confidence:
                accepted.append((antecedent, [item], confidence))
    return accepted

In [None]:
# Mine the frequent itemsets and their supports from the example transactions.
frequent_itemsets, support_data = apriori(
    transactions=transactions,
    min_support=min_support,
)

In [None]:
# Turn the frequent itemsets into rules that meet the confidence threshold.
rules = generate_rules(
    frequent_itemsets=frequent_itemsets,
    support_data=support_data,
    min_confidence=min_confidence,
)

In [None]:
# Report each frequent itemset with its support.
print("\nFrequent Itemsets:")
for itemset in frequent_itemsets:
    # support_data is keyed by the SORTED tuple of items, so look it up the
    # same way instead of relying on the itemset already being in that order.
    support = support_data[tuple(sorted(itemset))]
    print(f"{itemset} -> support = {support:.2f}")


Frequent Itemsets:
['Bread'] -> support = 1.00
['Butter'] -> support = 0.80
['Milk'] -> support = 0.60
['Bread', 'Butter'] -> support = 0.80
['Bread', 'Milk'] -> support = 0.60
['Butter', 'Milk'] -> support = 0.40
['Bread', 'Butter', 'Milk'] -> support = 0.40


In [None]:
# Report each rule as "antecedent => consequent" with its confidence.
print("\nAssociation Rules:")
for antecedent, consequent, confidence in rules:
    print(f"{antecedent} => {consequent}, confidence = {confidence:.2f}")


Association Rules:
['Butter'] => ['Bread'], confidence = 1.00
['Bread'] => ['Butter'], confidence = 0.80
['Milk'] => ['Bread'], confidence = 1.00
['Butter', 'Milk'] => ['Bread'], confidence = 1.00
