In [2]:
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from tabulate import tabulate

In [3]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [4]:
# Toy market-basket dataset: every inner list is the set of items bought
# together in one transaction.
transactions = [
    ["bread", "milk", "eggs"],
    ["bread", "butter", "jam"],
    ["milk", "butter", "bread", "jam"],
    ["bread", "milk"],
    ["eggs", "butter"],
]

In [5]:
# One-hot encode the transactions: one row per transaction, one column per
# distinct item, each cell flagging whether the item is in that transaction.
te = TransactionEncoder()
te.fit(transactions)                 # learn the item vocabulary (te.columns_)
te_ary = te.transform(transactions)  # item-presence matrix for the same data
df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Render the encoded table as a psql-style text grid.
print(tabulate(df, tablefmt='psql', headers='keys'))

+----+---------+----------+--------+-------+--------+
|    |   bread |   butter |   eggs |   jam |   milk |
|----+---------+----------+--------+-------+--------|
|  0 |       1 |        0 |      1 |     0 |      1 |
|  1 |       1 |        1 |      0 |     1 |      0 |
|  2 |       1 |        1 |      0 |     1 |      1 |
|  3 |       1 |        0 |      0 |     0 |      1 |
|  4 |       0 |        1 |      1 |     0 |      0 |
+----+---------+----------+--------+-------+--------+


In [6]:
# Support
# -------
# Apriori is built on the concept of support: a measure of how frequently an
# itemset (a combination of items) appears in the dataset.
#
#     Support(X) = (no. of transactions containing X) / (total no. of transactions)
#
# Frequent itemsets are the itemsets whose support meets a specified minimum
# support threshold. To find them, the algorithm counts the occurrences of
# each candidate itemset in the dataset and discards the itemsets whose
# support falls below the threshold.
#
# The cell below works the formula out by hand for the itemset {bread, milk}.

# Numerator: how many transactions contain both bread and milk.
count_bread_milk = sum(
    1
    for basket in transactions
    if 'bread' in basket and 'milk' in basket
)
print(f'count_bread_milk:{count_bread_milk}')

# Denominator: the total number of transactions.
total_transactions = len(transactions)
print(f'total_transactions:{total_transactions}')

# Support({bread, milk}) = matching transactions / all transactions.
support_bread_milk = count_bread_milk / total_transactions
print(f"Support for bread and milk: {support_bread_milk:.2f}")




count_bread_milk:3
total_transactions:5
Support for bread and milk: 0.60


In [7]:
# Run Apriori on the one-hot encoded transactions in `df`.
# min_support=0.2 is the minimum support an itemset needs to be kept;
# use_colnames=True labels itemsets with the item names (the column names
# of `df`) rather than column indices.
frequent_itemsets = apriori(
    df,
    use_colnames=True,
    min_support=0.2,
)

# Display the discovered frequent itemsets together with their support.
print("Frequent Itemsets:")
print(tabulate(frequent_itemsets, tablefmt='psql', headers='keys'))

Frequent Itemsets:
+----+-----------+-----------------------------------------------+
|    |   support | itemsets                                      |
|----+-----------+-----------------------------------------------|
|  0 |       0.8 | frozenset({'bread'})                          |
|  1 |       0.6 | frozenset({'butter'})                         |
|  2 |       0.4 | frozenset({'eggs'})                           |
|  3 |       0.4 | frozenset({'jam'})                            |
|  4 |       0.6 | frozenset({'milk'})                           |
|  5 |       0.4 | frozenset({'butter', 'bread'})                |
|  6 |       0.2 | frozenset({'eggs', 'bread'})                  |
|  7 |       0.4 | frozenset({'bread', 'jam'})                   |
|  8 |       0.6 | frozenset({'milk', 'bread'})                  |
|  9 |       0.2 | frozenset({'butter', 'eggs'})                 |
| 10 |       0.4 | frozenset({'butter', 'jam'})                  |
| 11 |       0.2 | frozenset({'butter', 'mi

In [8]:

# Turn the frequent itemsets into association rules, filtering on the
# "confidence" metric with a minimum threshold of 0.2.
rules = association_rules(
    frequent_itemsets,
    min_threshold=0.2,
    metric="confidence",
)

# Display every rule with all of its metrics (leading blank line separates
# this table from any earlier output).
print("\nAssociation Rules:")
print(tabulate(rules, tablefmt='psql', headers='keys'))



Association Rules:
+----+----------------------------------------+----------------------------------------+----------------------+----------------------+-----------+--------------+----------+------------+--------------+-----------------+
|    | antecedents                            | consequents                            |   antecedent support |   consequent support |   support |   confidence |     lift |   leverage |   conviction |   zhangs_metric |
|----+----------------------------------------+----------------------------------------+----------------------+----------------------+-----------+--------------+----------+------------+--------------+-----------------|
|  0 | frozenset({'butter'})                  | frozenset({'bread'})                   |                  0.6 |                  0.8 |       0.4 |     0.666667 | 0.833333 |      -0.08 |      0.6     |       -0.333333 |
|  1 | frozenset({'bread'})                   | frozenset({'butter'})                  |                

In [9]:
# Antecedents and consequents
# ---------------------------
# In the context of association rule mining and the Apriori algorithm, the
# terms "antecedents" and "consequents" refer to the two parts of an
# association rule:
#
#     Antecedents -> Consequents
#
# Antecedents:
#   The antecedent is the part of the rule that appears before the arrow (->).
#   It represents the items or conditions that are present in the data and are
#   used to predict the occurrence of the consequent.
#   In simpler terms, the antecedent is the "if" part of the rule.
#
# Consequents:
#   The consequent is the part of the rule that appears after the arrow (->).
#   It represents the item or condition that is predicted or implied based on
#   the presence of the antecedent.
#   In simpler terms, the consequent is the "then" part of the rule.
#
# Example: suppose we have the rule
#
#     {Bread, Milk} -> {Eggs}
#
#     Antecedents: {Bread, Milk}
#     Consequents: {Eggs}
#
# This rule suggests that if a customer buys Bread and Milk (the antecedents),
# they are likely to also buy Eggs (the consequent).

# Print each rule as "antecedents -> consequents". Only these two columns are
# needed, so iterate over them directly instead of materialising every row
# with iterrows(). The values are converted to plain sets so the output reads
# {'a', 'b'} rather than frozenset({'a', 'b'}).
for antecedent, consequent in zip(rules['antecedents'], rules['consequents']):
    print(f"Rule: {set(antecedent)} -> {set(consequent)}")
    
    
    

Rule: {'butter'} -> {'bread'}
Rule: {'bread'} -> {'butter'}
Rule: {'eggs'} -> {'bread'}
Rule: {'bread'} -> {'eggs'}
Rule: {'bread'} -> {'jam'}
Rule: {'jam'} -> {'bread'}
Rule: {'milk'} -> {'bread'}
Rule: {'bread'} -> {'milk'}
Rule: {'butter'} -> {'eggs'}
Rule: {'eggs'} -> {'butter'}
Rule: {'butter'} -> {'jam'}
Rule: {'jam'} -> {'butter'}
Rule: {'butter'} -> {'milk'}
Rule: {'milk'} -> {'butter'}
Rule: {'eggs'} -> {'milk'}
Rule: {'milk'} -> {'eggs'}
Rule: {'milk'} -> {'jam'}
Rule: {'jam'} -> {'milk'}
Rule: {'butter', 'bread'} -> {'jam'}
Rule: {'butter', 'jam'} -> {'bread'}
Rule: {'bread', 'jam'} -> {'butter'}
Rule: {'butter'} -> {'bread', 'jam'}
Rule: {'bread'} -> {'butter', 'jam'}
Rule: {'jam'} -> {'butter', 'bread'}
Rule: {'butter', 'milk'} -> {'bread'}
Rule: {'butter', 'bread'} -> {'milk'}
Rule: {'milk', 'bread'} -> {'butter'}
Rule: {'butter'} -> {'milk', 'bread'}
Rule: {'milk'} -> {'butter', 'bread'}
Rule: {'bread'} -> {'butter', 'milk'}
Rule: {'eggs', 'milk'} -> {'bread'}
Rule: {'eg

In [10]:
# Reference link; as the cell's last expression, the string is echoed as the
# cell output.
# NOTE(review): presumably the shared chat this walkthrough was based on — confirm.
'''https://chat.openai.com/share/39e88ea4-7bb6-4bfb-8257-8ae12b06c809'''

'https://chat.openai.com/share/39e88ea4-7bb6-4bfb-8257-8ae12b06c809'