In [1]:
# Association Rule Learning
"""
Market Basket Analysis
    - Discover frequent itemsets (groups of items that are often bought together)
    - Generate association rules (X -> Y) from those itemsets
"""

# Toy data set: one entry per transaction, each entry being the set of items
# in that basket. Later cells test containment with `itemset.issubset(t)`.
transactions = [
    {"Bread","Milk","Egg","Butter","Salt","Apple"},
    {"Bread", "Milk","Egg","Apple"},
    {"Bread", "Milk","Butter","Apple"},
    {"Milk", "Egg","Butter","Apple"},
    {"Bread","Egg","Salt"},
    {"Bread", "Milk","Egg","Apple"},
]
# Sets are unordered, so the item order in the printed output can differ from
# the order in which the items are written above.
print(transactions)

[{'Milk', 'Bread', 'Egg', 'Salt', 'Apple', 'Butter'}, {'Bread', 'Milk', 'Apple', 'Egg'}, {'Bread', 'Milk', 'Apple', 'Butter'}, {'Egg', 'Milk', 'Apple', 'Butter'}, {'Bread', 'Salt', 'Egg'}, {'Bread', 'Milk', 'Apple', 'Egg'}]


In [2]:
# To print transactions in dataframe
import pandas as pd
# One-hot encode the baskets: one row per transaction, one 0/1 column per item.
# Sorting the item names gives a stable, alphabetical column order.
items = sorted(set().union(*transactions))

# Build all rows first and create the DataFrame once.
data = [
    {item: (1 if item in basket else 0) for item in items}
    for basket in transactions
]

df = pd.DataFrame(data)
# Number the transactions from 1 instead of pandas' default 0-based index.
df.index = range(1, len(df) + 1)
df.index.name = "Transaction"
print(df)

            Apple  Bread  Butter  Egg  Milk  Salt
Tranaction                                       
1               1      1       1    1     1     1
2               1      1       0    1     1     0
3               1      1       1    0     1     0
4               1      0       1    1     1     0
5               0      1       0    1     0     1
6               1      1       0    1     1     0


In [6]:
# Support and Confidence Functions

def support(itemset, transactions):
    """Return the absolute support of ``itemset``.

    The absolute support is the number of transactions that contain every
    item of ``itemset``. Divide by ``len(transactions)`` to obtain the
    relative support.

    Parameters
    ----------
    itemset : set
        Items that must all be present in a transaction for it to count.
    transactions : iterable of set
        The baskets to search. Only this argument is consulted; no
        notebook-level variable is read.

    Returns
    -------
    int
        Count of transactions that are supersets of ``itemset``. An empty
        ``itemset`` is contained in every transaction; an empty
        ``transactions`` gives 0.
    """
    return sum(1 for basket in transactions if itemset.issubset(basket))

def confidence(X, Y, transactions):
    """Confidence of the association rule ``X -> Y``.

    Computed as ``support(X union Y) / support(X)``: among the transactions
    that contain the antecedent ``X``, the share that also contain the
    consequent ``Y``.

    Parameters
    ----------
    X : set
        Antecedent itemset.
    Y : iterable
        Consequent items, merged into ``X`` with ``X.union``.
    transactions : iterable of set
        Baskets passed through to ``support``.

    Raises
    ------
    ZeroDivisionError
        If the support of ``X`` is zero.
    """
    rule_items = X.union(Y)
    joint_support = support(rule_items, transactions)
    antecedent_support = support(X, transactions)
    return joint_support / antecedent_support

# Support of the single-item itemset {Bread}.
print("Support(Bread):", support({"Bread"}, transactions))

# Rule Bread -> Milk: support({Bread, Milk}) / support({Bread}).
print("Confidence(Bread -> Milk):", confidence({"Bread"}, {"Milk"}, transactions))

# Same measure with a two-item antecedent: {Bread, Milk} -> Egg.
print("Confidence(Bread, Milk -> Egg):", confidence({"Bread", "Milk"}, {"Egg"}, transactions))

Support(Bread): 5
Confidence(Bread -> Milk): 0.8
Confidence(Bread, Milk -> Egg): 0.75


In [7]:
# Apriori Principle
"""
Apriori Principle:
    - If an itemset is frequent, all of its subsets must also be frequent.
    - Equivalently, if an itemset is infrequent, none of its supersets can be
      frequent, so candidates that contain it can be discarded.
"""

# Minimum support threshold, expressed as a fraction of all transactions:
# an itemset counts as "frequent" when its support is >= this value.
min_support = 0.5

def support(itemset, baskets=None):
    """Return the relative support of ``itemset``.

    The relative support is the fraction of transactions that contain every
    item of ``itemset``.

    Parameters
    ----------
    itemset : set
        Items that must all be present in a transaction for it to count.
    baskets : sequence of set, optional
        Transactions to search. Defaults to the notebook-level
        ``transactions``. Accepting it as an optional second argument keeps
        two-argument calls such as ``support(itemset, transactions)`` working
        after this definition replaces an earlier two-argument ``support``.

    Returns
    -------
    float
        Value in ``[0, 1]``; ``0.0`` when there are no transactions, so an
        empty data set does not raise ``ZeroDivisionError``.
    """
    if baskets is None:
        baskets = transactions
    if not baskets:
        return 0.0
    matches = sum(1 for basket in baskets if itemset.issubset(basket))
    return matches / len(baskets)

from itertools import combinations

# Apriori principle demonstration
# Step 1 - Find the frequent 1-itemsets: single items whose support reaches
# the minimum threshold.
items = sorted(set().union(*transactions))
freq_1 = [{name} for name in items if support({name}) >= min_support]
print("Frequent 1-itemsets: ")
for single in freq_1:
    print(single, "Support: ", support(single))

# Step 2 - Generate candidate 2-itemsets. Pairs are built only from items that
# are frequent on their own; this is where the Apriori principle prunes the
# search, since a pair containing an infrequent item cannot be frequent.
candidates_2 = [
    set(pair)
    for pair in combinations([next(iter(single)) for single in freq_1], 2)
]

# Step 3 - Keep the candidate pairs whose own support reaches the threshold.
freq_2 = [pair for pair in candidates_2 if support(pair) >= min_support]
print("\nFrequent 2-itemsets(After Apriori pruning): ")
for pair in freq_2:
    print(pair, "Support: ", support(pair))

Frequent 1-itemsets: 
{'Apple'} Support:  0.8333333333333334
{'Bread'} Support:  0.8333333333333334
{'Butter'} Support:  0.5
{'Egg'} Support:  0.8333333333333334
{'Milk'} Support:  0.8333333333333334

Frequent 2-itemsets(After Apriori pruning): 
{'Bread', 'Apple'} Support:  0.6666666666666666
{'Apple', 'Butter'} Support:  0.5
{'Egg', 'Apple'} Support:  0.6666666666666666
{'Milk', 'Apple'} Support:  0.8333333333333334
{'Bread', 'Egg'} Support:  0.6666666666666666
{'Bread', 'Milk'} Support:  0.6666666666666666
{'Milk', 'Butter'} Support:  0.5
{'Egg', 'Milk'} Support:  0.6666666666666666
