In [41]:
import pandas as pd
from itertools import combinations

In [42]:
# Toy market-basket data: one entry per transaction, with the purchased items
# stored as a single comma-separated string.
data = {
    'TransactionID': [1, 2, 3, 4, 5],
    'ItemsPurchased': [
        'Bread,Butter,Milk',
        'Bread,Diaper,Beer,Milk',
        'Milk,Diaper,Beer',
        'Bread,Butter',
        'Bread,Butter,Diaper,Milk',
    ],
}

In [43]:
# Build the transactions frame from the raw dict: one row per transaction.
df = pd.DataFrame(data)

In [44]:
# Preview the first rows of the transactions frame.
df.head()

Unnamed: 0,TransactionID,ItemsPurchased
0,1,"Bread,Butter,Milk"
1,2,"Bread,Diaper,Beer,Milk"
2,3,"Milk,Diaper,Beer"
3,4,"Bread,Butter"
4,5,"Bread,Butter,Diaper,Milk"


In [45]:
# Purchase strings as a plain list, one entry per transaction.
all_words_rows = df['ItemsPurchased'].tolist()

In [46]:
# Distinct item names, obtained by splitting every comma-separated
# transaction string and pooling the pieces into one set.
all_word = {
    item
    for transaction in all_words_rows
    for item in transaction.split(',')
}

In [47]:
# Turn each comma-separated purchase string into a list of item names.
# The isinstance guard keeps this cell idempotent: on a second run the values
# are already lists, and calling .split on a list would raise AttributeError.
df['ItemsPurchased'] = df['ItemsPurchased'].apply(
    lambda x: x.split(',') if isinstance(x, str) else x
)

In [48]:
# Re-read the column into a plain list, one entry per transaction, and print
# it for inspection.
all_words_rows = df['ItemsPurchased'].tolist()
print(all_words_rows)

[['Bread', 'Butter', 'Milk'], ['Bread', 'Diaper', 'Beer', 'Milk'], ['Milk', 'Diaper', 'Beer'], ['Bread', 'Butter'], ['Bread', 'Butter', 'Diaper', 'Milk']]


In [49]:
# Accumulator for the distinct item names; filled by the loop in the next cell.
all_words = set()


In [51]:
# Pool the items of every transaction into the set of distinct item names.
all_words.update(
    word
    for transaction_items in all_words_rows
    for word in transaction_items
)



In [52]:
# Show the distinct item names collected so far.
all_words

{'Beer', 'Bread', 'Butter', 'Diaper', 'Milk'}

In [53]:
def generate_itemsets(all_words):
    """Return every non-empty combination of the distinct items in ``all_words``.

    Args:
        all_words: Iterable of hashable items (for example a set of item
            names). Duplicates are ignored.

    Returns:
        set[tuple]: One tuple per non-empty subset of the distinct items.
        When the items can be ordered, the elements inside each tuple are in
        sorted order, so the same items always produce the same tuples.
    """
    unique_items = set(all_words)
    try:
        # Iterating a set of strings directly can yield a different order on
        # every interpreter start (string hash randomisation), which would
        # change the element order inside the tuples from run to run.
        ordered_items = sorted(unique_items)
    except TypeError:
        # Items that cannot be compared with each other: keep set order.
        ordered_items = list(unique_items)

    all_itemsets = set()
    for size in range(1, len(ordered_items) + 1):
        all_itemsets.update(combinations(ordered_items, size))
    return all_itemsets

In [55]:
# Candidate itemsets: every non-empty combination of the known items.
all_itemset = generate_itemsets(all_words)

In [58]:
# Hand-picked sample itemset.
# NOTE(review): no later cell shown in this notebook reads this variable;
# presumably left over from ad-hoc experimentation. Confirm before removing.
example_itemset = ['Beer', 'Bread', 'Butter', 'Diaper']

In [85]:
def generate_rules(item_set):
    """Enumerate the antecedent/consequent splits of ``item_set``.

    Args:
        item_set: Sized iterable of hashable items (list, tuple, set, ...).

    Returns:
        list[tuple[set, set, set]]: One ``(antecedent, consequent, itemset)``
        triple for every subset of size 1 .. ``len(item_set) - 1``; the
        consequent is the itemset minus the antecedent. The third element is
        the same set object in every triple. An input with fewer than two
        elements produces an empty list.
    """
    full_set = set(item_set)
    return [
        (set(chosen), full_set - set(chosen), full_set)
        for size in range(1, len(item_set))
        for chosen in combinations(full_set, size)
    ]

In [61]:
# NOTE(review): this repeats the generate_itemsets(all_words) call whose
# result is already stored in all_itemset; no later cell shown in this
# notebook reads get_itemsets.
get_itemsets = generate_itemsets(all_words)

In [77]:
# Empty frame with one column per distinct item; rows are added by the fill
# loop further down. Column order follows the iteration order of the set.
binary_df = pd.DataFrame(columns=list(all_words))

In [78]:
# Preview the frame: only the item columns exist so far, no rows yet.
binary_df.head()

Unnamed: 0,Butter,Diaper,Bread,Milk,Beer


In [79]:
# Mark every purchased item with 1 in the row of its transaction.
# Assigning through .loc to a row label that does not exist yet adds that row;
# the cells of items that were not bought are left as NaN.
for idx, row in enumerate(all_words_rows):
    for word in row:
        binary_df.loc[idx, word] = 1

In [80]:
# Preview the filled frame: 1 where the item was bought, NaN elsewhere.
binary_df.head()

Unnamed: 0,Butter,Diaper,Bread,Milk,Beer
0,1.0,,1.0,1.0,
1,,1.0,1.0,1.0,1.0
2,,1.0,,1.0,1.0
3,1.0,,1.0,,
4,1.0,1.0,1.0,1.0,


In [82]:
# Collapse the 1 / NaN markers into booleans: True where the cell equals 1,
# False everywhere else (NaN never compares equal to 1).
binary_df = (binary_df == 1)

In [83]:
# Check the column dtypes after the boolean conversion.
binary_df.dtypes

Butter    bool
Diaper    bool
Bread     bool
Milk      bool
Beer      bool
dtype: object

In [86]:
# Flatten the rules produced for every candidate itemset into a single list.
all_rules = [
    rule
    for itemset in all_itemset
    for rule in generate_rules(itemset)
]

In [90]:
# Absolute support: for every candidate itemset, the number of transactions
# that contain all of its items.
support_dict = {}

for row in all_itemset:
    total_cols = len(row)
    # A transaction contains the whole itemset when the row-wise sum over the
    # itemset's columns equals the number of items. Computing this for all
    # transactions at once replaces the per-row .loc lookup and no longer
    # relies on the index being 0..n-1.
    contains_all = binary_df[list(row)].sum(axis=1) == total_cols
    count = int(contains_all.sum())
    if count:
        # Itemsets that occur in no transaction are left out of the dict.
        support_dict[row] = count


In [92]:
# Relative support: count divided by the number of transactions. Each itemset
# tuple is converted to a set so it can be compared by equality with the sets
# stored in the rules.
support_list = [(set(k), v / len(binary_df)) for k, v in support_dict.items()]

In [93]:
# Inspect the (itemset, relative support) pairs.
support_list

[({'Bread', 'Diaper', 'Milk'}, 0.4),
 ({'Beer', 'Diaper', 'Milk'}, 0.4),
 ({'Bread', 'Butter', 'Diaper', 'Milk'}, 0.2),
 ({'Beer', 'Bread', 'Diaper', 'Milk'}, 0.2),
 ({'Bread', 'Butter'}, 0.6),
 ({'Beer', 'Diaper'}, 0.4),
 ({'Bread'}, 0.8),
 ({'Butter', 'Milk'}, 0.4),
 ({'Beer', 'Bread'}, 0.2),
 ({'Milk'}, 0.8),
 ({'Beer', 'Bread', 'Diaper'}, 0.2),
 ({'Butter', 'Diaper'}, 0.2),
 ({'Bread', 'Butter', 'Diaper'}, 0.2),
 ({'Butter', 'Diaper', 'Milk'}, 0.2),
 ({'Beer', 'Bread', 'Milk'}, 0.2),
 ({'Butter'}, 0.6),
 ({'Bread', 'Butter', 'Milk'}, 0.4),
 ({'Beer'}, 0.4),
 ({'Bread', 'Diaper'}, 0.4),
 ({'Diaper', 'Milk'}, 0.6),
 ({'Diaper'}, 0.6),
 ({'Beer', 'Milk'}, 0.4),
 ({'Bread', 'Milk'}, 0.6)]

In [None]:
# One row per rule: the antecedent, the consequent and the full itemset the
# two were split from.
col = ['antecedent', 'consequent', 'itemset']
rules_df = pd.DataFrame(all_rules, columns=col)


In [98]:
# Display the generated rules.
rules_df

Unnamed: 0,antecedent,consequent,itemset
0,{Butter},"{Bread, Milk, Beer}","{Butter, Bread, Milk, Beer}"
1,{Bread},"{Butter, Milk, Beer}","{Butter, Bread, Milk, Beer}"
2,{Milk},"{Butter, Bread, Beer}","{Butter, Bread, Milk, Beer}"
3,{Beer},"{Butter, Bread, Milk}","{Butter, Bread, Milk, Beer}"
4,"{Butter, Bread}","{Milk, Beer}","{Butter, Bread, Milk, Beer}"
...,...,...,...
175,"{Diaper, Beer}",{Butter},"{Butter, Diaper, Beer}"
176,{Milk},{Beer},"{Milk, Beer}"
177,{Beer},{Milk},"{Milk, Beer}"
178,{Bread},{Milk},"{Bread, Milk}"


In [99]:
def get_support_value(itemset, supports=None):
    """Look up the support recorded for ``itemset``.

    Args:
        itemset: Itemset to look up; compared by equality with the first
            element of every entry, so pass the same container type the
            entries use (sets in this notebook).
        supports: Optional iterable of ``(itemset, support)`` entries to
            search. When omitted, the notebook-level ``support_list`` is used.

    Returns:
        The support of the first matching entry, or 0 when no entry matches.
    """
    if supports is None:
        supports = support_list
    return next(
        (entry[1] for entry in supports if entry[0] == itemset),
        0,
    )

In [100]:
# Attach the looked-up support of each rule part as its own column.
support_sources = {
    'antecedent_support': 'antecedent',
    'consequent_support': 'consequent',
    'itemset_support': 'itemset',
}
for target_col, source_col in support_sources.items():
    rules_df[target_col] = rules_df[source_col].apply(get_support_value)

### Confidence

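Confidence(A → B) = Support(A ∪ B) / Support(A), where Support(A ∪ B) is the share of transactions that contain every item of both A and B.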


In [101]:
# confidence = support(full itemset) / support(antecedent).
# An antecedent with zero support makes this 0 / 0, which pandas stores as NaN.
rules_df['Confidence'] = rules_df['itemset_support'] / rules_df['antecedent_support']

### Lift

Lift(A → B) = (Support(A ∪ B) / Support(A)) / Support(B), i.e. the confidence of the rule divided by the support of its consequent.


In [102]:
# lift = confidence / support(consequent).
# A consequent with zero support combined with zero confidence gives 0 / 0,
# which is why some rows show NaN in the Lift column.
rules_df['Lift'] = rules_df['Confidence'] / rules_df['consequent_support']

In [103]:
# Preview the first rules together with their support, confidence and lift.
rules_df.head()

Unnamed: 0,antecedent,consequent,itemset,antecedent_support,consequent_support,itemset_support,Confidence,Lift
0,{Butter},"{Bread, Milk, Beer}","{Butter, Bread, Milk, Beer}",0.6,0.2,0.0,0.0,0.0
1,{Bread},"{Butter, Milk, Beer}","{Butter, Bread, Milk, Beer}",0.8,0.0,0.0,0.0,
2,{Milk},"{Butter, Bread, Beer}","{Butter, Bread, Milk, Beer}",0.8,0.0,0.0,0.0,
3,{Beer},"{Butter, Bread, Milk}","{Butter, Bread, Milk, Beer}",0.4,0.4,0.0,0.0,0.0
4,"{Butter, Bread}","{Milk, Beer}","{Butter, Bread, Milk, Beer}",0.6,0.4,0.0,0.0,0.0
