# Association Rule Mining

In [6]:
import pandas as pd
# Widen text columns so long product descriptions / itemsets are not truncated on display.
pd.set_option("display.max_colwidth", 150)

# Load the online retail dataset (one row per invoice line).
f = "https://github.com/cs6220/cs6220.spring2019/raw/master/data/Online%20Retail.xlsx"
df = pd.read_excel(f)

# Transform transactions into baskets of items: one row per invoice, one column
# per product description, each cell holding the total quantity on that invoice.
# Only United Kingdom invoices are kept. `unstack` already leaves "InvoiceNo" as
# the index, so no reset_index/set_index round trip is needed.
basket = (df[df["Country"] == "United Kingdom"]
          .groupby(["InvoiceNo", "Description"])["Quantity"]
          .sum()
          .unstack()
          .fillna(0))

# Convert counts to booleans: True when the invoice contains at least one unit
# of the item. A vectorised comparison replaces the deprecated element-wise
# `applymap`, and yields a real bool frame rather than 0/1 integers.
basket_sets = basket >= 1

## 1.1 Frequent Itemset Generation
### What are the top 5 1-itemsets with the highest support?

In [39]:
from mlxtend.frequent_patterns import apriori

# Mine every itemset whose support (fraction of rows in `basket_sets` that
# contain it) is at least 0.025; `use_colnames=True` reports items by column
# name instead of column position.
frequent_itemsets = apriori(basket_sets, min_support=0.025, use_colnames=True)

# Record the size of each itemset; the 2-itemset cell filters on this column too.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

# Keep the single-item sets and rank them by support, highest first.
frequent_one_itemsets = frequent_itemsets[frequent_itemsets['length'] == 1]
top_5_one_itemsets = (frequent_one_itemsets
                      .sort_values(by='support', ascending=False)
                      .head(5))
top_5_one_itemsets

Unnamed: 0,support,itemsets,length
123,0.098276,(WHITE HANGING HEART T-LIGHT HOLDER),1
54,0.087931,(JUMBO BAG RED RETROSPOT),1
99,0.076452,(REGENCY CAKESTAND 3 TIER),1
87,0.072323,(PARTY BUNTING),1
72,0.063158,(LUNCH BAG RED RETROSPOT),1


### What are the top 5 2-itemsets with the highest support?

In [40]:
# Select the itemsets made of exactly two items, using the 'length' column
# that was added to `frequent_itemsets` when it was built.
frequent_two_itemsets = frequent_itemsets.loc[frequent_itemsets['length'].eq(2)]

# Rank the pairs by support (highest first) and keep the first five.
top_5_two_itemsets = (frequent_two_itemsets
                      .sort_values(by='support', ascending=False)
                      .head(5))
top_5_two_itemsets

Unnamed: 0,support,itemsets,length
132,0.035617,"(JUMBO BAG RED RETROSPOT, JUMBO BAG PINK POLKADOT)",2
130,0.031806,"(ROSES REGENCY TEACUP AND SAUCER , GREEN REGENCY TEACUP AND SAUCER)",2
134,0.03167,"(JUMBO BAG RED RETROSPOT, JUMBO STORAGE BAG SUKI)",2
133,0.029809,"(JUMBO SHOPPER VINTAGE RED PAISLEY, JUMBO BAG RED RETROSPOT)",2
135,0.027541,"(LUNCH BAG BLACK SKULL., LUNCH BAG RED RETROSPOT)",2


### What is the highest support value for the 1-itemsets?

In [41]:
# `top_5_one_itemsets` is sorted by support in descending order, so its first
# row is the 1-itemset with the highest support.
top_5_one_itemsets.head(1)

Unnamed: 0,support,itemsets,length
123,0.098276,(WHITE HANGING HEART T-LIGHT HOLDER),1



### What is the highest support value for the 2-itemsets?


In [42]:
# `top_5_two_itemsets` is sorted by support in descending order, so its first
# row is the 2-itemset with the highest support.
top_5_two_itemsets.head(1)

Unnamed: 0,support,itemsets,length
132,0.035617,"(JUMBO BAG RED RETROSPOT, JUMBO BAG PINK POLKADOT)",2


## 1.2 Association Rule Generation
### What are the top 5 association rules?

In [43]:
from mlxtend.frequent_patterns import association_rules

### What items make up one of the top association rules? Search online for the items (or at least items with the same name). Do you think they are likely to be bought together?