In [1]:
import pandas as pd
import pickle

# Load the raw grocery purchase records from the CSV that sits next to the notebook.
CSV_PATH = 'Groceries_dataset.csv'
data = pd.read_csv(CSV_PATH)
data

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk
...,...,...,...
38760,4471,08-10-2014,sliced cheese
38761,2022,23-02-2014,candy
38762,1097,16-04-2014,cake bar
38763,1510,03-12-2014,fruit/vegetable juice


In [2]:
# Data preparation: gather every item description recorded for a member into a
# single list. The grouping key is Member_number only, so purchases from
# different dates end up in the same list and repeated items are kept as-is.
items_by_member = data.groupby('Member_number')['itemDescription']
transaction = items_by_member.apply(list).reset_index()
transaction

Unnamed: 0,Member_number,itemDescription
0,1000,"[soda, canned beer, sausage, sausage, whole mi..."
1,1001,"[frankfurter, frankfurter, beef, sausage, whol..."
2,1002,"[tropical fruit, butter milk, butter, frozen v..."
3,1003,"[sausage, root vegetables, rolls/buns, deterge..."
4,1004,"[other vegetables, pip fruit, root vegetables,..."
...,...,...
3893,4996,"[dessert, salty snack, rolls/buns, misc. bever..."
3894,4997,"[tropical fruit, white wine, whole milk, curd,..."
3895,4998,"[rolls/buns, curd]"
3896,4999,"[bottled water, butter milk, tropical fruit, b..."


In [3]:
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Pull the per-member item lists out of the prepared frame.
item_lists = transaction['itemDescription'].tolist()
# Drop lists holding a single entry; repeated items count towards the length.
filtered_transactions = [basket for basket in item_lists if len(basket) >= 2]

# Encode the remaining lists as an item-presence table, one column per item name.
te = TransactionEncoder()
te.fit(filtered_transactions)
te_array = te.transform(filtered_transactions)
df = pd.DataFrame(te_array, columns=te.columns_)

# Mine itemsets whose support in `df` is at least 0.1, labelled by item name
# rather than by column index.
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.1)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.119548,(beef)
1,0.158799,(bottled beer)
2,0.213699,(bottled water)
3,0.135967,(brown bread)
4,0.126475,(butter)
5,0.165213,(canned beer)
6,0.100564,(chicken)
7,0.18548,(citrus fruit)
8,0.114931,(coffee)
9,0.120831,(curd)


In [4]:
# mlxtend documents `num_itemsets` as the number of transactions in the data
# that was passed to apriori (the rows of `df`), not the number of frequent
# itemsets that were found, so take the count from the encoded table.
num_itemsets = len(df)

# Derive association rules from the frequent itemsets. The itemsets were mined
# with min_support=0.1, so filtering on metric='support' with the same 0.1
# threshold is not expected to remove any further rules.
rules = association_rules(
    frequent_itemsets,
    num_itemsets=num_itemsets,
    metric='support',
    min_threshold=0.1,
)

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(whole milk),(bottled water),0.458184,0.213699,0.112365,0.245241,1.147597,1.0,0.014452,1.04179,0.237376,0.200825,0.040114,0.385526
1,(bottled water),(whole milk),0.213699,0.458184,0.112365,0.52581,1.147597,1.0,0.014452,1.142615,0.163569,0.200825,0.124815,0.385526
2,(other vegetables),(rolls/buns),0.376603,0.349666,0.146742,0.389646,1.114335,1.0,0.015056,1.065502,0.164589,0.253209,0.061475,0.404654
3,(rolls/buns),(other vegetables),0.349666,0.376603,0.146742,0.419663,1.114335,1.0,0.015056,1.074197,0.157772,0.253209,0.069072,0.404654
4,(other vegetables),(soda),0.376603,0.313494,0.124166,0.3297,1.051695,1.0,0.006103,1.024178,0.078849,0.219402,0.023607,0.362886
5,(soda),(other vegetables),0.313494,0.376603,0.124166,0.396072,1.051695,1.0,0.006103,1.032237,0.071601,0.219402,0.03123,0.362886
6,(whole milk),(other vegetables),0.458184,0.376603,0.19138,0.417693,1.109106,1.0,0.018827,1.070564,0.181562,0.297448,0.065913,0.462934
7,(other vegetables),(whole milk),0.376603,0.458184,0.19138,0.508174,1.109106,1.0,0.018827,1.101643,0.157802,0.297448,0.092265,0.462934
8,(other vegetables),(yogurt),0.376603,0.282966,0.120318,0.319482,1.12905,1.0,0.013752,1.05366,0.18335,0.223121,0.050927,0.372343
9,(yogurt),(other vegetables),0.282966,0.376603,0.120318,0.425204,1.12905,1.0,0.013752,1.084553,0.159406,0.223121,0.077961,0.372343


In [5]:
# deployment: write the rules table to disk with pickle, then read it back
MODEL_PATH = 'model.pkl'

with open(MODEL_PATH, 'wb') as sink:
    pickle.dump(rules, sink)

# NOTE: unpickling can execute arbitrary code, so only load files you trust.
# The file read here is the one written just above.
with open(MODEL_PATH, 'rb') as source:
    rules = pickle.load(source)

print(rules)

           antecedents         consequents  antecedent support  \
0         (whole milk)     (bottled water)            0.458184   
1      (bottled water)        (whole milk)            0.213699   
2   (other vegetables)        (rolls/buns)            0.376603   
3         (rolls/buns)  (other vegetables)            0.349666   
4   (other vegetables)              (soda)            0.376603   
5               (soda)  (other vegetables)            0.313494   
6         (whole milk)  (other vegetables)            0.458184   
7   (other vegetables)        (whole milk)            0.376603   
8   (other vegetables)            (yogurt)            0.376603   
9             (yogurt)  (other vegetables)            0.282966   
10              (soda)        (rolls/buns)            0.313494   
11        (rolls/buns)              (soda)            0.349666   
12        (whole milk)        (rolls/buns)            0.458184   
13        (rolls/buns)        (whole milk)            0.349666   
14        