In [1]:
! pip install --index-url https://test.pypi.org/simple/ PyARMViz

Looking in indexes: https://test.pypi.org/simple/
Collecting PyARMViz
  Obtaining dependency information for PyARMViz from https://test-files.pythonhosted.org/packages/ab/15/879fc7ca0904e5080c9ca7fdc239c5304c1ef03fe5c9809128ec9bf6177d/PyARMViz-0.1.3-py3-none-any.whl.metadata
  Downloading https://test-files.pythonhosted.org/packages/ab/15/879fc7ca0904e5080c9ca7fdc239c5304c1ef03fe5c9809128ec9bf6177d/PyARMViz-0.1.3-py3-none-any.whl.metadata (2.1 kB)
Downloading https://test-files.pythonhosted.org/packages/ab/15/879fc7ca0904e5080c9ca7fdc239c5304c1ef03fe5c9809128ec9bf6177d/PyARMViz-0.1.3-py3-none-any.whl (12.9 MB)
   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--
    --------------------------------------- 0.3/12.9 MB 8.3 MB/s eta 0:00:02
   --- ------------------------------------ 1.0/12.9 MB 16.4 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/12.9 MB 16.4 MB/s eta 0:00:01
   --- ------------------------------------ 1.0/12.9 MB 16.4 MB/s eta 0:00:01
 

In [2]:
import pandas as pd 
import numpy as np
import networkx as nx 
import plotly.express as px
import matplotlib.pyplot as plt
import warnings 
import seaborn as sns 
from PyARMViz import PyARMViz

# Silence all warnings for the rest of the session. Note that this also hides
# deprecation notices from pandas / mlxtend / matplotlib.
warnings.filterwarnings('ignore')

# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and 3.8 removed the old names, so plain 'seaborn' raises OSError on current
# releases. Prefer the new name and fall back to the old one on older installs.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [13]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Per-column dtype override handed to the CSV reader: key 1 is mapped to str
# (presumably selecting the column by position -- confirm against the file);
# every other column keeps the dtype pandas infers.
dtype_spec = {1: str}

# Load the raw grocery purchase records.
data = pd.read_csv(
    'Groceries_data.csv',
    dtype=dtype_spec,
)

# Preview the first rows.
data.head()

Unnamed: 0,Member_number,Date,itemDescription,year,month,day,day_of_week
0,1808,2015-07-21,tropical fruit,2015,7,21,1
1,2552,2015-05-01,whole milk,2015,5,1,4
2,2300,2015-09-19,pip fruit,2015,9,19,5
3,1187,2015-12-12,other vegetables,2015,12,12,5
4,3037,2015-01-02,whole milk,2015,1,2,4


In [15]:
from mlxtend.frequent_patterns import association_rules, apriori

def encoder(x):
    """Collapse a purchase count into a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        Number of times an item was counted for one basket.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    Every input maps to 0 or 1. Inputs that are neither ``<= 0`` nor ``>= 1``
    (a fraction between 0 and 1, or NaN) are treated as "not purchased" and
    return 0 rather than an implicit ``None``.
    """
    return 1 if x >= 1 else 0

# Count how often each member bought each item: one row per (member, item) pair.
apriori_data = (
    data.groupby(['Member_number', 'itemDescription'])['itemDescription']
    .count()
    .reset_index(name='Count')
)

# Reshape to a member x item matrix; pairs that never occur become 0.
apriori_basket = apriori_data.pivot_table(
    index='Member_number',
    columns='itemDescription',
    values='Count',
    aggfunc='sum',
).fillna(0)

# Binarise the counts: 1 if the member bought the item at least once, else 0.
# A vectorised comparison replaces DataFrame.applymap, which is deprecated
# since pandas 2.1, and yields the same 0/1 integer matrix for count data.
apriori_basket_set = (apriori_basket >= 1).astype('int64')
apriori_basket_set.head()

itemDescription,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,bags,baking powder,bathroom cleaner,beef,berries,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
Member_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1000,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
1001,0,0,0,0,0,0,0,0,1,0,...,0,0,0,1,0,1,0,1,0,0
1002,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1003,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1004,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [20]:
# Mine every itemset that appears in at least 1% of the basket rows, labelling
# itemsets with the item names instead of column indices.
f_items = apriori(
    apriori_basket_set,
    min_support=0.01,
    use_colnames=True,
)
f_items



Unnamed: 0,support,itemsets
0,0.015393,(Instant food products)
1,0.078502,(UHT-milk)
2,0.031042,(baking powder)
3,0.119548,(beef)
4,0.079785,(berries)
...,...,...
3011,0.011031,"(soda, whipped/sour cream, yogurt, whole milk)"
3012,0.010518,"(other vegetables, whole milk, rolls/buns, bot..."
3013,0.013597,"(other vegetables, whole milk, rolls/buns, sau..."
3014,0.010005,"(other vegetables, whole milk, rolls/buns, sho..."


In [17]:
# Plain-text dump of the frequent itemsets, header first.
# NOTE(review): the saved output for this cell reports 165 rows, while the
# In [20] display of f_items runs to index 3014 -- it looks stale; re-run to refresh.
print("Frequent Itemsets:", f_items, sep="\n")

Frequent Itemsets:
      support                                        itemsets
0    0.078502                                      (UHT-milk)
1    0.119548                                          (beef)
2    0.079785                                       (berries)
3    0.062083                                     (beverages)
4    0.158799                                  (bottled beer)
..        ...                                             ...
160  0.050539  (tropical fruit, other vegetables, whole milk)
161  0.071832          (yogurt, other vegetables, whole milk)
162  0.065162                  (rolls/buns, soda, whole milk)
163  0.065931                (rolls/buns, yogurt, whole milk)
164  0.054387                      (soda, yogurt, whole milk)

[165 rows x 2 columns]


In [21]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence is at least 0.1.
apriori_rules = association_rules(f_items, metric='confidence', min_threshold=0.1)

# Print the unsorted result first, so an empty table is visible immediately.
print("Association Rules:", apriori_rules, sep="\n")

if not apriori_rules.empty:
    # Highest-confidence rules first.
    apriori_rules = apriori_rules.sort_values('confidence', ascending=False)

    print("Sorted Association Rules:", apriori_rules, sep="\n")
else:
    print("No association rules found. Try adjusting the min_threshold or the metric used.")

Association Rules:
                     antecedents                                 consequents  \
0                     (UHT-milk)                                      (beef)   
1                     (UHT-milk)                              (bottled beer)   
2                     (UHT-milk)                             (bottled water)   
3                     (UHT-milk)                               (brown bread)   
4                     (UHT-milk)                                    (butter)   
...                          ...                                         ...   
9724    (soda, other vegetables)            (rolls/buns, yogurt, whole milk)   
9725          (soda, rolls/buns)      (yogurt, other vegetables, whole milk)   
9726              (soda, yogurt)  (rolls/buns, other vegetables, whole milk)   
9727  (yogurt, other vegetables)              (soda, rolls/buns, whole milk)   
9728        (rolls/buns, yogurt)        (soda, other vegetables, whole milk)   

      antecedent sup

In [22]:
# Rebuild the rule table, this time filtering on lift (>= 0.1), and order it
# from the most to the least confident rule.
apriori_rules = (
    association_rules(f_items, metric='lift', min_threshold=0.1)
    .sort_values('confidence', ascending=False)
)
apriori_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
7196,"(domestic eggs, meat)",(whole milk),0.013084,0.458184,0.010262,0.784314,1.711789,0.004267,2.512057,0.421328
5864,"(chocolate, fruit/vegetable juice)",(whole milk),0.014366,0.458184,0.010775,0.750000,1.636898,0.004192,2.167265,0.394760
15363,"(rolls/buns, yogurt, other vegetables, bottled...",(whole milk),0.014110,0.458184,0.010518,0.745455,1.626978,0.004053,2.128564,0.390879
11846,"(yogurt, pip fruit, bottled water)",(whole milk),0.013853,0.458184,0.010262,0.740741,1.616689,0.003914,2.089863,0.386811
12210,"(rolls/buns, brown bread, yogurt)",(whole milk),0.017445,0.458184,0.012827,0.735294,1.604802,0.004834,2.046862,0.383561
...,...,...,...,...,...,...,...,...,...,...
14701,(whole milk),"(rolls/buns, pip fruit, root vegetables)",0.458184,0.019497,0.010005,0.021837,1.119983,0.001072,1.002392,0.197722
6499,(whole milk),"(white bread, citrus fruit)",0.458184,0.018984,0.010005,0.021837,1.150253,0.001307,1.002916,0.241089
15446,(whole milk),"(rolls/buns, yogurt, shopping bags, other vege...",0.458184,0.014110,0.010005,0.021837,1.547613,0.003540,1.007899,0.653069
5845,(whole milk),"(chicken, whipped/sour cream)",0.458184,0.017701,0.010005,0.021837,1.233604,0.001895,1.004227,0.349505


In [24]:
# Re-order the rule table so the strongest associations (highest lift) come first.
apriori_rules = apriori_rules.sort_values(by='lift', ascending=False)
apriori_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
15396,"(sausage, other vegetables, whole milk)","(rolls/buns, yogurt)",0.050282,0.111339,0.013597,0.270408,2.428689,0.007998,1.218025,0.619400
15413,"(rolls/buns, yogurt)","(sausage, other vegetables, whole milk)",0.111339,0.050282,0.013597,0.122120,2.428689,0.007998,1.081831,0.661957
15399,"(rolls/buns, yogurt, other vegetables)","(sausage, whole milk)",0.052335,0.106978,0.013597,0.259804,2.428575,0.007998,1.206467,0.620721
15410,"(sausage, whole milk)","(rolls/buns, yogurt, other vegetables)",0.106978,0.052335,0.013597,0.127098,2.428575,0.007998,1.085650,0.658702
13015,"(sausage, whole milk)","(yogurt, curd)",0.106978,0.040277,0.010005,0.093525,2.322046,0.005696,1.058742,0.637549
...,...,...,...,...,...,...,...,...,...,...
8436,(long life bakery product),"(other vegetables, whole milk)",0.065418,0.191380,0.011031,0.168627,0.881112,-0.001488,0.972632,-0.126160
8847,"(sausage, other vegetables)",(newspapers),0.092868,0.139815,0.011288,0.121547,0.869340,-0.001697,0.979204,-0.142136
8850,(newspapers),"(sausage, other vegetables)",0.139815,0.092868,0.011288,0.080734,0.869340,-0.001697,0.986800,-0.148739
884,(citrus fruit),(cream cheese ),0.185480,0.088507,0.014110,0.076072,0.859502,-0.002306,0.986541,-0.167144


# Summary

1. Association Rules:
+ Each row represents an association rule of the form (antecedents → consequents).
+ For example, the first rule (sausage, other vegetables, whole milk) → (rolls/buns, yogurt) means that customers who bought sausage, other vegetables, and whole milk are also likely to buy rolls/buns and yogurt.

2. Support: 
+ This metric indicates the proportion of transactions in the dataset that contain both the antecedents and the consequents. In this notebook a "transaction" is one row of the basket matrix, i.e. everything a single `Member_number` bought.
+ For example, the first rule has a support of 0.013597, meaning approximately 1.36% of all baskets include both the antecedent and the consequent items.

3. Confidence:

+ This is the probability that a transaction containing the antecedents also contains the consequents.
+ For the first rule, the confidence is 0.270408, indicating that 27.04% of the transactions that include sausage, other vegetables, and whole milk also include rolls/buns and yogurt.

4. Lift:

+ Lift measures how much more likely the consequent is, given the antecedent, compared to its general popularity.
+ A lift greater than 1 suggests a positive association; a lift less than 1 suggests a negative association.
+ In the first rule, the lift is 2.428689, which means that customers who buy sausage, other vegetables, and whole milk are about 2.43 times more likely to buy rolls/buns and yogurt together compared to the general customer base.