In [2]:
# Read the transactions.csv file and convert it to a dataframe where each row represents a transaction and True is used to identify the item purchased and False for rest.

import pandas as pd

# Ensure to update the path
file_path = r"/content/transactions.csv"
df = pd.read_csv(file_path)

# Transform the dataset into a transaction matrix
transaction_matrix = df.pivot_table(index='Transaction', columns='Product', aggfunc=lambda x: True, fill_value=False)

# Convert boolean values to explicit True/False
transaction_matrix = transaction_matrix.astype(bool)

# Save the transformed data to a CSV file
transaction_matrix.to_csv("group03transactions01.csv")

print(transaction_matrix.head())

Product        Bow  Candy Bar  Deodorant  Greeting Cards  Magazine  Markers  \
Transaction                                                                   
12359        False       True      False           False     False    False   
12362        False      False      False           False     False    False   
12365        False      False      False           False     False    False   
12371         True      False      False           False     False    False   
12380        False      False      False            True     False    False   

Product      Pain Reliever  Pencils   Pens  Perfume  Photo Processing  \
Transaction                                                             
12359                False    False  False    False             False   
12362                 True    False  False    False             False   
12365                False    False  False    False             False   
12371                False    False  False    False             False   
12380   

If mlxtend is not installed run the line below

In [3]:
# !pip install mlxtend



In [4]:
import numpy as np
import pandas as pd
import mlxtend
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

#identifying the frequent itemsets that meet the minimum support threshold of 1%

fi = apriori(transaction_matrix, min_support = 0.01, use_colnames = True)
fi

Unnamed: 0,support,itemsets
0,0.054645,(Bow)
1,0.171005,(Candy Bar)
2,0.146885,(Greeting Cards)
3,0.241305,(Magazine)
4,0.0267,(Pain Reliever)
5,0.134925,(Pencils)
6,0.143575,(Pens)
7,0.08996,(Perfume)
8,0.05848,(Photo Processing)
9,0.014505,(Prescription Med)


In [5]:
# Identifying association rules with a minumum confidence of 10%

rules = association_rules(fi, metric="confidence", min_threshold=0.1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Toothbrush),(Bow),0.06735,0.054645,0.01134,0.168374,3.081236,1.0,0.00766,1.136755,0.724232,0.102481,0.120303,0.187948
1,(Bow),(Toothbrush),0.054645,0.06735,0.01134,0.207521,3.081236,1.0,0.00766,1.176877,0.714499,0.102481,0.150294,0.187948
2,(Candy Bar),(Greeting Cards),0.171005,0.146885,0.04366,0.255314,1.738191,1.0,0.018542,1.145604,0.512294,0.159209,0.127098,0.276277
3,(Greeting Cards),(Candy Bar),0.146885,0.171005,0.04366,0.297239,1.738191,1.0,0.018542,1.179626,0.49781,0.159209,0.152274,0.276277
4,(Magazine),(Candy Bar),0.241305,0.171005,0.040535,0.167982,0.982325,1.0,-0.000729,0.996367,-0.023167,0.109031,-0.003646,0.202511
5,(Candy Bar),(Magazine),0.171005,0.241305,0.040535,0.23704,0.982325,1.0,-0.000729,0.99441,-0.021244,0.109031,-0.005622,0.202511
6,(Candy Bar),(Pencils),0.171005,0.134925,0.033015,0.193065,1.430903,1.0,0.009942,1.07205,0.36326,0.120972,0.067208,0.218878
7,(Pencils),(Candy Bar),0.134925,0.171005,0.033015,0.244691,1.430903,1.0,0.009942,1.097558,0.348109,0.120972,0.088887,0.218878
8,(Candy Bar),(Toothpaste),0.171005,0.160425,0.03978,0.232625,1.450053,1.0,0.012347,1.094087,0.374393,0.136396,0.085996,0.240296
9,(Toothpaste),(Candy Bar),0.160425,0.171005,0.03978,0.247966,1.450053,1.0,0.012347,1.102338,0.369675,0.136396,0.092837,0.240296


In [6]:
# Listing the rules in descending order of lift.

rules = rules.sort_values('lift', ascending = False)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
27,(Perfume),(Toothbrush),0.08996,0.06735,0.02182,0.242552,3.60137,1.0,0.015761,1.231306,0.793732,0.161045,0.187854,0.283266
26,(Toothbrush),(Perfume),0.06735,0.08996,0.02182,0.323979,3.60137,1.0,0.015761,1.346172,0.77449,0.161045,0.257153,0.283266
0,(Toothbrush),(Bow),0.06735,0.054645,0.01134,0.168374,3.081236,1.0,0.00766,1.136755,0.724232,0.102481,0.120303,0.187948
1,(Bow),(Toothbrush),0.054645,0.06735,0.01134,0.207521,3.081236,1.0,0.00766,1.176877,0.714499,0.102481,0.150294,0.187948
28,"(Magazine, Candy Bar)",(Greeting Cards),0.040535,0.146885,0.016665,0.411126,2.798966,1.0,0.010711,1.448723,0.669879,0.097596,0.309737,0.262291
31,(Greeting Cards),"(Magazine, Candy Bar)",0.146885,0.040535,0.016665,0.113456,2.798966,1.0,0.010711,1.082253,0.753386,0.097596,0.076002,0.262291
43,"(Toothpaste, Pencils)",(Candy Bar),0.02456,0.171005,0.01139,0.463762,2.71198,1.0,0.00719,1.545947,0.64716,0.061843,0.353147,0.265184
29,"(Magazine, Greeting Cards)",(Candy Bar),0.036335,0.171005,0.016665,0.458649,2.682078,1.0,0.010452,1.531344,0.650802,0.0874,0.346979,0.278051
47,"(Magazine, Toothpaste)",(Greeting Cards),0.031665,0.146885,0.011945,0.37723,2.568202,1.0,0.007294,1.369873,0.63059,0.071697,0.270005,0.229276
39,"(Magazine, Toothpaste)",(Candy Bar),0.031665,0.171005,0.01372,0.433286,2.533762,1.0,0.008305,1.46281,0.625124,0.072612,0.316384,0.256759


In [7]:
# Top 5 Rules with highest confidence

t5_c = rules.copy()
t5_c['rank'] = rules['confidence'].rank(ascending =  False, method = 'min')
t5_c[t5_c['rank']<=5].sort_values('rank')[['antecedents', 'consequents','support', 'confidence', 'rank']]

Unnamed: 0,antecedents,consequents,support,confidence,rank
43,"(Toothpaste, Pencils)",(Candy Bar),0.01139,0.463762,1.0
29,"(Magazine, Greeting Cards)",(Candy Bar),0.016665,0.458649,2.0
39,"(Magazine, Toothpaste)",(Candy Bar),0.01372,0.433286,3.0
28,"(Magazine, Candy Bar)",(Greeting Cards),0.016665,0.411126,4.0
34,"(Toothpaste, Greeting Cards)",(Candy Bar),0.013175,0.410692,5.0


In [8]:
# Top 5 Rules with highest lift

t5_l = rules.copy()
t5_l['rank'] = rules['lift'].rank(ascending =  False, method = 'min')
t5_l[t5_l['rank']<=5].sort_values('rank')[['antecedents', 'consequents', 'consequent support', 'confidence', 'lift', 'rank']]

Unnamed: 0,antecedents,consequents,consequent support,confidence,lift,rank
27,(Perfume),(Toothbrush),0.06735,0.242552,3.60137,1.0
26,(Toothbrush),(Perfume),0.08996,0.323979,3.60137,2.0
0,(Toothbrush),(Bow),0.054645,0.168374,3.081236,3.0
1,(Bow),(Toothbrush),0.06735,0.207521,3.081236,3.0
28,"(Magazine, Candy Bar)",(Greeting Cards),0.146885,0.411126,2.798966,5.0
31,(Greeting Cards),"(Magazine, Candy Bar)",0.040535,0.113456,2.798966,5.0


In [9]:
# Top 5 Rules with highest leverage

t5_lev = rules.copy()
t5_lev['rank'] = rules['leverage'].rank(ascending =  False, method = 'min')
t5_lev[t5_lev['rank']<=5].sort_values('rank')[['antecedents', 'consequents', 'antecedent support', 'consequent support','support', 'leverage', 'rank']]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,leverage,rank
3,(Greeting Cards),(Candy Bar),0.146885,0.171005,0.04366,0.018542,1.0
2,(Candy Bar),(Greeting Cards),0.171005,0.146885,0.04366,0.018542,1.0
26,(Toothbrush),(Perfume),0.06735,0.08996,0.02182,0.015761,3.0
27,(Perfume),(Toothbrush),0.08996,0.06735,0.02182,0.015761,3.0
8,(Candy Bar),(Toothpaste),0.171005,0.160425,0.03978,0.012347,5.0
9,(Toothpaste),(Candy Bar),0.160425,0.171005,0.03978,0.012347,5.0


In [10]:
# Top 5 Rules with highest conviction

t5_conviction = rules.copy()
t5_conviction['rank'] = rules['conviction'].rank(ascending =  False, method = 'min')
t5_conviction[t5_conviction['rank']<=5].sort_values('rank')[['antecedents', 'consequents', 'consequent support', 'confidence', 'conviction', 'rank']]

Unnamed: 0,antecedents,consequents,consequent support,confidence,conviction,rank
43,"(Toothpaste, Pencils)",(Candy Bar),0.171005,0.463762,1.545947,1.0
29,"(Magazine, Greeting Cards)",(Candy Bar),0.171005,0.458649,1.531344,2.0
39,"(Magazine, Toothpaste)",(Candy Bar),0.171005,0.433286,1.46281,3.0
28,"(Magazine, Candy Bar)",(Greeting Cards),0.146885,0.411126,1.448723,4.0
34,"(Toothpaste, Greeting Cards)",(Candy Bar),0.171005,0.410692,1.406726,5.0
