In [None]:
!pip install apyori



Apriori has a few parameters which must be adjusted upon working with a new dataset. These parameters include minimum support, minimum confidence, and minimum lift. All of these parameters will assume their unitless form here (as opposed to units of baskets). The idea is to start these parameters high, keeping minimum support greater than minimum confidence, and minimum lift greater than 1.

These parameter constraints enable us to determine itemsets and their supports from association rules, the standard Apriori output. When a rule has an empty antecedent, the support of the rule is the support of the consequent. This allows us to obtain frequent 1-itemsets from the Apriori output. When the antecedent is not empty, the support of the rule is the support of the itemset formed by merging the antecedent with the consequent.

The goal of parameter tuning for Apriori is to find as many interesting association rules as possible without excessive computational complexity. Usually, one starts with settings that reveal no association rules, because “interesting” requires meeting the minimum support, confidence, and lift thresholds. This is okay.
Here is one way to tune Apriori parameters. Start with the lift threshold high, for example 2. Start with the minimum support higher than $1 - \exp(-\text{basket size} / \text{number of items})$, and the minimum confidence about 50% less than the minimum support. When we run Apriori in this mode, it should be pretty fast on this dataset, and we will see few if any association rules in the output. Next, lower the minimum lift gradually toward 1, each time re-running Apriori and looking for association rules. If we find some, great! If we find too many, raise the minimum support and minimum confidence proportionally, and reset the minimum lift to 2. If no rules are produced, lower the minimum support and minimum confidence proportionally, and reset the minimum lift to its initial value. Repeat as before. Eventually, we should see some, but not too many, “interesting” association rules. Once this happens, we should adjust the tuning parameters more gradually and see what we find. There are many other tuning strategies; please explore!

Our work focuses on the computations below:

a.) By trial and error, what are the values of the minimum support, minimum confidence, and minimum lift for which the number of interesting association rules is between 1 and 2 times the number of items?

b.) What relationship between these variables keeps the number of interesting association rules in this range?

c.) Provide a table of the 75 most frequent itemsets for the following parameters: minimum support=0.015, minimum confidence=0.01, minimum lift=1. Order the itemsets by descending count. (hint: you may use the output rules to determine these itemsets). You may use the association rule format for this table, whose columns are: antecedent, consequent, support, confidence, lift, and count.

d.) For the same parameter settings as in part c), determine the 50 most interesting association rules, as measured by lift. Present your findings in a table with the same format as in your answer to part c), with rules arranged by descending lift.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
# NOTE: this import rebinds the name `apriori`, shadowing the mlxtend function
# imported above; every later `apriori(...)` call resolves to apyori's version.
from apyori import apriori

In [None]:
# Load the basket file.  header=None makes pandas number the columns 0..N-1
# instead of consuming the first row of the file as column names.
data = pd.read_csv(
    '/content/Lab1MBA.csv',
    header=None,
)
data.head()

Unnamed: 0,0,1,2,3,4
0,5,6,11,36.0,46.0
1,6,17,24,31.0,35.0
2,9,20,25,30.0,
3,7,21,27,35.0,41.0
4,1,24,29,40.0,43.0


In [None]:
# (rows, columns) of the loaded frame: one row per basket, one column per item slot.
data.shape

(1000, 5)

a.)

In [None]:
# Build one basket (a list of item labels) per row of `data`.
# Baskets with fewer items than the frame has columns are padded with NaN;
# those cells are skipped so the literal string 'nan' is never treated as an
# item by Apriori.  Iterating over the frame's own rows avoids hard-coding its
# shape and evaluates `data.values` only once.
item_vals = [
    [str(value) for value in row if pd.notna(value)]
    for row in data.values
]

In [None]:
# Preview the first few baskets only; displaying the whole list floods the
# notebook output.
item_vals[:5]

[['5.0', '6.0', '11.0', '36.0', '46.0'],
 ['6.0', '17.0', '24.0', '31.0', '35.0'],
 ['9.0', '20.0', '25.0', '30.0', 'nan'],
 ['7.0', '21.0', '27.0', '35.0', '41.0'],
 ['1.0', '24.0', '29.0', '40.0', '43.0'],
 ['16.0', '30.0', '38.0', '40.0', '45.0'],
 ['10.0', '13.0', '25.0', '36.0', '37.0'],
 ['22.0', '26.0', '29.0', '37.0', '49.0'],
 ['23.0', '24.0', '42.0', '45.0', '50.0'],
 ['1.0', '16.0', '21.0', 'nan', 'nan'],
 ['19.0', '22.0', '25.0', '37.0', '50.0'],
 ['7.0', '12.0', '19.0', '32.0', '50.0'],
 ['4.0', '8.0', '26.0', '31.0', '35.0'],
 ['2.0', '5.0', '7.0', '37.0', '41.0'],
 ['3.0', '10.0', '13.0', '14.0', '19.0'],
 ['12.0', '28.0', '34.0', '48.0', '49.0'],
 ['3.0', '6.0', '10.0', '33.0', 'nan'],
 ['12.0', '18.0', '35.0', '38.0', 'nan'],
 ['2.0', '3.0', '4.0', '26.0', '29.0'],
 ['16.0', '20.0', '24.0', '31.0', '40.0'],
 ['15.0', '16.0', '27.0', '45.0', '49.0'],
 ['20.0', '24.0', '31.0', '40.0', '43.0'],
 ['23.0', '24.0', '32.0', '39.0', '47.0'],
 ['7.0', '22.0', '29.0', '40.0', '4

In [None]:
# Mine association rules from the baskets.  The result is materialised into a
# list so it can be counted here and iterated again by later cells.
# NOTE(review): min_lift is 1.1 here, while parts c) and d) of the task
# statement ask for a minimum lift of 1 — confirm which threshold is intended.
rule = apriori(
    item_vals,
    min_support=0.015,
    min_confidence=0.01,
    min_lift=1.1,
)
asso_rules = list(rule)
print(len(asso_rules))

37


In [None]:
# Print a short report for every mined record: the itemset, its support, and
# the confidence/lift of the record's first ordered statistic (index 0 of
# field 2) — any further statistics of the same record are not shown.
# The basket count is derived from the actual number of baskets instead of a
# hard-coded 1000, and rounded because support * n is a whole number only up
# to floating-point error.
n_baskets = len(item_vals)
for x in asso_rules:
    first_stat = x[2][0]
    print("Rules: " + str(x[0]))
    print("Support: " + str(x[1]))
    print("Confidence: " + str(first_stat[2]))
    print("Lift: " + str(first_stat[3]))
    print("Count: ", round(x[1] * n_baskets))

Rules: frozenset({'1.0', '45.0'})
Support: 0.017
Confidence: 0.18085106382978725
Lift: 1.6901968582223108
Count:  17.0
Rules: frozenset({'10.0', '21.0'})
Support: 0.015
Confidence: 0.13636363636363635
Lift: 1.3636363636363635
Count:  15.0
Rules: frozenset({'23.0', '10.0'})
Support: 0.016
Confidence: 0.14545454545454545
Lift: 1.484230055658627
Count:  16.0
Rules: frozenset({'10.0', '34.0'})
Support: 0.017
Confidence: 0.15454545454545457
Lift: 1.4309764309764312
Count:  17.0
Rules: frozenset({'10.0', '37.0'})
Support: 0.015
Confidence: 0.13636363636363635
Lift: 1.1086474501108647
Count:  15.0
Rules: frozenset({'23.0', '11.0'})
Support: 0.015
Confidence: 0.15957446808510636
Lift: 1.6283108988276158
Count:  15.0
Rules: frozenset({'14.0', '35.0'})
Support: 0.018
Confidence: 0.19148936170212766
Lift: 1.6091543000178796
Count:  18.0
Rules: frozenset({'3.0', '15.0'})
Support: 0.015
Confidence: 0.1744186046511628
Lift: 1.5856236786469344
Count:  15.0
Rules: frozenset({'18.0', '6.0'})
Support: 0

In [None]:
# Collect one table row per mined record into parallel column lists.
# Support, confidence and lift are kept as floats (not strings) so that
# sorting the resulting table orders them numerically rather than
# lexicographically.  Only the first ordered statistic of each record
# (index 0 of field 2) is used.
n_baskets = len(item_vals)  # number of baskets the supports are fractions of
rule = []
support = []
confidence = []
lift = []
count = []
for item in asso_rules:
    first_stat = item[2][0]
    rule.append(str(item[0]))
    support.append(float(item[1]))
    confidence.append(float(first_stat[2]))
    lift.append(float(first_stat[3]))
    # support * n_baskets is a whole number up to floating-point error;
    # rounding recovers the exact basket count.
    count.append(round(item[1] * n_baskets))

b.)

With min_support=0.013, min_confidence=0.009, and min_lift=1.1, the algorithm gives 93 association rules.

With min_support=0.015, min_confidence=0.009, and min_lift=1.2, the algorithm gives 34 association rules.

This shows that minimum support and minimum lift are critical in determining the number of association rules.

c.)

In [None]:
# Assemble the rule table from the parallel column lists, then order it by
# basket count, largest first.
df = pd.DataFrame(
    {
        "Rule": rule,
        "Support": support,
        "confidence": confidence,
        "Lift": lift,
        "Count": count,
    }
)
df_count = df.sort_values(by=['Count'], ascending=False)

In [None]:
# Keep at most the 75 highest-count rows and display them.
df_count = df_count.head(75)
df_count

Unnamed: 0,Rule,Support,confidence,Lift,Count
33,"frozenset({'9.0', '37.0'})",0.019,0.1544715447154471,1.3550135501355014,19.0
6,"frozenset({'14.0', '35.0'})",0.018,0.1914893617021276,1.6091543000178796,18.0
11,"frozenset({'2.0', '8.0'})",0.018,0.1682242990654205,1.5721897108917808,18.0
0,"frozenset({'1.0', '45.0'})",0.017,0.1808510638297872,1.6901968582223108,17.0
34,"frozenset({'42.0', '41.0'})",0.017,0.1574074074074074,1.7297517297517295,17.0
3,"frozenset({'10.0', '34.0'})",0.017,0.1545454545454545,1.4309764309764312,17.0
9,"frozenset({'2.0', '33.0'})",0.017,0.1588785046728972,1.48484583806446,17.0
15,"frozenset({'9.0', '21.0'})",0.016,0.16,1.4035087719298245,16.0
31,"frozenset({'36.0', '45.0'})",0.016,0.1666666666666666,1.557632398753894,16.0
30,"frozenset({'35.0', '5.0'})",0.016,0.134453781512605,1.305376519539855,16.0


d.)

In [None]:
# Rank the rules by lift, highest first, and keep at most the top 50.
# The Lift column may hold numbers stored as strings; key=pd.to_numeric makes
# the comparison numeric, so that e.g. '10.2' ranks above '9.5' (a plain
# string sort would order them lexicographically).
df_lift = df.sort_values(by=['Lift'], ascending=False, key=pd.to_numeric)
df_lift = df_lift.head(50)
df_lift.head()

Unnamed: 0,Rule,Support,confidence,Lift,Count
34,"frozenset({'42.0', '41.0'})",0.017,0.1574074074074074,1.7297517297517295,17.0
35,"frozenset({'48.0', '43.0'})",0.015,0.15625,1.7170329670329672,15.0
8,"frozenset({'18.0', '6.0'})",0.015,0.1724137931034483,1.6903313049357676,15.0
0,"frozenset({'1.0', '45.0'})",0.017,0.1808510638297872,1.6901968582223108,17.0
5,"frozenset({'23.0', '11.0'})",0.015,0.1595744680851063,1.6283108988276158,15.0


In [None]:
# Display the full lift-ranked table (at most 50 rows after the slice above).
df_lift

Unnamed: 0,Rule,Support,confidence,Lift,Count
34,"frozenset({'42.0', '41.0'})",0.017,0.1574074074074074,1.7297517297517295,17.0
35,"frozenset({'48.0', '43.0'})",0.015,0.15625,1.7170329670329672,15.0
8,"frozenset({'18.0', '6.0'})",0.015,0.1724137931034483,1.6903313049357676,15.0
0,"frozenset({'1.0', '45.0'})",0.017,0.1808510638297872,1.6901968582223108,17.0
5,"frozenset({'23.0', '11.0'})",0.015,0.1595744680851063,1.6283108988276158,15.0
6,"frozenset({'14.0', '35.0'})",0.018,0.1914893617021276,1.6091543000178796,18.0
7,"frozenset({'3.0', '15.0'})",0.015,0.1744186046511628,1.5856236786469344,15.0
11,"frozenset({'2.0', '8.0'})",0.018,0.1682242990654205,1.5721897108917808,18.0
31,"frozenset({'36.0', '45.0'})",0.016,0.1666666666666666,1.557632398753894,16.0
20,"frozenset({'3.0', '26.0'})",0.016,0.1684210526315789,1.5311004784688995,16.0


In [None]:
# (rows, columns) of the lift-ranked table; the row count is below 50 whenever
# fewer rules were mined than the cap allows.
print(df_lift.shape)

(37, 5)
