# Association Rule

In [1]:
#! pip install apyori



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from apyori import apriori

In [3]:
# header=None: the CSV has no header row, so every line is read as data and
# the columns are labelled with integers starting at 0.
store_data = pd.read_csv('./data/store_data.csv', header=None)

In [4]:
# Preview the first rows; baskets with fewer items leave the trailing columns empty.
store_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [5]:
# (number of rows, number of columns) of the loaded table.
store_data.shape

(7501, 20)

### Data Preprocessing
The Apriori library we are going to use requires our dataset to be in the form of a list of lists.

In [6]:
# Convert each DataFrame row into one transaction: a list of item-name strings.
# Rows with fewer items than the widest basket hold NaN in the unused columns;
# those cells are dropped so the literal string 'nan' never becomes an "item"
# that would appear in almost every transaction.
records = [
    [str(item) for item in row if pd.notna(item)]
    for row in store_data.values
]

In [8]:
# Show the first transaction to confirm the list-of-strings format.
records[0]

['shrimp',
 'almonds',
 'avocado',
 'vegetables mix',
 'green grapes',
 'whole weat flour',
 'yams',
 'cottage cheese',
 'energy drink',
 'tomato juice',
 'low fat yogurt',
 'green tea',
 'honey',
 'salad',
 'mineral water',
 'salmon',
 'antioxydant juice',
 'frozen smoothie',
 'spinach',
 'olive oil']

### Support
The baseline popularity of an item (or itemset): the fraction of all transactions that contain it. <br>
$Support(A) = \frac{\text{Transactions containing }A}{\text{Total transactions}}$ <br>
$Support(A,B) = \frac{\text{Transactions containing }A \text{ and } B}{\text{Total transactions}}$

In [10]:
# Number of transactions whose basket includes 'chicken'.
n_chicken = sum(1 for basket in records if 'chicken' in basket)
n_chicken

450

In [11]:
# Support(chicken): share of all transactions that contain 'chicken'.
support_chicken = n_chicken/len(records)
support_chicken

0.05999200106652446

In [12]:
# Number of transactions whose basket includes 'light cream'.
n_cream = sum(1 for basket in records if 'light cream' in basket)
n_cream

117

In [13]:
# Support(light cream): share of all transactions that contain 'light cream'.
support_cream = n_cream/len(records)
support_cream

0.01559792027729636

In [14]:
# Number of transactions that contain both 'chicken' and 'light cream'.
n_chicken_cream = sum(
    1
    for basket in records
    if 'chicken' in basket and 'light cream' in basket
)
n_chicken_cream

34

In [15]:
# Support(chicken, light cream): share of all transactions that contain both items.
support_chicken_cream = n_chicken_cream/len(records)
support_chicken_cream

0.004532728969470737

### Confidence
The likelihood of purchasing item B if item A is purchased.<br>
$Confidence(A→B) = \frac{\text{Transactions containing both } A\text{ and } B}{\text{Transactions containing } A}$

In [16]:
# Confidence(chicken -> light cream): of the baskets that contain chicken,
# the share that also contain light cream.
confidence_chicken_cream = n_chicken_cream/n_chicken
confidence_chicken_cream

0.07555555555555556

In [17]:
# Confidence(light cream -> chicken): of the baskets that contain light cream,
# the share that also contain chicken.
confidence_cream_chicken = n_chicken_cream/n_cream
confidence_cream_chicken

0.2905982905982906

### Lift
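How much more likely B is to be purchased when A is purchased, compared with B's overall support; a lift above 1 indicates a positive association. Lift is symmetric: $Lift(A→B) = Lift(B→A)$.<br>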
$Lift(A→B) = \frac{Confidence (A→B)}{Support (B)}$

In [20]:
# Lift(chicken -> light cream) = Confidence(chicken -> light cream) / Support(light cream).
# The confidence and the consequent's support must follow the rule direction
# named by the variable. Lift is symmetric, so the value equals
# Lift(light cream -> chicken) up to floating-point rounding.
lift_chicken_cream = confidence_chicken_cream/support_cream
lift_chicken_cream

4.8439506172839515

In [21]:
# Lift(light cream -> chicken) = Confidence(light cream -> chicken) / Support(chicken).
# The confidence and the consequent's support must follow the rule direction
# named by the variable. Lift is symmetric, so the value equals
# Lift(chicken -> light cream) up to floating-point rounding.
lift_cream_chicken = confidence_cream_chicken/support_chicken
lift_cream_chicken

4.843950617283951

## Apriori algorithm
To apply the Apriori algorithm we need to set a few parameters:
- min_support: keeps only the itemsets whose support is at least the specified value.
- min_confidence: keeps only the rules whose confidence is at least the specified threshold.
- min_lift: specifies the minimum lift value for the shortlisted rules.
- max_length: specifies the maximum number of items allowed in an itemset (and therefore in a rule).
- min_length: intended to specify the minimum number of items that you want in your rules; note that `apyori` does not read this keyword, so passing it has no effect.

In [49]:
# Mine rules over itemsets of at most two items (max_length = 2).
# apyori's apriori() only reads min_support, min_confidence, min_lift and
# max_length from its keyword arguments; `min_length` is silently ignored, so
# it is not passed. With min_lift = 3 single-item records are filtered out
# anyway, because apyori assigns them a lift of 1.
association_rules = apriori(transactions = records, min_support = 0.003, min_confidence = 0.2,
                            min_lift = 3, max_length = 2)
# Materialise the results into a list so they can be counted, indexed and reused.
association_results = list(association_rules)

### Viewing the Results
Number of relation records (frequent itemsets with at least one qualifying rule) returned by the Apriori algorithm

In [50]:
# Count the records returned by apriori().
len(association_results)

9

In [52]:
# Look at the raw structure of the first result.
association_results[0]

RelationRecord(items=frozenset({'chicken', 'light cream'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)])

In [53]:
def inspect(output):
    """Flatten apyori relation records into one row per association rule.

    Parameters
    ----------
    output : iterable
        Relation records shaped like ``(items, support, ordered_statistics)``,
        where each ordered statistic is
        ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        ``(lhs, rhs, support, confidence, lift)`` for every ordered statistic
        of every record. ``lhs`` and ``rhs`` are the item names on each side
        of the rule, sorted and joined with ``', '``.
    """
    rows = []
    for result in output:
        support = result[1]
        # A record can carry several rules (e.g. A -> B and B -> A); emit all
        # of them instead of only the first.
        for items_base, items_add, confidence, lift in result[2]:
            # Join every item so multi-item (or empty) sides are reported in
            # full rather than truncated to one arbitrary set element.
            lhs = ', '.join(sorted(map(str, items_base)))
            rhs = ', '.join(sorted(map(str, items_add)))
            rows.append((lhs, rhs, support, confidence, lift))
    return rows
# Tabulate the rules returned by inspect().
output_DataFrame = pd.DataFrame(inspect(association_results), columns = ['Left_Hand_Side', 'Right_Hand_Side', 'Support', 'Confidence', 'Lift'])
# End the cell with the frame itself so the rules table is rendered on a fresh run.
output_DataFrame

Unnamed: 0,Left_Hand_Side,Right_Hand_Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
2,pasta,escalope,0.005866,0.372881,4.700812
3,fromage blanc,honey,0.003333,0.245098,5.164271
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
6,light cream,olive oil,0.0032,0.205128,3.11471
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
8,pasta,shrimp,0.005066,0.322034,4.506672


### Displaying the top 5 rules sorted in descending order of the Lift column

In [58]:
# Keep the 5 rows with the highest Lift, ordered from highest to lowest.
output_DataFrame.nlargest(n = 5, columns = 'Lift')

Unnamed: 0,Left_Hand_Side,Right_Hand_Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
