In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

## Market Basket Analysis

Market Basket Analysis is the process of discovering frequent itemsets in large transactional databases.

Market basket analysis might tell a retailer that customers often purchase Colgate toothpaste and a toothbrush together, so putting both items on promotion at the same time would not create a significant increase in revenue, while a promotion involving just one of the items would likely drive sales of the other.

## Association Rules

Association Rules are widely used to analyze retail basket or transaction data, and are intended to identify strong rules discovered in transaction data using measures of interestingness, based on the concept of strong rules.

##### There are various metrics in place to help us understand the strength of association between antecedent and consequent:

1. **Support:**  

It measures how popular a given itemset is: the proportion of all transactions in which the itemset appears.

2. **Confidence:** 

It measures how likely item Y is to be purchased when item X is purchased. It is the proportion of transactions containing item X in which item Y also appears: $\text{confidence}(X \rightarrow Y) = \dfrac{\text{support}(X \rightarrow Y)}{\text{support}(X)}$.

3. **Lift:**  

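It measures how likely item Y is to be purchased when item X is purchased, while controlling for how popular item Y is. The formula for lift is: $\text{lift}(X \rightarrow Y) = \dfrac{\text{support}(X \rightarrow Y)}{\text{support}(X) \times \text{support}(Y)}$. A lift above 1 means X and Y appear together more often than expected if they were independent; a lift below 1 means less often.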

4. **Leverage or Piatetsky-Shapiro**: 

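It computes the difference between the observed frequency of X and Y appearing together and the frequency that we would expect if X and Y were independent: $\text{leverage}(X \rightarrow Y) = \text{support}(X \rightarrow Y) - \text{support}(X) \times \text{support}(Y)$.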

5. **Conviction**: 

It can be interpreted as the ratio of the expected frequency that X occurs without Y (that is to say, the frequency that the rule makes an incorrect prediction) if X and Y were independent divided by the observed frequency of incorrect predictions.

In [19]:
# Load the raw transactions. Each row of the file is one basket stored as a
# single comma-separated string, which ends up in the 'items' column.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is replaced with a configurable/relative location.
data = (
    pd.read_csv(
        'C:/Users/nehal/Music/9.Machine Learning 3/Faculty Notebook/Market.csv',
        header=None,  # first line of the file is data, not column names
    )
    .rename(columns={0: 'items'})
)
data.head()

Unnamed: 0,items
0,"MILK,BREAD,BISCUIT"
1,"BREAD,MILK,BISCUIT,CORNFLAKES"
2,"BREAD,TEA,BOURNVITA"
3,"JAM,MAGGI,BREAD,MILK"
4,"MAGGI,TEA,BISCUIT"


In [20]:
# Split every basket string into one column per item position.
# Column names (item1, item2, ...) are derived from however many columns the
# split produces, so a basket with more or fewer than four items no longer
# triggers a length-mismatch error when the names are assigned.
# Shorter baskets are padded with the placeholder string 'NA'; that
# placeholder is not a real product and has to be removed again after encoding.
df = (
    data['items']
    .str.split(',', expand=True)
    .rename(columns=lambda pos: f'item{pos + 1}')
    .fillna('NA')
)
df

Unnamed: 0,item1,item2,item3,item4
0,MILK,BREAD,BISCUIT,
1,BREAD,MILK,BISCUIT,CORNFLAKES
2,BREAD,TEA,BOURNVITA,
3,JAM,MAGGI,BREAD,MILK
4,MAGGI,TEA,BISCUIT,
5,BREAD,TEA,BOURNVITA,
6,MAGGI,TEA,CORNFLAKES,
7,MAGGI,BREAD,TEA,BISCUIT
8,JAM,MAGGI,BREAD,TEA
9,BREAD,MILK,,


In [21]:
# Print a structural summary of df: row count, column names, non-null counts
# and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   item1   20 non-null     object
 1   item2   20 non-null     object
 2   item3   20 non-null     object
 3   item4   20 non-null     object
dtypes: object(4)
memory usage: 768.0+ bytes


In [22]:
# Number of transactions (rows) in the split frame.
len(df)

20

In [23]:
# Turn the frame into a list of transactions, one list of item strings per row
# (the 'NA' padding values are still included at this point).
records = [list(basket) for basket in df.values]
records

[['MILK', 'BREAD', 'BISCUIT', 'NA'],
 ['BREAD', 'MILK', 'BISCUIT', 'CORNFLAKES'],
 ['BREAD', 'TEA', 'BOURNVITA', 'NA'],
 ['JAM', 'MAGGI', 'BREAD', 'MILK'],
 ['MAGGI', 'TEA', 'BISCUIT', 'NA'],
 ['BREAD', 'TEA', 'BOURNVITA', 'NA'],
 ['MAGGI', 'TEA', 'CORNFLAKES', 'NA'],
 ['MAGGI', 'BREAD', 'TEA', 'BISCUIT'],
 ['JAM', 'MAGGI', 'BREAD', 'TEA'],
 ['BREAD', 'MILK', 'NA', 'NA'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'COCK', 'BISCUIT', 'CORNFLAKES'],
 ['COFFEE', 'SUGER', 'BOURNVITA', 'NA'],
 ['BREAD', 'COFFEE', 'COCK', 'NA'],
 ['BREAD', 'SUGER', 'BISCUIT', 'NA'],
 ['COFFEE', 'SUGER', 'CORNFLAKES', 'NA'],
 ['BREAD', 'SUGER', 'BOURNVITA', 'NA'],
 ['BREAD', 'COFFEE', 'SUGER', 'NA'],
 ['BREAD', 'COFFEE', 'SUGER', 'NA'],
 ['TEA', 'MILK', 'COFFEE', 'CORNFLAKES']]

In [24]:
# TransactionEncoder is imported in the notebook's top import cell.
# One-hot encode the transactions: one boolean column per distinct item,
# True where the item occurs in that transaction.
te = TransactionEncoder()
# fit_transform learns the item vocabulary and encodes in a single pass,
# replacing the separate fit(...).transform(...) calls on the same data.
df1 = pd.DataFrame(te.fit_transform(records), columns=te.columns_)
df1.head()


Unnamed: 0,BISCUIT,BOURNVITA,BREAD,COCK,COFFEE,CORNFLAKES,JAM,MAGGI,MILK,NA,SUGER,TEA
0,True,False,True,False,False,False,False,False,True,True,False,False
1,True,False,True,False,False,True,False,False,True,False,False,False
2,False,True,True,False,False,False,False,False,False,True,False,True
3,False,False,True,False,False,False,True,True,True,False,False,False
4,True,False,False,False,False,False,False,True,False,True,False,True


In [25]:
# Show the boolean flags as 0/1 integers.
# An explicit dtype cast is used instead of replace({True: 1, False: 0}) so the
# result does not depend on replace()'s implicit dtype inference; the cell is
# also safe to re-run because casting an integer frame to int is a no-op.
df1 = df1.astype(int)
df1.head()

Unnamed: 0,BISCUIT,BOURNVITA,BREAD,COCK,COFFEE,CORNFLAKES,JAM,MAGGI,MILK,NA,SUGER,TEA
0,1,0,1,0,0,0,0,0,1,1,0,0
1,1,0,1,0,0,1,0,0,1,0,0,0
2,0,1,1,0,0,0,0,0,0,1,0,1
3,0,0,1,0,0,0,1,1,1,0,0,0
4,1,0,0,0,0,0,0,1,0,1,0,1


In [26]:
# Remove the 'NA' padding column: it is a placeholder for empty item slots,
# not a product, and must not take part in the itemset mining.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps
# the cell idempotent, so running it a second time no longer raises KeyError.
df1 = df1.drop(columns='NA', errors='ignore')
df1

Unnamed: 0,BISCUIT,BOURNVITA,BREAD,COCK,COFFEE,CORNFLAKES,JAM,MAGGI,MILK,SUGER,TEA
0,1,0,1,0,0,0,0,0,1,0,0
1,1,0,1,0,0,1,0,0,1,0,0
2,0,1,1,0,0,0,0,0,0,0,1
3,0,0,1,0,0,0,1,1,1,0,0
4,1,0,0,0,0,0,0,1,0,0,1
5,0,1,1,0,0,0,0,0,0,0,1
6,0,0,0,0,0,1,0,1,0,0,1
7,1,0,1,0,0,0,0,1,0,0,1
8,0,0,1,0,0,0,1,1,0,0,1
9,0,0,1,0,0,0,0,0,1,0,0


In [30]:
# Mine itemsets that appear in at least 20% of the transactions.
# apriori expects a one-hot frame and warns on non-boolean dtypes, so the 0/1
# flags are cast to bool for the call; the resulting supports are unchanged.
freq_items = apriori(df1.astype(bool), min_support=0.2, use_colnames=True)

In [31]:
# Show the frequent itemsets ranked from highest to lowest support.
freq_items.sort_values('support', ascending=False)

Unnamed: 0,support,itemsets
2,0.65,(BREAD)
3,0.4,(COFFEE)
0,0.35,(BISCUIT)
8,0.35,(TEA)
4,0.3,(CORNFLAKES)
7,0.3,(SUGER)
5,0.25,(MAGGI)
6,0.25,(MILK)
1,0.2,(BOURNVITA)
9,0.2,"(BISCUIT, BREAD)"


In [32]:
# Derive association rules from the frequent itemsets, keeping only rules with
# confidence >= 0.2, and order them from most to least confident.
# The frame is sorted once here; the original sorted it a second time just to
# display it, which was redundant.
rules = (
    association_rules(freq_items, metric='confidence', min_threshold=0.2)
    .sort_values(by='confidence', ascending=False)
)
rules


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
2,(MILK),(BREAD),0.25,0.65,0.2,0.8,1.230769,0.0375,1.75
12,(MAGGI),(TEA),0.25,0.35,0.2,0.8,2.285714,0.1125,3.25
4,(SUGER),(BREAD),0.3,0.65,0.2,0.666667,1.025641,0.005,1.05
8,(CORNFLAKES),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8
10,(SUGER),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8
0,(BISCUIT),(BREAD),0.35,0.65,0.2,0.571429,0.879121,-0.0275,0.816667
7,(TEA),(BREAD),0.35,0.65,0.2,0.571429,0.879121,-0.0275,0.816667
13,(TEA),(MAGGI),0.35,0.25,0.2,0.571429,2.285714,0.1125,1.75
9,(COFFEE),(CORNFLAKES),0.4,0.3,0.2,0.5,1.666667,0.08,1.4
11,(COFFEE),(SUGER),0.4,0.3,0.2,0.5,1.666667,0.08,1.4
