# Items in Grocery

In [1]:
A = [['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
         ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
         ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
         ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
         ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs']]

# Creating transaction encoder of grocery items

In [2]:
from mlxtend.preprocessing import TransactionEncoder
from pandas import DataFrame
te = TransactionEncoder()
B = DataFrame(te.fit_transform(A),columns=te.columns_)

In [3]:
B

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


# Finding frequent patterns in the transaction encoder dataframe

In [4]:
from mlxtend.frequent_patterns import apriori
C = apriori(B,min_support=0.5,use_colnames=True,max_len=2)

# Related items for grocery

In [5]:
C

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Kidney Beans, Eggs)"
6,0.6,"(Onion, Eggs)"
7,0.6,"(Kidney Beans, Milk)"
8,0.6,"(Kidney Beans, Onion)"
9,0.6,"(Yogurt, Kidney Beans)"


In [6]:
q = []
for i in C["itemsets"]:
    q.append(len(i))

In [7]:
q

[1, 1, 1, 1, 1, 2, 2, 2, 2, 2]

In [8]:
C["length"] = q

# Support and Length value of related grocery items

In [9]:
C

Unnamed: 0,support,itemsets,length
0,0.8,(Eggs),1
1,1.0,(Kidney Beans),1
2,0.6,(Milk),1
3,0.6,(Onion),1
4,0.6,(Yogurt),1
5,0.8,"(Kidney Beans, Eggs)",2
6,0.6,"(Onion, Eggs)",2
7,0.6,"(Kidney Beans, Milk)",2
8,0.6,"(Kidney Beans, Onion)",2
9,0.6,"(Yogurt, Kidney Beans)",2


In [10]:
B["Milk"].sum()

3

In [11]:
B[["Milk","Yogurt"]][(B["Milk"]==True) & (B["Yogurt"]==True)].shape[0]

2

# Confidence value of related grocery items

In [12]:
def confi(df,item1,item2):
    x = df[item1].sum()
    y = df[[item1,item2]][(df[item1]==True) & (df[item2]==True)].shape[0]
    return round(y/x,2)

In [13]:
confi(B,"Kidney Beans","Eggs")

0.8

In [14]:
confi(B,"Onion","Eggs")

1.0

In [15]:
confi(B,"Kidney Beans","Milk")

0.6

In [16]:
confi(B,"Kidney Beans","Onion")

0.6

In [17]:
confi(B,"Kidney Beans","Yogurt")

0.6

# Lift value of related grocery items

In [18]:
def lift(df,item1,item2):
    x = df[[item1,item2]][(df[item1]==True) & (df[item2]==True)].shape[0]
    a = df[item1].sum()
    b = df[item2].sum()
    y = a * b
    return round(x/y,2)

In [19]:
lift(B,"Kidney Beans","Eggs")

0.2

In [20]:
lift(B,"Onion","Eggs")

0.25

In [21]:
lift(B,"Kidney Beans","Milk")

0.2

In [22]:
lift(B,"Kidney Beans","Onion")

0.2

In [23]:
confi(B,"Kidney Beans","Yogurt")

0.6