Given a purchase history for a customer and a large inventory of products, identify those products in which that customer will be interested and likely to purchase. Create a machine learning model for this.

#Loading the Dataset

In [1]:
from google.colab import drive
drive.mount("/content/drive/")

Mounted at /content/drive/


In [2]:
path = '/content/drive/My Drive/Colab Notebooks/Datasets/association/groceries.csv'

In [3]:
import pandas as pd

In [4]:
all_txns = []
# opening the file
with open(path) as f:
  # reading each line
  content = f.readlines()
  # removing the traling and leading whitespaces
  txns = [x.strip() for x in content]
  # iterating through each line and creating a list of txns
  for each_txns in txns:
    # each txns will contains a list of item in the txns
    all_txns.append(each_txns.split(','))

In [5]:
all_txns[0:10]

[['citrus fruit', 'semi-finished bread', 'margarine', 'ready soups'],
 ['tropical fruit', 'yogurt', 'coffee'],
 ['whole milk'],
 ['pip fruit', 'yogurt', 'cream cheese ', 'meat spreads'],
 ['other vegetables',
  'whole milk',
  'condensed milk',
  'long life bakery product'],
 ['whole milk', 'butter', 'yogurt', 'rice', 'abrasive cleaner'],
 ['rolls/buns'],
 ['other vegetables',
  'UHT-milk',
  'rolls/buns',
  'bottled beer',
  'liquor (appetizer)'],
 ['pot plants'],
 ['whole milk', 'cereals']]

# Encoding transactions

In [6]:
# importing reqd libraries
import numpy as np
from mlxtend.preprocessing import TransactionEncoder

In [7]:
txns_encoder = TransactionEncoder()
# data transformation
encoder_txns = txns_encoder.fit(all_txns).transform(all_txns)

In [8]:
# conversion of matrix to df
encoder_txns_df = pd.DataFrame(encoder_txns, columns=txns_encoder.columns_).astype(int)

In [9]:
encoder_txns_df.iloc[5:10,10:20]

Unnamed: 0,berries,beverages,bottled beer,bottled water,brandy,brown bread,butter,butter milk,cake bar,candles
5,0,0,0,0,0,0,1,0,0,0
6,0,0,0,0,0,0,0,0,0,0
7,0,0,1,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0


In [10]:
encoder_txns_df.shape

(9835, 171)

# Association rules

In [11]:
len(encoder_txns_df.columns) #no. of items in the data

171

In [12]:
from mlxtend.frequent_patterns import apriori

In [13]:
frequent_itemsets = apriori(encoder_txns_df,min_support = 0.02, use_colnames= True)

In [15]:
frequent_itemsets.sample(10,random_state=45)

Unnamed: 0,support,itemsets
88,0.03274,"(soda, other vegetables)"
99,0.024301,"(root vegetables, rolls/buns)"
21,0.058973,(frankfurter)
89,0.035892,"(tropical fruit, other vegetables)"
39,0.193493,(other vegetables)
82,0.026131,"(pip fruit, other vegetables)"
117,0.032232,"(whipped/sour cream, whole milk)"
26,0.026029,(ham)
90,0.028876,"(whipped/sour cream, other vegetables)"
101,0.038332,"(soda, rolls/buns)"


In [16]:
from mlxtend.frequent_patterns import apriori, association_rules

In [17]:
rules = association_rules(frequent_itemsets, metric = 'lift', min_threshold=1)

In [18]:
rules.sample(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
26,(other vegetables),(domestic eggs),0.193493,0.063447,0.022267,0.115081,1.813824,0.009991,1.05835
61,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,0.025394,1.140548
87,(rolls/buns),(yogurt),0.183935,0.139502,0.034367,0.186844,1.339363,0.008708,1.05822
80,(soda),(rolls/buns),0.174377,0.183935,0.038332,0.219825,1.195124,0.006258,1.046003
72,(pip fruit),(whole milk),0.075648,0.255516,0.030097,0.397849,1.557043,0.010767,1.236375
79,(sausage),(rolls/buns),0.09395,0.183935,0.030605,0.325758,1.771048,0.013324,1.210344
111,(whipped/sour cream),(yogurt),0.071683,0.139502,0.020742,0.289362,2.074251,0.010742,1.210881
17,(butter),(whole milk),0.055414,0.255516,0.027555,0.497248,1.946053,0.013395,1.480817
100,(soda),(tropical fruit),0.174377,0.104931,0.020844,0.119534,1.139159,0.002546,1.016585
115,"(root vegetables, whole milk)",(other vegetables),0.048907,0.193493,0.023183,0.474012,2.44977,0.013719,1.53332


In [20]:
# top 10 rules
rules.sort_values('confidence', ascending=False)[0:10]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
120,"(yogurt, other vegetables)",(whole milk),0.043416,0.255516,0.022267,0.512881,2.007235,0.011174,1.52834
17,(butter),(whole milk),0.055414,0.255516,0.027555,0.497248,1.946053,0.013395,1.480817
25,(curd),(whole milk),0.053279,0.255516,0.026131,0.490458,1.919481,0.012517,1.461085
114,"(root vegetables, other vegetables)",(whole milk),0.047382,0.255516,0.023183,0.48927,1.914833,0.011076,1.457687
115,"(root vegetables, whole milk)",(other vegetables),0.048907,0.193493,0.023183,0.474012,2.44977,0.013719,1.53332
29,(domestic eggs),(whole milk),0.063447,0.255516,0.029995,0.472756,1.850203,0.013783,1.41203
108,(whipped/sour cream),(whole milk),0.071683,0.255516,0.032232,0.449645,1.759754,0.013916,1.352735
90,(root vegetables),(whole milk),0.108998,0.255516,0.048907,0.448694,1.756031,0.021056,1.350401
50,(root vegetables),(other vegetables),0.108998,0.193493,0.047382,0.434701,2.246605,0.026291,1.426693
32,(frozen vegetables),(whole milk),0.048094,0.255516,0.020437,0.424947,1.663094,0.008149,1.294636


It could be inferred from the above table that if the customer buys whole milk if he has bought yogurt and vegetable is 0.51 i.e.

Given that he has bought yogurt and vegetable he is likely to buy whole milk

In [25]:
print(rules.head())

          antecedents         consequents  antecedent support  \
0              (beef)        (whole milk)            0.052466   
1        (whole milk)              (beef)            0.255516   
2     (bottled water)  (other vegetables)            0.110524   
3  (other vegetables)     (bottled water)            0.193493   
4     (bottled water)        (rolls/buns)            0.110524   

   consequent support   support  confidence      lift  leverage  conviction  
0            0.255516  0.021251    0.405039  1.585180  0.007845    1.251315  
1            0.052466  0.021251    0.083168  1.585180  0.007845    1.033487  
2            0.193493  0.024809    0.224471  1.160101  0.003424    1.039945  
3            0.110524  0.024809    0.128219  1.160101  0.003424    1.020297  
4            0.183935  0.024199    0.218951  1.190373  0.003870    1.044832  


In [28]:
rules.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126 entries, 0 to 125
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   antecedents         126 non-null    object 
 1   consequents         126 non-null    object 
 2   antecedent support  126 non-null    float64
 3   consequent support  126 non-null    float64
 4   support             126 non-null    float64
 5   confidence          126 non-null    float64
 6   lift                126 non-null    float64
 7   leverage            126 non-null    float64
 8   conviction          126 non-null    float64
dtypes: float64(7), object(2)
memory usage: 9.0+ KB


Enter the no. of items and items bought it will print the list of recommended products

In [52]:
flag = True
try:
  while flag:
    print("How many items you bought?")
    n = int(input())
    cart = []
    if(n > 0):
      print("Amazing: Input the things your bought: ")
      for i in range(0,n):
        string = input()
        cart.append(string)
      # print(cart)
      print("Product recommended: ")
      if len(cart)>0:
        items_1 = "frozenset({'"+cart[-1]+"'})"
        for i in range(0,len(rules)):
          if str(rules.antecedents[i]) == items_1:
            string = str(rules.consequents[i])
            start = string.find("'") + 1
            end = string.find("'", start)
            substring = string[start:end]
            print(substring)
      print("Do you wish to continue? 0.Exit 1. Continue")
      inp = int(input())
      if(inp==0):
        flag = False

  print("Thank you....")
except:
  print("Invalid no.")

How many items you bought?
3
Amazing: Input the things your bought: 
milk
oil
yogurt
Product recommended: 
bottled water
citrus fruit
other vegetables
rolls/buns
root vegetables
soda
tropical fruit
whipped/sour cream
whole milk
other vegetables
Do you wish to continue? 0.Exit 1. Continue
0
Thank you....
