## **Machine learning model to analyse customer product purchases in a market using Eclat (association rule learning).**
### **Steps:**
  1. Importing needed libraries
  2. Data preprocessing
  3. Training eclat model
  4. Visualizing results

In [1]:
# 1. importing needed libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# we have to install apyori into colab since it not installed as default.
# apriori modules are not available in scikit-learn
!pip install apyori

Collecting apyori
  Downloading https://files.pythonhosted.org/packages/5e/62/5ffde5c473ea4b033490617ec5caa80d59804875ad3c3c57c0976533a21a/apyori-1.1.2.tar.gz
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... [?25l[?25hdone
  Created wheel for apyori: filename=apyori-1.1.2-cp36-none-any.whl size=5977 sha256=714df891a1c6c4fee4288fa3079a65e8a84eb76da0a116d037648b0e18b9b845
  Stored in directory: /root/.cache/pip/wheels/5d/92/bb/474bbadbc8c0062b9eb168f69982a0443263f8ab1711a8cad0
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2


In [3]:
# 2. data preprocessing
# 2.1. importing the dataset

# The CSV has no header row: every line, including the first, is a transaction.
# header=None stops pandas from consuming the first transaction as column names;
# the columns are labelled 0, 1, 2, ... instead.

df = pd.read_csv('Market_Basket_Optimisation.csv', header=None)
# preview the first rows; baskets shorter than the widest one have missing values on the right
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


In [4]:
# (rows, columns): one row per transaction, one column per item position in a basket
df.shape

(7501, 20)

In [5]:
# eclat is a simplified version of apriori
# The apriori function does not take a pandas DataFrame as input; it needs a list
# of transactions, each transaction being a list of item strings.
#
# Baskets shorter than the widest one are padded with NaN by read_csv. Those cells
# are skipped, because str(NaN) == 'nan' would otherwise be added to the
# transaction and counted like a real product.
# Iterating over the rows also removes the hard-coded column count (20) and avoids
# re-evaluating df.values for every single cell.

df_purchase = [
    [str(item) for item in row if pd.notna(item)]
    for row in df.values
]

In [6]:
# 3. training eclat model
# The mining step is delegated to apyori's apriori function, fed with the
# list-of-lists transactions built above.
from apyori import apriori

# NOTE(review): apyori 1.1.2 appears to read only min_support, min_confidence,
# min_lift and max_length from its keyword arguments, so min_length may be
# silently ignored — confirm against the apyori source before relying on it.
rules = apriori(
    transactions=df_purchase,
    min_support=0.003,    # lower bound on the support of an item set
    min_confidence=0.2,   # lower bound on the confidence of a rule
    min_lift=3,           # lower bound on the lift of a rule
    min_length=2,
    max_length=2,         # upper bound on the number of items per item set
)

In [7]:
# 4. visualizing the results
# 4.1. result of trained apriori function
# Materialise the rules into a list so they can be displayed here and reused by later cells.
# NOTE(review): apriori presumably returns a one-shot generator — confirm against the apyori docs.
rules_pred = list(rules)
rules_pred

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'fromage blanc', 'honey'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0

In [8]:
 # 4.2. result in type of dataframe
def inspect(results):
    """Flatten relation records into ``(lhs, rhs, support)`` tuples.

    Each record is read positionally: index 1 is the support of the item set and
    index 2 is the list of ordered statistics, whose entries start with the base
    item set followed by the added item set. Only the first ordered statistic of
    a record is used, and only one item is taken from each of its two sets.

    Args:
        results: Iterable of relation records. It is traversed exactly once, so
            a one-shot iterator or generator works as well as a list.

    Returns:
        A list with one ``(lhs, rhs, support)`` tuple per record, in input order.

    Raises:
        IndexError: If a record has no ordered statistics, or if the base or
            added item set of its first ordered statistic is empty.
    """
    rows = []
    # Single pass: iterating `results` once per output column would exhaust a
    # generator after the first column and make the zipped result empty.
    for record in results:
        first_stat = record[2][0]
        lhs = tuple(first_stat[0])[0]
        rhs = tuple(first_stat[1])[0]
        rows.append((lhs, rhs, record[1]))
    return rows
# Tabulate the flattened rules: one row per rule, holding the two products and the support.
resultsinDataFrame = pd.DataFrame(
    data=inspect(rules_pred),
    columns=['Product_1', 'Product_2', 'Support'],
)

In [9]:
# 4.3 the (up to) ten product pairs with the highest support, sorted by descending support
resultsinDataFrame.nlargest(n = 10, columns = 'Support')

Unnamed: 0,Product_1,Product_2,Support
4,herb & pepper,ground beef,0.015998
7,whole wheat pasta,olive oil,0.007999
2,pasta,escalope,0.005866
1,mushroom cream sauce,escalope,0.005733
5,tomato sauce,ground beef,0.005333
8,pasta,shrimp,0.005066
0,light cream,chicken,0.004533
3,fromage blanc,honey,0.003333
6,light cream,olive oil,0.0032
