# Eclat

In [14]:
!pip install apyori



## Importing the libraries

In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Data Preprocessing

In [16]:
# The CSV has no header row: every line is one transaction and every cell is
# one item, so it is read with header=None.
dataset = pd.read_csv("Market_Basket_Optimisation.csv", header=None)
num_of_rows = dataset.shape[0]
num_of_cols = dataset.shape[1]

# The apriori model needs each transaction as a list of strings (str).
# Rows with fewer items than the widest row are padded with NaN by read_csv;
# those cells are skipped so the literal string 'nan' is never treated as a product.
# The underlying array is fetched once instead of once per cell.
basket_values = dataset.values
transactions = [
    [str(item) for item in basket_row if pd.notna(item)]
    for basket_row in basket_values
]

## Training the Eclat model on the dataset

In [17]:
from apyori import apriori

# How the thresholds were chosen:
# - min_support: the dataset holds 7 days of data; keeping products that appear
#   at least 3 times a day gives 3 * 7 = 21 appearances, and
#   21 / 7501 = 0.002799, rounded to 0.003.
# - min_confidence: 0.8 produced no rules, 0.4 only a few, 0.2 gave good results.
# - min_lift: 3 is a good value from experience.
# - min_length / max_length: both 2, because the goal is "buy one, get one"
#   style rules (product 1 -> product 2).
#   NOTE(review): confirm that apyori actually honours min_length.
rules = apriori(
    transactions=transactions,
    min_support=0.003,
    min_confidence=0.2,
    min_lift=3,
    min_length=2,
    max_length=2,
)

## Visualising the results

### Displaying the first results coming directly from the output of the apriori function

In [18]:
# Collect every rule into a list so the records can be printed here and
# iterated again by later cells.
results = [record for record in rules]
print(results)

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]), RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]), RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]), RelationRecord(items=frozenset({'honey', 'fromage blanc'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0.24

In [19]:
# Anatomy of the first RelationRecord:
# items = frozenset({'light cream', 'chicken'})
# support = 0.004532728969470737
# ordered_statistics = [OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]
# confidence = 0.29059829059829057
# lift = 4.84395061728395

# Customers who buy light cream have a high chance (confidence = 0.29059829059829057, i.e. about 29%) of also buying chicken.

### Putting the results well organised into a Pandas DataFrame

In [20]:
def inspect(results):
    """Flatten relation records into ``(lhs, rhs, support)`` tuples.

    Each record is read positionally: index 1 is the support and index 2 is
    the list of ordered statistics, whose first entry holds the base item set
    at index 0 and the added item set at index 1. Only the first ordered
    statistic is used, and only one item is taken from each side (for a
    multi-item set, whichever item comes first when the set is iterated).

    The records are walked in a single pass, so any iterable works, including
    a one-shot iterator or generator; iterating the input once per column
    would exhaust such an input after the first column and return nothing.

    Args:
        results: Iterable of relation records (or identically shaped tuples).

    Returns:
        A list of ``(product_1, product_2, support)`` tuples in input order.

    Raises:
        IndexError: If a record has no ordered statistics, or if either side
            of its first ordered statistic is empty.
    """
    rows = []
    for result in results:
        first_rule = result[2][0]
        lhs = tuple(first_rule[0])[0]
        rhs = tuple(first_rule[1])[0]
        rows.append((lhs, rhs, result[1]))
    return rows
# One row per rule: the two products and the support taken from each record.
resultsinDataFrame = pd.DataFrame(
    data=inspect(results),
    columns=['Product 1', 'Product 2', 'Support'],
)

### Displaying the results unsorted

In [21]:
# Show the rule table as built, with no sorting applied.
resultsinDataFrame

Unnamed: 0,Product 1,Product 2,Support
0,light cream,chicken,0.004533
1,mushroom cream sauce,escalope,0.005733
2,pasta,escalope,0.005866
3,fromage blanc,honey,0.003333
4,herb & pepper,ground beef,0.015998
5,tomato sauce,ground beef,0.005333
6,light cream,olive oil,0.0032
7,whole wheat pasta,olive oil,0.007999
8,pasta,shrimp,0.005066


### Displaying the results sorted by descending supports

In [22]:
# Requesting as many "largest" rows as the frame holds lists the rules
# ordered by descending support.
resultsinDataFrame.nlargest(n=len(resultsinDataFrame), columns='Support')

Unnamed: 0,Product 1,Product 2,Support
4,herb & pepper,ground beef,0.015998
7,whole wheat pasta,olive oil,0.007999
2,pasta,escalope,0.005866
1,mushroom cream sauce,escalope,0.005733
5,tomato sauce,ground beef,0.005333
8,pasta,shrimp,0.005066
0,light cream,chicken,0.004533
3,fromage blanc,honey,0.003333
6,light cream,olive oil,0.0032
