# Apriori

## Importing the libraries

In [42]:
# Install apyori, the third-party package that provides the apriori() function used below.
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install apyori



In [43]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [48]:
# The CSV has no header row: every line is one transaction, so header = None
# stops pandas from using the first transaction as column names.
#
# apriori() does not accept a pandas DataFrame; it expects the transactions as
# a list of lists, so every row is converted into a list of item strings.
#
# Transactions with fewer items than the file has columns get NaN in the
# missing cells. Those cells are skipped so that a fake 'nan' item does not
# end up in almost every transaction.
#
# The row and column counts are taken from the data itself (7501 rows x 20
# columns for this file) instead of being hard-coded, and dataset.values is
# read once rather than once per cell.

dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in dataset.values
]

## Training the Apriori model on the dataset

In [49]:
# transactions = transactions
#   left:  name of the apriori() parameter, right: name of our list of transactions
#
# NOTE: the threshold values below are a case study for this particular dataset;
# they have to be chosen again for every new dataset.
#
# min_support (ML A-Z course notes, p. 338)
#   Smallest share of all transactions in which the products of a rule must
#   appear. We only want products that are bought at least 3 times a day (the 3
#   is a free choice). The data covers one week, so that is 3 * 7 = 21
#   transactions out of 7501 in total: 21 / 7501 ~= 0.003.
#
# min_confidence (ML A-Z course notes, p. 341)
#   Smallest share of the transactions containing the left-hand-side product
#   that must also contain the right-hand-side product.
#
# min_lift (ML A-Z course notes, p. 343)
#   Smallest lift a rule must reach to be kept.
#
# min_length / max_length
#   The business case is "buy one product, get another one for free", so every
#   rule should contain exactly two products (one on the left-hand side, one on
#   the right-hand side); both values are therefore set to 2. For a deal that
#   spans three products both would be set to 3.
#   NOTE(review): apyori documents min_support, min_confidence, min_lift and
#   max_length; confirm that min_length has an effect in the installed version.

from apyori import apriori
rules = apriori(
    transactions = transactions,
    min_support = 0.003,
    min_confidence = 0.2,
    min_lift = 3,
    min_length = 2,
    max_length = 2,
)

## Visualising the results

### Displaying the first results coming directly from the output of the apriori function

In [50]:
# Collect every rule produced by apriori() into a list so it can be displayed
# here and indexed again by the cells below.
results = [*rules]
results

[RelationRecord(items=frozenset({'chicken', 'light cream'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'mushroom cream sauce', 'escalope'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'honey', 'fromage blanc'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0

### Putting the results well organised into a Pandas DataFrame

In [52]:
# lhs = left hand side and rhs = right hand side

def inspect(results):
    """Flatten apriori relation records into plain rows.

    Parameters
    ----------
    results : iterable
        Records shaped like ``(items, support, ordered_statistics)``, where
        every entry of ``ordered_statistics`` is shaped like
        ``(items_base, items_add, confidence, lift)``.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support, confidence, lift)`` tuple per record, in
        input order. Only the first ordered statistic of a record is used, and
        only the first item of its left-hand and right-hand item sets.

    Raises
    ------
    IndexError
        If a record has no ordered statistic or one of the two item sets of
        its first ordered statistic is empty.
    """
    rows = []
    # A single pass keeps this correct for one-shot iterators (for example a
    # generator), which would be exhausted after the first of several passes.
    for result in results:
        first_stat = result[2][0]
        rows.append((
            tuple(first_stat[0])[0],  # left hand side
            tuple(first_stat[1])[0],  # right hand side
            result[1],                # support
            first_stat[2],            # confidence
            first_stat[3],            # lift
        ))
    return rows
# One row per rule; the column order matches the tuples returned by inspect().
resultsinDataFrame = pd.DataFrame(
    inspect(results),
    columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'],
)

### Displaying the results unsorted

In [54]:
# Show the rules in the order in which they appear in `results`.
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,light cream,chicken,0.004533,0.290598,4.843951
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
2,pasta,escalope,0.005866,0.372881,4.700812
3,fromage blanc,honey,0.003333,0.245098,5.164271
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
6,light cream,olive oil,0.0032,0.205128,3.11471
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
8,pasta,shrimp,0.005066,0.322034,4.506672


### Displaying the results sorted by descending lifts

In [57]:
# Up to ten rules with the highest lift, strongest first.
resultsinDataFrame.nlargest(10, 'Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
3,fromage blanc,honey,0.003333,0.245098,5.164271
0,light cream,chicken,0.004533,0.290598,4.843951
2,pasta,escalope,0.005866,0.372881,4.700812
8,pasta,shrimp,0.005066,0.322034,4.506672
7,whole wheat pasta,olive oil,0.007999,0.271493,4.12241
5,tomato sauce,ground beef,0.005333,0.377358,3.840659
1,mushroom cream sauce,escalope,0.005733,0.300699,3.790833
4,herb & pepper,ground beef,0.015998,0.32345,3.291994
6,light cream,olive oil,0.0032,0.205128,3.11471
