# Market Basket Optimisation Using Apriori Algorithm

## Importing The Main Libraries

In [12]:
import warnings
warnings.filterwarnings('ignore')   # ignore any warning messages

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

## Importing The Dataset

In [2]:
data = pd.read_csv('/content/Market_Basket_Optimisation.csv' , header = None )

data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


## Data Preprocessing

### Restructring the Dataset to be suitable for the model


In [3]:
dataValues = data.values
dataValues

array([['shrimp', 'almonds', 'avocado', ..., 'frozen smoothie',
        'spinach', 'olive oil'],
       ['burgers', 'meatballs', 'eggs', ..., nan, nan, nan],
       ['chutney', nan, nan, ..., nan, nan, nan],
       ...,
       ['chicken', nan, nan, ..., nan, nan, nan],
       ['escalope', 'green tea', nan, ..., nan, nan, nan],
       ['eggs', 'frozen smoothie', 'yogurt cake', ..., nan, nan, nan]],
      dtype=object)

In [5]:
dataset = []

for i in range(0, len(data)):
  dataset.append([str(dataValues[i,j]) for j in range(0, 20) if str(dataValues[i,j]) != 'nan'])

print("Finished")
print(dataset[0:10])

Finished
[['shrimp', 'almonds', 'avocado', 'vegetables mix', 'green grapes', 'whole weat flour', 'yams', 'cottage cheese', 'energy drink', 'tomato juice', 'low fat yogurt', 'green tea', 'honey', 'salad', 'mineral water', 'salmon', 'antioxydant juice', 'frozen smoothie', 'spinach', 'olive oil'], ['burgers', 'meatballs', 'eggs'], ['chutney'], ['turkey', 'avocado'], ['mineral water', 'milk', 'energy bar', 'whole wheat rice', 'green tea'], ['low fat yogurt'], ['whole wheat pasta', 'french fries'], ['soup', 'light cream', 'shallot'], ['frozen vegetables', 'spaghetti', 'green tea'], ['french fries']]


### Encoding the values in the lists

In [6]:
from mlxtend.preprocessing import TransactionEncoder

In [7]:
TE = TransactionEncoder()
TeArray = TE.fit_transform(dataset)

TeArray

array([[False,  True,  True, ...,  True, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False,  True, False]])

In [8]:
df = pd.DataFrame(TeArray , columns= TE.columns_ )

print("Final Data after convertion : ")
df.head()


Final Data after convertion : 


Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


## Apriori Model Training

In [13]:
from mlxtend.frequent_patterns import apriori, association_rules

In [14]:
frequent_itemsets = apriori( df , min_support= 0.01 , use_colnames= True )

frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))

frequent_itemsets.sort_values(by='support', ascending=False).head(10)

Unnamed: 0,support,itemsets,length
46,0.238368,(mineral water),1
19,0.179709,(eggs),1
63,0.17411,(spaghetti),1
24,0.170911,(french fries),1
13,0.163845,(chocolate),1
32,0.132116,(green tea),1
45,0.129583,(milk),1
33,0.098254,(ground beef),1
30,0.095321,(frozen vegetables),1
53,0.095054,(pancakes),1


In [15]:
rules = association_rules( frequent_itemsets , metric= "lift" , min_threshold= 1.2 )

final_rules = rules[['antecedents' , 'consequents' , 'antecedent support' , 'consequent support', 'support', 'confidence', 'lift']]
final_rules = final_rules.sort_values( by='lift' , ascending=False )   # sort the results by the most lift values

print("Top 10 strong rules")
final_rules.head(10)

Top 10 strong rules


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
159,(herb & pepper),(ground beef),0.04946,0.098254,0.015998,0.32345,3.291994
158,(ground beef),(herb & pepper),0.098254,0.04946,0.015998,0.162822,3.291994
327,(ground beef),"(spaghetti, mineral water)",0.098254,0.059725,0.017064,0.173677,2.907928
326,"(spaghetti, mineral water)",(ground beef),0.059725,0.098254,0.017064,0.285714,2.907928
340,(olive oil),"(spaghetti, mineral water)",0.065858,0.059725,0.010265,0.15587,2.609786
337,"(spaghetti, mineral water)",(olive oil),0.059725,0.065858,0.010265,0.171875,2.609786
143,(frozen vegetables),(tomatoes),0.095321,0.068391,0.016131,0.169231,2.474464
142,(tomatoes),(frozen vegetables),0.068391,0.095321,0.016131,0.235867,2.474464
138,(shrimp),(frozen vegetables),0.071457,0.095321,0.016664,0.233209,2.446574
139,(frozen vegetables),(shrimp),0.095321,0.071457,0.016664,0.174825,2.446574


## Target Only One Sort Of Items

In [None]:
target = 'eggs'
print(f"what customers usually buy with {target} :\n")
eggs_rules = final_rules[final_rules['antecedents'].apply(lambda x: target in x)]

if not eggs_rules.empty:
    display (eggs_rules)
else:
    print(f"No rules exist with this item : {target}")

what customers usually buy with eggs :



Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift
285,"(eggs, ground beef)",(mineral water),0.019997,0.238368,0.010132,0.506667,2.125563
284,"(eggs, mineral water)",(ground beef),0.050927,0.098254,0.010132,0.198953,2.024892
290,"(eggs, mineral water)",(milk),0.050927,0.129583,0.013065,0.256545,1.979774
4,(eggs),(burgers),0.179709,0.087188,0.028796,0.160237,1.83783
255,"(eggs, chocolate)",(spaghetti),0.033196,0.17411,0.010532,0.317269,1.822232
291,"(eggs, milk)",(mineral water),0.030796,0.238368,0.013065,0.424242,1.779778
254,"(eggs, spaghetti)",(chocolate),0.036528,0.163845,0.010532,0.288321,1.759721
106,(eggs),(turkey),0.179709,0.062525,0.019464,0.108309,1.732245
249,"(eggs, chocolate)",(mineral water),0.033196,0.238368,0.013465,0.405622,1.701663
297,"(eggs, spaghetti)",(mineral water),0.036528,0.238368,0.014265,0.390511,1.638268


## Visualizing the results

In [None]:
fig = px.scatter(final_rules.assign(antecedents=final_rules['antecedents'].apply(lambda x: ', '.join(list(x))),
                                   consequents=final_rules['consequents'].apply(lambda x: ', '.join(list(x)))),
                 x="support",
                 y="confidence",
                 color="lift",
                 hover_name="antecedents",
                 hover_data={'antecedents': True, 'consequents': True, 'support': ':.4f', 'confidence': ':.4f', 'lift': ':.4f'},
                 title="Association Rules (Support vs. Confidence colored by Lift)")
fig.show()