Importing the essential libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Loading the dataset

In [2]:
dataset = pd.read_csv("Bakery.csv")
dataset.head()

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend


In [3]:
print("Shape of Dataset: ",dataset.shape) # 20507 Rows and 5 Columns

Shape of Dataset:  (20507, 5)


In [4]:
dataset.describe(include=object)

Unnamed: 0,Items,DateTime,Daypart,DayType
count,20507,20507,20507,20507
unique,94,9465,4,2
top,Coffee,2017-02-17 14:18:20,Afternoon,Weekday
freq,5471,11,11569,12807


In [5]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20507 entries, 0 to 20506
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   TransactionNo  20507 non-null  int64 
 1   Items          20507 non-null  object
 2   DateTime       20507 non-null  object
 3   Daypart        20507 non-null  object
 4   DayType        20507 non-null  object
dtypes: int64(1), object(4)
memory usage: 801.2+ KB


In [6]:
datetime = pd.to_datetime(dataset['DateTime'])
dataset['Day'] = datetime.dt.day_name()
dataset['Month'] = datetime.dt.month_name()
dataset['Year'] = datetime.dt.year
dataset.head()

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType,Day,Month,Year
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend,Sunday,October,2016
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend,Sunday,October,2016
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend,Sunday,October,2016
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend,Sunday,October,2016
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend,Sunday,October,2016


In [7]:
day_calc = dataset.groupby('Day')
day_freq = day_calc['Items'].count()
day_freq.sort_values(ascending=False)

Day
Saturday     3554
Friday       3266
Sunday       3118
Monday       3035
Tuesday      2645
Thursday     2601
Wednesday    2288
Name: Items, dtype: int64

In [8]:
month_calc = dataset.groupby('Month')
month_freq = month_calc['Items'].count()
month_freq.sort_values(ascending=False)

Month
March        3220
November     3076
January      3027
February     2748
December     2647
April        1048
October      1041
May           924
July          741
June          739
August        700
September     596
Name: Items, dtype: int64

Preparing the data for Associate Rule Learning

In [9]:
transactions = list()

for i in dataset['TransactionNo'].unique():
    l = list(set(dataset[dataset['TransactionNo']==i]['Items']))
    transactions.append(l)

In [10]:
transactions[:10]

[['Bread'],
 ['Scandinavian'],
 ['Cookies', 'Jam', 'Hot chocolate'],
 ['Muffin'],
 ['Coffee', 'Pastry', 'Bread'],
 ['Muffin', 'Pastry', 'Medialuna'],
 ['Coffee', 'Pastry', 'Medialuna', 'Tea'],
 ['Pastry', 'Bread'],
 ['Muffin', 'Bread'],
 ['Scandinavian', 'Medialuna']]

In [11]:
maxi = [0,len(transactions[0])]
for j in transactions:
    if len(j)>maxi[1]:
        maxi = [transactions.index(j),len(j)]

In [12]:
transactions.append(transactions[maxi[0]])
transactions[::-1]

[['Cookies',
  'Chocolates',
  'Tea',
  'Mineral water',
  'Tiffin',
  'Coke',
  'Coffee',
  'Pastry',
  'Juice',
  'Alfajores'],
 ['Smoothies'],
 ['Coffee', 'Pastry'],
 ['Tacos/Fajita', 'Coffee', 'Muffin', 'Tea'],
 ['Truffles', 'Spanish Brunch', 'Christmas common', 'Tea'],
 ['Bread'],
 ['Spanish Brunch', 'Bread'],
 ['Tacos/Fajita', 'Coffee', 'Toast'],
 ['Tacos/Fajita', 'Coke'],
 ['Sandwich', 'Smoothies'],
 ['Coffee'],
 ['Coffee', 'Spanish Brunch', 'Bread'],
 ['Extra Salami or Feta', 'Coffee', 'Cookies'],
 ['Bread'],
 ['Farm House'],
 ['Farm House'],
 ['Bread', 'Tea'],
 ['Sandwich', 'Bread'],
 ['Coffee'],
 ['Coffee', 'Spanish Brunch', 'Juice'],
 ['Coffee', 'Bread'],
 ['Bread'],
 ['Cake', 'Juice', 'Tea'],
 ['Coffee', 'Spanish Brunch', 'Extra Salami or Feta'],
 ['Coffee'],
 ['Coffee', 'Toast'],
 ['Tea'],
 ['Half slice Monster ', 'My-5 Fruit Shoot', 'Hot chocolate'],
 ['Cake', 'Coffee'],
 ['Baguette'],
 ['Coffee', 'Pastry'],
 ['Coffee', 'Pastry', 'Bread'],
 ['Cake', 'Coffee', 'Medialuna',

Using Apriori Model for this Recommendation System

In [16]:
def inspect(results):
    lhs         = [tuple(result[2][0][0])[0] for result in results]
    rhs         = [tuple(result[2][0][1])[0] for result in results]
    supports    = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts       = [result[2][0][3] for result in results]
    return list(zip(lhs, rhs, supports, confidences, lifts))

In [36]:
from apyori import apriori
rules = apriori(transactions = transactions, min_support = 0.0001, min_confidence=0.2,min_lift=3,min_length=2,max_length=2)
print("Done with the training of apriori")
results = list(rules)
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])
print(resultsinDataFrame)

Done with the training of apriori
  Left Hand Side Right Hand Side   Support  Confidence      Lift
0           Coke        Sandwich  0.005176    0.264865  3.687075
1  Mineral water        Sandwich  0.003275    0.229630  3.196580
