In [None]:
!pip install apyori

In [None]:
import numpy as np
import pandas as pd

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from apyori import apriori

# **1. Pre-process data**

In [None]:
df_bakery = pd.read_csv('./Bakery.csv')
df_bakery.head()

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend


In [None]:
# Processing datetime

dateTime =pd.to_datetime(df_bakery['DateTime'])
df_bakery['Day'] = dateTime.dt.day_name()
df_bakery['Month'] = dateTime.dt.month_name()
df_bakery['Year'] = dateTime.dt.year
df_bakery.head()

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType,Day,Month,Year
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend,Sunday,October,2016
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend,Sunday,October,2016
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend,Sunday,October,2016
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend,Sunday,October,2016
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend,Sunday,October,2016


# **2. Data Exploration**

In [None]:
def Plot_Freq_barchart(df, column, top):
  freq = df[column].value_counts()

  fig = px.bar(freq.head(top),
              color = freq.head(top),
              color_continuous_scale = px.colors.sequential.Mint,
              title=f'Bar Chart of {column}')

  fig.update_traces(width=0.3)
  fig.update_layout(width=600)
  fig.show()

def Plot_Pie_barchart(df, column):

    counts = df[column].value_counts()

    fig = px.pie(names=counts.index,
                 values=counts.values,
                 color_discrete_sequence=px.colors.sequential.Sunset,
                 title=f'Pie Chart of {column}')

    fig.update_layout(width=600)
    fig.show()

In [None]:
Plot_Freq_barchart(df_bakery, 'Items', 15)

In [None]:
Plot_Freq_barchart(df_bakery, 'Day', 7)

In [None]:
Plot_Freq_barchart(df_bakery, 'Month', 12)

In [None]:
Plot_Pie_barchart(df_bakery, 'Daypart')

# **3. Insights from Data**

In [None]:
def Tranform_Transaction(df_bakery):

  transaction = []

  for trans_no in df_bakery['TransactionNo'].unique():
    list_item_of_trans = df_bakery[df_bakery['TransactionNo']==trans_no]['Items'].tolist()
    transaction.append(tuple(list_item_of_trans))

  return transaction

In [None]:
def print_rules(rules, top_n):
    print(f"Top {top_n} rules with highest support:")

    sorted_by_support = sorted(rules, key=lambda x: x.support, reverse=True)

    for i in range(min(top_n, len(sorted_by_support))):
        # Extract information from the RelationRecord object
        rule = sorted_by_support[i]
        items = list(rule.items)
        support = rule.support

        # Loop through all ordered statistics (for rules with multiple antecedents)
        for os in rule.ordered_statistics:
            antecedent = list(os.items_base)
            consequent = list(os.items_add)
            confidence = os.confidence
            lift = os.lift

            print(f"Items: {items}")
            print(f"Support: {support:.3f}")
            print(f"  Rule: {antecedent} -> {consequent}")
            print(f"  Confidence: {confidence:.3f}")
            print(f"  Lift: {lift:.3f}")
            print()
            print('-----------------')
            print()

In [None]:
transaction = Tranform_Transaction(df_bakery)
# Generate rule based on Apriori algorithm
rules = list(apriori(transaction, min_support = 0.01, min_confidence = 0.05))
filtered_rules = [rule for rule in rules if len(rule.items) >= 2]

In [None]:
print_rules(filtered_rules, 10)

Top 10 rules with highest support:
Items: ['Bread', 'Coffee']
Support: 0.090
  Rule: [] -> ['Bread', 'Coffee']
  Confidence: 0.090
  Lift: 1.000

-----------------

Items: ['Bread', 'Coffee']
Support: 0.090
  Rule: ['Bread'] -> ['Coffee']
  Confidence: 0.275
  Lift: 0.575

-----------------

Items: ['Bread', 'Coffee']
Support: 0.090
  Rule: ['Coffee'] -> ['Bread']
  Confidence: 0.188
  Lift: 0.575

-----------------

Items: ['Cake', 'Coffee']
Support: 0.055
  Rule: [] -> ['Cake', 'Coffee']
  Confidence: 0.055
  Lift: 1.000

-----------------

Items: ['Cake', 'Coffee']
Support: 0.055
  Rule: ['Cake'] -> ['Coffee']
  Confidence: 0.527
  Lift: 1.102

-----------------

Items: ['Cake', 'Coffee']
Support: 0.055
  Rule: ['Coffee'] -> ['Cake']
  Confidence: 0.114
  Lift: 1.102

-----------------

Items: ['Tea', 'Coffee']
Support: 0.050
  Rule: ['Coffee'] -> ['Tea']
  Confidence: 0.104
  Lift: 0.731

-----------------

Items: ['Tea', 'Coffee']
Support: 0.050
  Rule: ['Tea'] -> ['Coffee']
  Con

In [120]:
for rule in filtered_rules:
  print(rule)

RelationRecord(items=frozenset({'Alfajores', 'Bread'}), support=0.01035393555203381, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Alfajores'}), items_add=frozenset({'Bread'}), confidence=0.28488372093023256, lift=0.8706568997754768)])
RelationRecord(items=frozenset({'Alfajores', 'Coffee'}), support=0.0196513470681458, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Alfajores'}), items_add=frozenset({'Coffee'}), confidence=0.5406976744186046, lift=1.1302348693401265)])
RelationRecord(items=frozenset({'Bread', 'Brownie'}), support=0.010776545166402536, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Brownie'}), items_add=frozenset({'Bread'}), confidence=0.2691292875989446, lift=0.8225084621001004)])
RelationRecord(items=frozenset({'Bread', 'Cake'}), support=0.02334918119387216, ordered_statistics=[OrderedStatistic(items_base=frozenset({'Bread'}), items_add=frozenset({'Cake'}), confidence=0.07135938004520505, lift=0.6870971842602908), OrderedStatisti