In [1]:
import pandas as pd
from apyori import apriori

In [2]:
alcohol_transactions = pd.read_csv('data/sample_data_apriori.csv')
alcohol_transactions.head()

Unnamed: 0,transaction_id,product1,product2,product3,product4,product5,product6,product7,product8,product9,...,product36,product37,product38,product39,product40,product41,product42,product43,product44,product45
0,1,Premium Lager,Iberia,,,,,,,,...,,,,,,,,,,
1,2,Sparkling,Premium Lager,Premium Cider,Own Label,Italy White,Italian White,Italian Red,French Red,Bottled Ale,...,,,,,,,,,,
2,3,Small Sizes White,Small Sizes Red,Sherry Spanish,No/Low Alc Cider,Cooking Wine,Cocktails/Liqueurs,Bottled Ale,,,...,,,,,,,,,,
3,4,White Uk,Sherry Spanish,Port,Italian White,Italian Red,,,,,...,,,,,,,,,,
4,5,Premium Lager,Over-Ice Cider,French White South,French Rose,Cocktails/Liqueurs,Bottled Ale,,,,...,,,,,,,,,,


In [3]:
# drop id column
alcohol_transactions = alcohol_transactions.drop('transaction_id', axis = 1)

In [4]:
# modify data for apriori algorithm
transactions_list = []

for index, row in alcohol_transactions.iterrows():
    transaction = list(row.dropna())
    transactions_list.append(transaction)

transactions_list[0:5]

[['Premium Lager', 'Iberia'],
 ['Sparkling',
  'Premium Lager',
  'Premium Cider',
  'Own Label',
  'Italy White',
  'Italian White',
  'Italian Red',
  'French Red',
  'Bottled Ale'],
 ['Small Sizes White',
  'Small Sizes Red',
  'Sherry Spanish',
  'No/Low Alc Cider',
  'Cooking Wine',
  'Cocktails/Liqueurs',
  'Bottled Ale'],
 ['White Uk', 'Sherry Spanish', 'Port', 'Italian White', 'Italian Red'],
 ['Premium Lager',
  'Over-Ice Cider',
  'French White South',
  'French Rose',
  'Cocktails/Liqueurs',
  'Bottled Ale']]

In [5]:
# apply apriori algorithm
apriori_rules = apriori(
    transactions_list,
    min_support = 0.003,
    min_confidence = 0.2,
    min_lift = 3,
    min_length = 2,
    max_length = 2
)

# convert to list
apriori_rules = list(apriori_rules)

In [6]:
# convert results to dataframe
product_one = [list(rule[2][0][0])[0] for rule in apriori_rules]
product_two = [list(rule[2][0][1])[0] for rule in apriori_rules]
support = [rule[1] for rule in apriori_rules]
confidence = [rule[2][0][2] for rule in apriori_rules]
lift = [rule[2][0][3] for rule in apriori_rules]

apriori_rules_df = pd.DataFrame({
    'product_one': product_one,
    'product_two': product_two,
    'support': support,
    'confidence': confidence,
    'lift': lift
})

apriori_rules_df.head()

Unnamed: 0,product_one,product_two,support,confidence,lift
0,American Rose,America White,0.020746,0.532374,3.997849
1,America White,American White,0.054387,0.408421,3.597131
2,Australian Rose,America White,0.005046,0.486486,3.653257
3,Low Alcohol A.C,America White,0.003364,0.461538,3.465911
4,American Rose,American Red,0.015699,0.402878,3.574788


In [7]:
# sort results by lift
apriori_rules_df = apriori_rules_df.sort_values(by = 'lift', ascending = False)
apriori_rules_df.head()

Unnamed: 0,product_one,product_two,support,confidence,lift
35,Wine Gifts,Beer/Lager Gifts,0.004486,0.313725,10.173262
34,Beer/Lager Gifts,Spirits & Fortified,0.013176,0.427273,9.896635
129,Wine Gifts,Spirits & Fortified,0.005887,0.411765,9.537433
118,Red Wine Bxes & 25Cl,White Boxes,0.015419,0.474138,9.343923
52,French White Rhone,French Red,0.003364,0.48,8.691168


In [8]:
# search rules
apriori_rules_df[apriori_rules_df['product_one'].str.contains('New Zealand')]

Unnamed: 0,product_one,product_two,support,confidence,lift
109,New Zealand Red,Malt Whisky,0.005327,0.271429,5.628987
103,New Zealand Red,Iberia White,0.007289,0.371429,4.616327
111,New Zealand Red,New Zealand White,0.012616,0.642857,4.613826
90,New Zealand Red,French White South,0.004486,0.228571,4.431056
75,New Zealand Red,French White 2,0.009532,0.485714,4.256862
53,New Zealand Red,French Red,0.004205,0.214286,3.879985
63,New Zealand Red,French Red South,0.006448,0.328571,3.868034
113,New Zealand Red,South America,0.010934,0.557143,3.799863
112,New Zealand Red,Other Red,0.004486,0.228571,3.591693
102,New Zealand Red,Iberia,0.012055,0.614286,3.528433
