In [1]:
# importing pandas to manage datasets
import pandas as pd

In [2]:
# loading transaction from transactions.tsv
df = pd.read_csv("./transactions.tsv", delimiter=";")
df.head()

Unnamed: 0,TransactionID,Items
0,1,"pão,queijo,presunto"
1,2,"queijo,presunto"
2,3,"arroz,feijao"
3,4,"pao,leite"
4,5,"arroz,macarrao,alho"


In [3]:
# creating a list with all the transactions
transactions = []

for line in df.values:
    transaction_id = line[0]
    items = line[1].split(",")
    transactions.append([*items])

In [4]:
# importing apriori
from apyori import apriori

In [5]:
# training apriori algorithm
rule = apriori(
    transactions = transactions,  
    min_support = 0.03, 
    min_confidence = 0.2, 
    min_lift = 2, 
    min_length = 2, 
    max_length = 2
)

rule_output = list(rule)

In [6]:
# transforming apriori output into a tabular list
lhs         = [tuple(result[2][0][0])[0] for result in rule_output]
rhs         = [tuple(result[2][0][1])[0] for result in rule_output]
support    = [result[1] for result in rule_output]
confidence = [result[2][0][2] for result in rule_output]
lift       = [result[2][0][3] for result in rule_output]
rule_table = list(zip(lhs, rhs, support, confidence, lift))

In [7]:
# transforming the rule table into a pandas.DateFrame
columns = ['Item', 'Match', 'Support', 'Confidence', 'Lift']
output_df = pd.DataFrame(rule_table, columns = columns)
output_df.head()

Unnamed: 0,Item,Match,Support,Confidence,Lift
0,alface,mortadela,0.047619,0.5,10.5
1,alface,queijo,0.047619,0.5,2.625
2,alface,tomate,0.095238,1.0,10.5
3,alho,arroz,0.047619,0.5,3.5
4,alho,cebola,0.047619,0.5,10.5


In [8]:
# sorting by lift descending: as greater is the lift, grater is the match
output_df.sort_values(by=["Lift"], ascending=False)

Unnamed: 0,Item,Match,Support,Confidence,Lift
16,cebola,coentro,0.047619,1.0,21.0
0,alface,mortadela,0.047619,0.5,10.5
32,mortadela,tomate,0.047619,1.0,10.5
30,manteiga,pão,0.047619,1.0,10.5
22,gelo,whisky,0.047619,1.0,10.5
21,extrato de tomate,macarrao,0.047619,1.0,10.5
38,refrigerante,whisky,0.047619,1.0,10.5
2,alface,tomate,0.095238,1.0,10.5
5,alho,coentro,0.047619,0.5,10.5
4,alho,cebola,0.047619,0.5,10.5


In [9]:
# showing presunto best matches
output_df[output_df["Item"] == "presunto"].sort_values(by=["Confidence", "Lift"], ascending=False)

Unnamed: 0,Item,Match,Support,Confidence,Lift
35,presunto,queijo,0.095238,1.0,5.25
34,presunto,pão,0.047619,0.5,5.25


In [10]:
# showing best items that "pão" matches
output_df[output_df["Match"] == "pão"].sort_values(by=["Confidence", "Lift"], ascending=False)

Unnamed: 0,Item,Match,Support,Confidence,Lift
30,manteiga,pão,0.047619,1.0,10.5
34,presunto,pão,0.047619,0.5,5.25
27,leite,pão,0.047619,0.333333,3.5
