# Comparison: Lift and Conviction
by Lily Djami

In this evaluation module, the results from Apriori and ECLAT are compared with regard to lift and conviction.

In [1]:
import matplotlib.pyplot as plt
import plotly.express as px
import pandas as pd
import numpy as np
pd.set_option('display.max_rows', None)

from rules import save_results, load_results, rules_from_support
from mlxtend.frequent_patterns import association_rules


In [6]:
# Read the Apriori support values for both datasets. These objects are later
# passed unchanged to mlxtend's association_rules().
# NOTE(review): the "\\" separators make these relative paths Windows-style;
# confirm load_results() resolves them on other platforms.
retail_apriori = load_results("Apriori\\apr_retail_0.04.support")
entree_apriori = load_results("Apriori\\apr_entree_0.1.support")

# Read the ECLAT support values for both datasets.
retail_eclat = load_results("ECLAT\\retail_0.04.support")
# FIXME(review): `retail_eclat_3` is never assigned in the visible part of this
# notebook, so this line appears to rely on leftover kernel state and would
# raise NameError after Restart & Run All. Load or define it explicitly here.
retail_eclat.update(retail_eclat_3)
entree_eclat = load_results("ECLAT\\entree_0.1.support")

In [8]:
# Retail: derive rules from the ECLAT supports with the project helper and from
# the Apriori supports with mlxtend. min_threshold=0 is the lowest cut-off, so
# the rule table is not filtered by the metric.
retail_eclat_rules = rules_from_support(retail_eclat)
retail_apriori_rules = association_rules(retail_apriori, min_threshold=0)

# Entree: same two steps.
# NOTE(review): only element [1] of entree_eclat is used here, whereas the
# retail ECLAT result is passed whole -- confirm this asymmetry is intended.
entree_eclat_rules = rules_from_support(entree_eclat[1])
entree_apriori_rules = association_rules(entree_apriori, min_threshold=0)

In [9]:
def frozenset_to_str(t):
    """Render an itemset as a canonical string such as ``"(1, 2, 3)"``.

    The items are sorted before formatting, so equal sets always produce the
    same text regardless of their iteration order.

    Args:
        t: Iterable of mutually comparable items (here: a frozenset).

    Returns:
        The items in ascending order, joined by ", " and wrapped in parentheses.
    """
    return "(" + ", ".join(str(item) for item in sorted(t)) + ")"


def rule_str(rule_df):
    """Add a ``'rule'`` column of the form ``"(a, b) -> (c)"`` to ``rule_df``.

    The frame is modified in place and nothing is returned.

    Rows are walked positionally (via ``zip`` over the two columns) rather than
    looked up with ``series[i]``. Label-based lookup only matches row order on
    a default 0..n-1 index; on a sorted or filtered frame it would attach rule
    strings to the wrong rows or raise ``KeyError``.

    Args:
        rule_df: DataFrame with ``'antecedents'`` and ``'consequents'`` columns
            holding itemsets.
    """
    rule_df['rule'] = [
        frozenset_to_str(antecedent) + " -> " + frozenset_to_str(consequent)
        for antecedent, consequent in zip(rule_df['antecedents'], rule_df['consequents'])
    ]

In [10]:
# Attach the canonical rule string to every Entree rule, then order both tables
# by it so they line up for the join that follows.
rule_str(entree_eclat_rules)
rule_str(entree_apriori_rules)
# Restore a 0..n-1 index after sorting so that label-based and positional row
# access still agree if this cell is executed again (keeps the cell idempotent).
entree_eclat_rules = entree_eclat_rules.sort_values('rule').reset_index(drop=True)
entree_apriori_rules = entree_apriori_rules.sort_values('rule').reset_index(drop=True)

In [11]:
#join results on rule for better comparison
entree_joined = pd.merge(entree_eclat_rules, entree_apriori_rules, on="rule", suffixes=("_eclat", "_apriori"))
entree_joined = entree_joined.sort_values('rule')
print(entree_joined.shape)

(150, 19)


In [12]:
# Attach the canonical rule string to every Retail rule, then order both tables
# by it so they line up for the join that follows.
rule_str(retail_eclat_rules)
rule_str(retail_apriori_rules)
# Restore a 0..n-1 index after sorting so that label-based and positional row
# access still agree if this cell is executed again (keeps the cell idempotent).
retail_eclat_rules = retail_eclat_rules.sort_values('rule').reset_index(drop=True)
retail_apriori_rules = retail_apriori_rules.sort_values('rule').reset_index(drop=True)

In [13]:
# Join the ECLAT and Apriori rule tables on the rule string so that each row
# carries both algorithms' metrics side by side (column suffix tells them
# apart). pd.merge defaults to an inner join, so only rules present in both
# tables are kept.
retail_joined = (
    retail_eclat_rules
    .merge(retail_apriori_rules, on="rule", suffixes=("_eclat", "_apriori"))
    .sort_values('rule')
)
print(retail_joined.shape)

(52, 19)


## Lift




In [14]:
# Scatter of lift per rule (Retail), one series per algorithm; the x axis is
# the row index of the joined table.
fig = px.scatter(
    retail_joined,
    y=['lift_apriori', 'lift_eclat'],
    labels={
        'index': 'Rule Index',
        'value': 'Lift',
        'variable': 'algorithm',
        'lift_apriori': 'Apriori',
        'lift_eclat': 'ECLAT',
    },
)
fig.show()

In [15]:
# Mean / max / min lift over the joined Retail rules, once per algorithm.
for heading, column in (("Apriori", 'lift_apriori'), ("\n\nEclat", 'lift_eclat')):
    print(heading)
    values = retail_joined[column]
    print("Average Lift: ", values.mean())
    print("Max Lift: ", values.max())
    print("Min Lift: ", values.min())

Apriori
Average Lift:  3.8012644977040133
Max Lift:  15.379807692307693
Min Lift:  1.831997084548105


Eclat
Average Lift:  3.8012644977040133
Max Lift:  15.379807692307693
Min Lift:  1.831997084548105


In [16]:
# Scatter of lift per rule (Entree), one series per algorithm; the x axis is
# the row index of the joined table.
fig = px.scatter(
    entree_joined,
    y=['lift_apriori', 'lift_eclat'],
    labels={
        'index': 'Rule Index',
        'value': 'Lift',
        'variable': 'algorithm',
        'lift_apriori': 'Apriori',
        'lift_eclat': 'ECLAT',
    },
)
fig.show()

In [17]:
# Mean / max / min lift over the joined Entree rules, once per algorithm.
for heading, column in (("Apriori", 'lift_apriori'), ("\n\nEclat", 'lift_eclat')):
    print(heading)
    values = entree_joined[column]
    print("Average Lift: ", values.mean())
    print("Max Lift: ", values.max())
    print("Min Lift: ", values.min())

Apriori
Average Lift:  1.799809745873609
Max Lift:  9.563218390804597
Min Lift:  0.9287865703393222


Eclat
Average Lift:  1.799809745873609
Max Lift:  9.563218390804597
Min Lift:  0.9287865703393222


## Conviction


In [18]:
# Scatter of conviction per rule (Entree), one series per algorithm; the x axis
# is the row index of the joined table.
# The label keys name the conviction columns that are actually plotted here
# (they previously referred to the lift columns, copied from the lift plot).
fig = px.scatter(
    entree_joined,
    y=['conviction_apriori', 'conviction_eclat'],
    labels={
        'index': 'Rule Index',
        'value': 'Conviction',
        'variable': 'algorithm',
        'conviction_apriori': 'Apriori',
        'conviction_eclat': 'ECLAT',
    },
)
fig.show()

In [19]:
# Mean / max / min conviction over the joined Entree rules, once per algorithm.
# Infinite conviction values are included here, so the mean and max can be inf.
for heading, column in (("Apriori", 'conviction_apriori'), ("\n\nEclat", 'conviction_eclat')):
    print(heading)
    values = entree_joined[column]
    print("Average Conviction: ", values.mean())
    print("Max Conviction: ", values.max())
    print("Min Conviction: ", values.min())

Apriori
Average Conviction:  inf
Max Conviction:  inf
Min Conviction:  0.9397970085470085


Eclat
Average Conviction:  inf
Max Conviction:  inf
Min Conviction:  0.9397970085470085


In [24]:
# Blank out +inf conviction values and drop them (together with any NaN) so
# that a finite mean can be reported for each algorithm.
t_apr, t_ecl = (
    entree_joined[column].mask(entree_joined[column] == np.inf).dropna()
    for column in ('conviction_apriori', 'conviction_eclat')
)
print("Mean without inf: ")
print("Apriori: ", t_apr.mean())
print("ECLAT: ", t_ecl.mean())

Mean without inf: 
Apriori:  6.085571902478058
ECLAT:  6.085571902478058


In [25]:
# Largest finite ECLAT conviction below the hand-picked cut-off of 9; values at
# or above the cut-off are treated as outliers.
print("Max without outliers: ")
t_ecl.loc[t_ecl.lt(9)].max()

Max without outliers: 


8.65740613553113

In [23]:
# Scatter of conviction per rule (Retail), one series per algorithm; the x axis
# is the row index of the joined table.
# The label keys name the conviction columns that are actually plotted here
# (they previously referred to the lift columns, copied from the lift plot).
fig = px.scatter(
    retail_joined,
    y=['conviction_apriori', 'conviction_eclat'],
    labels={
        'index': 'Rule Index',
        'value': 'Conviction',
        'variable': 'algorithm',
        'conviction_apriori': 'Apriori',
        'conviction_eclat': 'ECLAT',
    },
)
fig.show()

In [20]:
# Mean / max / min conviction over the joined Retail rules, once per algorithm.
for heading, column in (("Apriori", 'conviction_apriori'), ("\n\nEclat", 'conviction_eclat')):
    print(heading)
    values = retail_joined[column]
    print("Average Conviction: ", values.mean())
    print("Max Conviction: ", values.max())
    print("Min Conviction: ", values.min())

Apriori
Average Conviction:  1.9906493258162359
Max Conviction:  7.544857768052516
Min Conviction:  1.111013858497447


Eclat
Average Conviction:  1.9906493258162359
Max Conviction:  7.544857768052516
Min Conviction:  1.111013858497447


## Average, Min, Max Support and Confidence

In [21]:
# Mean / max / min support over the joined Retail rules, once per algorithm.
print("Retail")
for heading, column in (("Apriori", 'support_apriori'), ("\n\nEclat", 'support_eclat')):
    print(heading)
    values = retail_joined[column]
    print("Average Support: ", values.mean())
    print("Max Support: ", values.max())
    print("Min Support: ", values.min())

Retail
Apriori
Average Support:  0.053694664197946475
Max Support:  0.13129102844638948
Min Support:  0.04157549234135667


Eclat
Average Support:  0.053694664197946475
Max Support:  0.13129102844638948
Min Support:  0.04157549234135667


In [22]:
# Mean / max / min confidence over the joined Retail rules, once per algorithm.
print("Retail")
for heading, column in (("Apriori", 'confidence_apriori'), ("\n\nEclat", 'confidence_eclat')):
    print(heading)
    values = retail_joined[column]
    print("Average Confidence: ", values.mean())
    print("Max Confidence: ", values.max())
    print("Min Confidence: ", values.min())

Retail
Apriori
Average Confidence:  0.4540531365575575
Max Confidence:  0.875
Min Confidence:  0.16964285714285712


Eclat
Average Confidence:  0.4540531365575575
Max Confidence:  0.875
Min Confidence:  0.16964285714285712
