In [1]:
from pathlib import Path

import pandas as pd

from config import ROOT_DIR

In [2]:
# mar imports
from src.mar.apriori_df.apriori.apriori import DataFrameRuleGenerator
from src.mar.apriori_df.interest_measures import BatchConfidence
from src.mar.apriori_df.interest_measures import BatchSupport

In [7]:
# External libraries imports
import apyori
import efficient_apriori
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [3]:
# Minimum support / confidence thresholds passed to every library call in
# this notebook.
MIN_SUPPORT = 0.4
MIN_CONFIDENCE = 0.4

# Load the survey CSV located at <ROOT_DIR>/sources/survey.csv.
path = Path(ROOT_DIR).joinpath("sources", "survey.csv")
df = pd.read_csv(path)

In [4]:
# Convert the dataset to the list-of-transactions format required by the
# external libraries: one list per row, holding the labels of the columns
# whose value equals 1.
# The comparison is done once for the whole frame instead of looping with
# ``df.iterrows()``, which builds a new Series object for every row; the
# resulting lists are the same.
transactions = [
    df.columns[row_mask].tolist()
    for row_mask in df.eq(1).to_numpy()
]

### MAR

In [5]:
%%time
# Each mapping pairs a measure class with the minimum threshold applied to it.
itemset_measures = {BatchSupport: MIN_SUPPORT}
rule_measures = {BatchConfidence: MIN_CONFIDENCE}

# The generator is used as a context manager and is given the DataFrame
# itself (not the `transactions` list) as its transaction database.
with DataFrameRuleGenerator(
    itemset_measures=itemset_measures,
    rule_measures=rule_measures,
) as rule_gen:
    rules = rule_gen.generate_strong_association_rules(transactions=df)

Finding 129 frequent itemsets took 0.00294131999999081
Generating 1180 association rules took 0.03731217800032027
Rules generated using df database in 0.04103628499979095 seconds


CPU times: user 39.6 ms, sys: 4.07 ms, total: 43.7 ms
Wall time: 42.2 ms


### efficient-apriori

In [11]:
%%time
# A single call returns both the itemsets and the rules, using the same
# thresholds as the other libraries.
itemsets, rules = efficient_apriori.apriori(
    transactions,
    min_support=MIN_SUPPORT,
    min_confidence=MIN_CONFIDENCE,
)

CPU times: user 10.8 ms, sys: 2.84 ms, total: 13.6 ms
Wall time: 13.5 ms


### apyori

In [12]:
%%time
# list() consumes the full result inside the timed cell
# (presumably apyori.apriori yields its records lazily -- TODO confirm).
rules = list(
    apyori.apriori(
        transactions,
        min_support=MIN_SUPPORT,
        min_confidence=MIN_CONFIDENCE,
    )
)

CPU times: user 195 ms, sys: 4.8 ms, total: 200 ms
Wall time: 198 ms


### mlxtend

In [8]:
%%time
# Encode the transaction lists into a DataFrame with one column per item
# (column labels come from the fitted encoder's `columns_`).
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)

# The encoded frame gets its own name: rebinding `df` here would overwrite
# the raw survey data that the transaction-conversion cell and the MAR cell
# read, so re-running those cells after this one would silently use a
# different input.
onehot_df = pd.DataFrame(te_ary, columns=te.columns_)

# Frequent itemsets first, then rules filtered on confidence.
frequent_itemsets = apriori(onehot_df, min_support=MIN_SUPPORT, use_colnames=True)
rules_df = association_rules(frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE)

CPU times: user 12.6 ms, sys: 224 μs, total: 12.9 ms
Wall time: 13.5 ms


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Allegro),(Chrome),0.578014,0.771868,0.460993,0.797546,1.033268,1.0,0.014842,1.126836,0.076298,0.518617,0.112559,0.697395
1,(Chrome),(Allegro),0.771868,0.578014,0.460993,0.597243,1.033268,1.0,0.014842,1.047744,0.141132,0.518617,0.045569,0.697395
2,(Allegro),(Facebook),0.578014,0.881797,0.537825,0.930470,1.055198,1.0,0.028134,1.700042,0.123963,0.583333,0.411779,0.770195
3,(Facebook),(Allegro),0.881797,0.578014,0.537825,0.609920,1.055198,1.0,0.028134,1.081792,0.442549,0.583333,0.075608,0.770195
4,(Allegro),(Facebook Messenger),0.578014,0.868794,0.517730,0.895706,1.030975,1.0,0.015555,1.258031,0.071198,0.557252,0.205107,0.745812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1175,(Facebook Messenger),"(YouTube, Google Maps, Chrome, Facebook, Gmail)",0.868794,0.531915,0.510638,0.587755,1.104980,1.0,0.048514,1.135454,0.724099,0.573705,0.119295,0.773878
1176,(Google Maps),"(YouTube, Facebook Messenger, Chrome, Facebook...",0.869976,0.563830,0.510638,0.586957,1.041017,1.0,0.020120,1.055991,0.303030,0.553137,0.053022,0.746308
1177,(Chrome),"(YouTube, Facebook Messenger, Google Maps, Fac...",0.771868,0.611111,0.510638,0.661562,1.082556,1.0,0.038941,1.149070,0.334281,0.585366,0.129731,0.748576
1178,(Facebook),"(YouTube, Facebook Messenger, Google Maps, Chr...",0.881797,0.522459,0.510638,0.579088,1.108391,1.0,0.049936,1.134541,0.827315,0.571429,0.118586,0.778232
