In [37]:
from pathlib import Path
import pandas as pd
from config import ROOT_DIR

In [38]:
# MAR (mining association rules) package imports
from src.mining_association_rules.apriori_df.apriori.apriori import DataFrameRuleGenerator
from src.mining_association_rules.apriori_df.interest_measures import BatchConfidence
from src.mining_association_rules.apriori_df.interest_measures import BatchSupport

In [55]:
# External libraries imports
# NOTE: both efficient_apriori and mlxtend export a function called `apriori`;
# the later import rebinds the bare name, so explicit aliases are provided too.
import apyori
from efficient_apriori import apriori
from efficient_apriori import apriori as efficient_apriori_fn
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.frequent_patterns import apriori as mlxtend_apriori
from mlxtend.preprocessing import TransactionEncoder

In [45]:
# Thresholds passed to every library benchmarked in this notebook.
MIN_SUPPORT = 0.4
MIN_CONFIDENCE = 0.4

# Read the survey dataset from the project's "sources" directory.
path = Path(ROOT_DIR).joinpath("sources", "survey.csv")
df = pd.read_csv(path)

In [46]:
# Convert the dataset to the list-of-transactions format required by the external
# libraries: one list per row, holding the labels of the columns whose value equals 1.
# A single vectorized comparison replaces df.iterrows(), which builds a new Series
# for every row.
is_selected = (df == 1).to_numpy()
transactions = [df.columns[row_mask].tolist() for row_mask in is_selected]

### MAR

In [48]:
%%time
# Each threshold is keyed by the measure class it applies to.
itemset_measures = {BatchSupport: MIN_SUPPORT}
rule_measures = {BatchConfidence: MIN_CONFIDENCE}

# The generator is used as a context manager and is given the DataFrame itself,
# not the converted `transactions` lists used by the external libraries.
generator_kwargs = dict(
    itemset_measures=itemset_measures,
    rule_measures=rule_measures,
)
with DataFrameRuleGenerator(**generator_kwargs) as rule_gen:
    rules = rule_gen.generate_strong_association_rules(transactions=df)

Finding 129 frequent itemsets took 0.005959666001217556
Generating 1180 association rules took 0.0655520019990945
Rules generated using df database in 0.07290494100016076 seconds


CPU times: user 78.9 ms, sys: 1.13 ms, total: 80 ms
Wall time: 77.2 ms


### efficient-apriori

In [47]:
%%time
# Use the explicit alias: the bare name `apriori` is rebound by the later
# `from mlxtend.frequent_patterns import apriori` import, so on a fresh
# Restart & Run All the unqualified call would reach mlxtend's function instead
# of efficient_apriori's.
itemsets, rules = efficient_apriori_fn(
    transactions,
    min_support=MIN_SUPPORT,
    min_confidence=MIN_CONFIDENCE,
)

CPU times: user 20.1 ms, sys: 8.03 ms, total: 28.2 ms
Wall time: 26.7 ms


### apyori

In [49]:
%%time
# list() consumes whatever iterable apyori.apriori returns, so the full result
# is materialized inside the timed cell.
relation_records = apyori.apriori(
    transactions,
    min_support=MIN_SUPPORT,
    min_confidence=MIN_CONFIDENCE,
)
rules = list(relation_records)

### mlxtend

In [57]:
%%time
# Encode the transaction lists into a DataFrame with one column per item,
# which is the input format passed to mlxtend's apriori below.
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)

# Keep the encoded frame under its own name. Rebinding `df` here would replace the
# survey data loaded earlier, so re-running the transactions or MAR cells afterwards
# would silently operate on different input.
encoded_df = pd.DataFrame(te_ary, columns=te.columns_)

# `mlxtend_apriori` is the explicit alias of mlxtend.frequent_patterns.apriori;
# it avoids depending on which library last bound the bare name `apriori`.
frequent_itemsets = mlxtend_apriori(encoded_df, min_support=MIN_SUPPORT, use_colnames=True)
rules_df = association_rules(frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE)

CPU times: user 21.4 ms, sys: 3.04 ms, total: 24.4 ms
Wall time: 22.2 ms
