In [5]:
from pathlib import Path

import pandas as pd

from config import ROOT_DIR

In [6]:
# mar imports
from src.mining_association_rules.apriori_df.apriori.apriori import DataFrameRuleGenerator
from src.mining_association_rules.apriori_df.interest_measures import BatchConfidence
from src.mining_association_rules.apriori_df.interest_measures import BatchSupport

In [7]:
# External libraries imports
import apyori
import efficient_apriori
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [8]:
# Load the survey dataset from <ROOT_DIR>/sources/survey.csv and set the
# minimum support / confidence thresholds reused by every benchmark cell below.
path = Path(ROOT_DIR).joinpath("sources", "survey.csv")
df = pd.read_csv(path)
MIN_SUPPORT = 0.4
MIN_CONFIDENCE = 0.4

In [9]:
# Build the list-of-lists transaction format consumed by the external
# libraries: for each row of `df`, keep the labels of the columns whose
# value equals 1.
transactions = []
for _, survey_row in df.iterrows():
    selected = survey_row[survey_row == 1]
    transactions.append(selected.index.tolist())

### MAR (this project's `mining_association_rules` package)

In [10]:
%%time
itemset_measures = {BatchSupport: MIN_SUPPORT}
rule_measures = {BatchConfidence: MIN_CONFIDENCE}
with DataFrameRuleGenerator(itemset_measures=itemset_measures, rule_measures=rule_measures) as rule_gen:
    rules = rule_gen.generate_strong_association_rules(transactions=df)

Finding 129 frequent itemsets took 0.0055804740004532505
Generating 1180 association rules took 0.059675896998669486
Rules generated using df database in 0.06644871999924362 seconds


CPU times: user 64.7 ms, sys: 7.43 ms, total: 72.1 ms
Wall time: 69.7 ms


### efficient-apriori

In [11]:
%%time
itemsets, rules = efficient_apriori.apriori(transactions, min_support=MIN_SUPPORT, min_confidence=MIN_CONFIDENCE)

CPU times: user 22.2 ms, sys: 4.16 ms, total: 26.4 ms
Wall time: 25 ms


### apyori

In [12]:
%%time
rules = list(apyori.apriori(transactions, min_support=MIN_SUPPORT, min_confidence=MIN_CONFIDENCE))

CPU times: user 195 ms, sys: 4.8 ms, total: 200 ms
Wall time: 198 ms


### mlxtend

In [13]:
%%time
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)
frequent_itemsets = apriori(df, min_support=MIN_SUPPORT, use_colnames=True)
rules_df = association_rules(frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE)

CPU times: user 21.9 ms, sys: 3.57 ms, total: 25.4 ms
Wall time: 23.3 ms
