# Compare PyApriori with other Python packages

Packages:
- mlxtend.frequent_patterns.apriori
- PyFIM
- apyori
- pyapriori (this package)

Data:
- accidents
- connect
- chess
- mushroom

Source: Frequent Itemset Mining Implementations Repository (http://fimi.uantwerpen.be/data/) 

## Data Preparation

In [1]:
import pandas as pd
import csv
from mlxtend.preprocessing import TransactionEncoder
from scipy.sparse import csc_matrix
from cupyx.scipy.sparse import csr_matrix as csr_matrix_cupy

Load datasets

In [2]:
def load_dataset(datafile):
    dataset = [i.strip().split() for i in open(datafile).readlines()]
    # Number of transactions
    transactions_count = len(dataset)
    # One Hot Encoding
    te = TransactionEncoder()
    te_dummies = te.fit(dataset).transform(dataset)
    # Dataframe with One Hot Encoding
    df = pd.DataFrame(te_dummies, columns=te.columns_)
    # CSC Matrix with One Hot Encoding
    dataset_csc = csc_matrix(te_dummies)
    # Cupy CSC Matrix with One Hot Encoding
    dataset_csr_cupy = csr_matrix_cupy(dataset_csc)
    return df, dataset, dataset_csc, dataset_csr_cupy, transactions_count

## Modeling

In [3]:
import time
from mlxtend.frequent_patterns import apriori as mlxtend_apriori
import fim
from apyori import apriori as apyori_apriori
from pyapriori import PyApriori
from interruptingcow import timeout

The model fit is killed in 30s.

In [4]:
def run_models(minsup, df, dataset, dataset_csc, dataset_csr_cupy, transactions_count):
    print("")
    print("mlxtend")
    try:
        raise NameError('HiThere')
        start = time.time()
        with timeout(30, exception=RuntimeError):
            result = mlxtend_apriori(df, min_support=(minsup/100), use_colnames=True)
        end = time.time()
        mlxtend_t = end - start
        mlxtend_count = result.shape[0]
    except Exception as inst:
        print(inst) 
        print(inst.args)  
        mlxtend_t = None
        mlxtend_count = None
    print('mlxtend Done in ' + str(mlxtend_t) + 's')
    
    print("")
    print("fim")
    try:
        start = time.time()
        with timeout(30, exception=RuntimeError):
            result = fim.apriori(dataset, supp=minsup)
        end = time.time()
        fim_t = end - start
        fim_count = len(result)  
    except Exception as inst:
        print(inst)
        print(inst.args)  
        fim_t = None
        fim_count = None 
    print('fim Done in ' + str(fim_t) + 's')    
    
    print("")
    print("apyori")
    try:
        raise NameError('HiThere')
        start = time.time()
        with timeout(30, exception=RuntimeError):
            result = list(apyori_apriori(dataset, min_support=(minsup/100)))
        end = time.time()
        apyori_t = end - start
        apyori_count = len(result)
    except Exception as inst:
        print(inst)
        print(inst.args)  
        apyori_t = None
        apyori_count = None
    print('apyori Done in ' + str(apyori_t) + 's')      
    
    print("")
    print("pyapriori")
    try:
        start = time.time()
        with timeout(30, exception=RuntimeError):
            py_apriori = PyApriori(round(transactions_count * (minsup/100)), 1)
            _, result = py_apriori.fit(dataset_csc)
        end = time.time()
        pyapriori_t = end - start
        pyapriori_count = len(result)  
    except Exception as inst:
        print(inst)
        print(inst.args)  
        pyapriori_t = None
        pyapriori_count = None 
    print('pyapriori Done in ' + str(pyapriori_t) + 's')     
    
    print("")
    print("pyapriori_cupy")
    try:
        start = time.time()
        with timeout(30, exception=RuntimeError):
            py_apriori = PyApriori(round(transactions_count * (minsup/100)), 1)
            _, result = py_apriori.fit(dataset_csr_cupy)
        end = time.time()
        pyapriori_cupy_t = end - start
        pyapriori_cupy_count = len(result) 
    except Exception as inst:
        print(inst)
        print(inst.args)  
        pyapriori_cupy_t = None
        pyapriori_cupy_count = None 
    print('pyapriori_cupy Done in ' + str(pyapriori_cupy_t) + 's')       
    
    return mlxtend_t, mlxtend_count, fim_t, fim_count, apyori_t, apyori_count, pyapriori_t, pyapriori_count, pyapriori_cupy_t, pyapriori_cupy_count

## Run test

Parameters

In [5]:
supports = [50, 60, 70]
datafiles = ['data/accidents.dat', 'data/chess.dat', 'data/mushroom.dat']

Test

In [6]:
import cupy as cp
#cp.cuda.set_allocator(cp.cuda.MemoryPool(cp.cuda.malloc_managed).malloc)
#cp.cuda.set_allocator(cp.cuda.MemoryPool().malloc)
test_result = {}
for minsup in supports:
    for datafile in datafiles:
        print((minsup, datafile))
        df, dataset, dataset_csc, dataset_csr_cupy, transactions_count = load_dataset(datafile)
        mlxtend_t, mlxtend_count, fim_t, fim_count, apyori_t, apyori_count, pyapriori_t, pyapriori_count, pyapriori_cupy_t, pyapriori_cupy_count = run_models(minsup, df, dataset, dataset_csc, dataset_csr_cupy, transactions_count)
        test_result[(minsup, datafile)] = {'mlxtend_t': mlxtend_t, 'mlxtend_count':mlxtend_count, 'fim_t':fim_t, 'fim_count':fim_count, 'apyori_t':apyori_t, 'apyori_count':apyori_count, 'pyapriori_t':pyapriori_t, 'pyapriori_count':pyapriori_count, 'pyapriori_cupy_t':pyapriori_cupy_t, 'pyapriori_cupy_count':pyapriori_cupy_count}

(50, 'data/accidents.dat')

mlxtend
HiThere
('HiThere',)
mlxtend Done in Nones

fim
fim Done in 1.7281925678253174s

apyori
HiThere
('HiThere',)
apyori Done in Nones

pyapriori
pyapriori Done in 15.585243940353394s

pyapriori_cupy
pyapriori_cupy Done in 6.010077476501465s
(50, 'data/chess.dat')

mlxtend
HiThere
('HiThere',)
mlxtend Done in Nones

fim
fim Done in 5.055415868759155s

apyori
HiThere
('HiThere',)
apyori Done in Nones

pyapriori
pyapriori Done in 25.67377758026123s

pyapriori_cupy
Out of memory allocating 5,855,040,000 bytes (allocated so far: 24,751,790,592 bytes).
('Out of memory allocating 5,855,040,000 bytes (allocated so far: 24,751,790,592 bytes).',)
pyapriori_cupy Done in Nones
(50, 'data/mushroom.dat')

mlxtend
HiThere
('HiThere',)
mlxtend Done in Nones

fim
fim Done in 0.008114337921142578s

apyori
HiThere
('HiThere',)
apyori Done in Nones

pyapriori
pyapriori Done in 0.007407665252685547s

pyapriori_cupy
pyapriori_cupy Done in 0.04902768135070801s
(60, 'data/accid

Results

In [7]:
test_result

{(50, 'data/accidents.dat'): {'mlxtend_t': None,
  'mlxtend_count': None,
  'fim_t': 1.7281925678253174,
  'fim_count': 8057,
  'apyori_t': None,
  'apyori_count': None,
  'pyapriori_t': 15.585243940353394,
  'pyapriori_count': 8057,
  'pyapriori_cupy_t': 6.010077476501465,
  'pyapriori_cupy_count': 8057},
 (50, 'data/chess.dat'): {'mlxtend_t': None,
  'mlxtend_count': None,
  'fim_t': 5.055415868759155,
  'fim_count': 1272932,
  'apyori_t': None,
  'apyori_count': None,
  'pyapriori_t': 25.67377758026123,
  'pyapriori_count': 1272932,
  'pyapriori_cupy_t': None,
  'pyapriori_cupy_count': None},
 (50, 'data/mushroom.dat'): {'mlxtend_t': None,
  'mlxtend_count': None,
  'fim_t': 0.008114337921142578,
  'fim_count': 152,
  'apyori_t': None,
  'apyori_count': None,
  'pyapriori_t': 0.007407665252685547,
  'pyapriori_count': 153,
  'pyapriori_cupy_t': 0.04902768135070801,
  'pyapriori_cupy_count': 153},
 (60, 'data/accidents.dat'): {'mlxtend_t': None,
  'mlxtend_count': None,
  'fim_t': 0.