# Performance Numpy vs. Cupy vs. ActionRulesDiscovery

## Parameters

In [1]:
# Columns handed to the miners as `stable_attributes`.
stable_attributes = [
    "gender",
    "SeniorCitizen",
    "Partner",
]

# Columns handed to the miners as `flexible_attributes`.
flexible_attributes = [
    "PhoneService",
    "InternetService",
    "OnlineSecurity",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
]

# Target column and the transition the rules should describe
# (from the undesired state to the desired state).
target = 'Churn'
undesired_state = 'Yes'
desired_state = 'No'

# Minimum number of stable / flexible attributes per rule.
min_stable_attributes = 2
min_flexible_attributes = 1  # min 1

# Support and confidence thresholds; the undesired and desired parts
# use the same values in this benchmark.
min_undesired_support = min_desired_support = 50
min_undesired_confidence = min_desired_confidence = 0.6

## Pandas, Numpy (action-rules)

In [2]:
import pandas as pd

### Data

Load the data into a pandas DataFrame.

In [3]:
# Do not truncate the column list when a frame is displayed.
pd.set_option('display.max_columns', None)

# Read the semicolon-separated Telco file and stack N copies of it.
# The list multiplier is the number of copies; 1 keeps the original size
# (presumably a knob for scaling the benchmark input -- confirm).
dataFrame = pd.concat(
    [pd.read_csv("data/telco.csv", sep=";")] * 1
)
dataFrame.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


### Action rules

In [4]:
from action_rules import ActionRules

# Build the miner from the thresholds defined in the Parameters section.
action_rules = ActionRules(
    min_stable_attributes=min_stable_attributes,
    min_flexible_attributes=min_flexible_attributes,
    min_undesired_support=min_undesired_support,
    min_undesired_confidence=min_undesired_confidence,
    min_desired_support=min_desired_support,
    min_desired_confidence=min_desired_confidence,
    verbose=False,
)

In [5]:
def measure_time(use_gpu=True, use_sparse_matrix=False):
    """Fit the module-level ``action_rules`` miner on ``dataFrame``.

    The attribute lists, target column and target states are read from the
    notebook-level parameters. Nothing is returned; the mined rules are read
    afterwards through ``action_rules.get_rules()``.

    Parameters
    ----------
    use_gpu : bool, default True
        Forwarded to ``ActionRules.fit``. The default matches the value that
        was previously hard-coded in this cell.
    use_sparse_matrix : bool, default False
        Forwarded to ``ActionRules.fit``. The default matches the value that
        was previously hard-coded in this cell.
    """
    # NOTE(review): this section is titled "Pandas, Numpy", yet the default
    # runs on the GPU. Call measure_time(use_gpu=False) for the CPU baseline.
    action_rules.fit(
        data=dataFrame,
        stable_attributes=stable_attributes,
        flexible_attributes=flexible_attributes,
        target=target,
        target_undesired_state=undesired_state,
        target_desired_state=desired_state,
        use_gpu=use_gpu,
        use_sparse_matrix=use_sparse_matrix,
    )

### Count action rules

In [6]:
import cupy as cp
# Release all free blocks held by CuPy's default memory pool before measuring.
cp.get_default_memory_pool().free_all_blocks()

In [7]:
import cupy as cp

def print_memory_usage():
    """Print the used and total byte counts of CuPy's default memory pool.

    Both figures are divided by 1024**2 and printed with two decimals,
    labelled "MB". Nothing is returned.
    """
    mem_pool = cp.get_default_memory_pool()
    print(f"Used memory: {mem_pool.used_bytes() / 1024**2:.2f} MB")
    print(f"Total memory: {mem_pool.total_bytes() / 1024**2:.2f} MB")

# Reading taken before the first fit() call in this notebook.
print_memory_usage()

Used memory: 0.00 MB
Total memory: 0.00 MB


In [8]:
# Run one fit, then show how many rules the miner returns in AR notation.
measure_time()
len(action_rules.get_rules().get_ar_notation())

327

In [9]:
import sys
# Install Scalene through the interpreter running this kernel (sys.executable),
# so the package is installed for that interpreter.
!{sys.executable} -m pip install scalene



In [10]:
# Load the Scalene IPython extension (no options are passed here).
# It provides the %scrun line magic and the %%scalene cell magic.
%load_ext scalene

LOADING
Scalene extension successfully loaded. Note: Scalene currently only
supports CPU+GPU profiling inside Jupyter notebooks. For full Scalene
profiling, use the command line version. To profile in line mode, use
`%scrun [options] statement`. To profile in cell mode, use `%%scalene
[options]` followed by your code.


In [11]:
import os
# Request UTF-8 standard streams through the environment.
# NOTE(review): Python reads PYTHONIOENCODING at interpreter start-up, so
# setting it inside a running kernel presumably only reaches child processes.
# The recorded %scrun output further down still shows an 'ascii'
# UnicodeDecodeError -- confirm whether this cell has any effect.
os.environ['PYTHONIOENCODING'] = 'utf-8'

In [12]:
# Profile one measure_time() call with Scalene, requesting an HTML report in prof.html.
# NOTE(review): the recorded output of this cell is a UnicodeDecodeError raised
# inside Scalene, so the report may not have been written -- confirm.
%scrun --outfile "prof.html" --html measure_time()

SCRUN MAGIC


Scalene: An exception of type UnicodeDecodeError occurred. Arguments:
('ascii', b'/* PrismJS 1.26.0\nhttps://prismjs.com/download.html#themes=prism&languages=markup+css+clike+javascript+python&plugins=normalize-whitespace */\n/// <reference lib="WebWorker"/>\n\nvar _self =\n  typeof window !== "undefined"\n    ? window // if in browser\n    : typeof WorkerGlobalScope !== "undefined" &&\n      self instanceof WorkerGlobalScope\n    ? self // if in worker\n    : {}; // if in node js\n\n/**\n * Prism: Lightweight, robust, elegant syntax highlighting\n *\n * @license MIT <https://opensource.org/licenses/MIT>\n * @author Lea Verou <https://lea.verou.me>\n * @namespace\n * @public\n */\nvar Prism = (function (_self) {\n  // Private helper vars\n  var lang = /(?:^|\\s)lang(?:uage)?-([\\w-]+)(?=\\s|$)/i;\n  var uniqueId = 0;\n\n  // The grammar object for plaintext\n  var plainTextGrammar = {};\n\n  var _ = {\n    /**\n     * By default, Prism will attempt to highlight all code elements (by ca

In [13]:
# Pool usage after the profiled run.
print_memory_usage()

Used memory: 0.52 MB
Total memory: 188.96 MB


### Time

In [None]:
# Time repeated calls of measure_time().
%timeit measure_time()

In [None]:
# Pool usage after the timing runs.
print_memory_usage()

## Pandas, Scipy Sparse Matrix (action-rules)

In [None]:
from action_rules import ActionRules

# Fresh miner for this section, built from the same thresholds as before.
action_rules = ActionRules(
    min_stable_attributes=min_stable_attributes,
    min_flexible_attributes=min_flexible_attributes,
    min_undesired_support=min_undesired_support,
    min_undesired_confidence=min_undesired_confidence,
    min_desired_support=min_desired_support,
    min_desired_confidence=min_desired_confidence,
    verbose=False,
)

In [None]:
def measure_time2():
    """Fit the module-level ``action_rules`` miner using the sparse-matrix path.

    This section benchmarks "Pandas, Scipy Sparse Matrix", so the fit is run
    with ``use_sparse_matrix=True`` and ``use_gpu=False``. The cell previously
    passed ``use_gpu=True, use_sparse_matrix=False``, which made it an exact
    duplicate of ``measure_time`` and never exercised the sparse path.

    Nothing is returned; the mined rules are read afterwards through
    ``action_rules.get_rules()``.
    """
    action_rules.fit(
        data=dataFrame,
        stable_attributes=stable_attributes,
        flexible_attributes=flexible_attributes,
        target=target,
        target_undesired_state=undesired_state,
        target_desired_state=desired_state,
        use_gpu=False,
        use_sparse_matrix=True,
    )

In [None]:
# Run one fit, then show how many rules the miner returns in AR notation.
measure_time2()
len(action_rules.get_rules().get_ar_notation())

In [None]:
# Time repeated calls of measure_time2().
%timeit measure_time2()

In [None]:
import cupy as cp
from cupyx.scipy.sparse import csr_matrix

# Stand-alone experiment: element-wise comparison on a CuPy CSR matrix.
# It does not use the Telco data or the miner.

# Example sparse matrix: a 3x3 float array of zeros and ones, converted to CSR.
data = cp.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=float)
sparse_matrix = csr_matrix(data)

# Perform the comparison (the matrix type and dtype are printed first).
print(type(sparse_matrix))
print(sparse_matrix.dtype)
comparison_result = sparse_matrix == 1

# Convert to a dense format to view the result (for demonstration purposes)
dense_result = comparison_result.todense()
print(dense_result)

In [None]:
# Sum over the example matrix (its literal above contains four ones).
sparse_matrix.sum()

### Show first five rules

In [None]:
# Print the first five rules in AR notation, each followed by an empty line.
for ar in action_rules.get_rules().get_ar_notation()[:5]:
    print(ar, end="\n\n")

## CuPy (action-rules)

In [None]:
import cudf

Import data to cuDF dataframe

In [None]:
### Action rules

In [None]:
from action_rules import ActionRules

# Fresh miner for this section, built from the same thresholds as before.
action_rules = ActionRules(
    min_stable_attributes=min_stable_attributes,
    min_flexible_attributes=min_flexible_attributes,
    min_undesired_support=min_undesired_support,
    min_undesired_confidence=min_undesired_confidence,
    min_desired_support=min_desired_support,
    min_desired_confidence=min_desired_confidence,
    verbose=False,
)

In [None]:
def measure_time_2():
    """Fit the module-level ``action_rules`` miner with ``use_gpu=True``.

    ``use_sparse_matrix`` is not passed, so the library default applies.
    Nothing is returned; the mined rules are read afterwards through
    ``action_rules.get_rules()``.
    """
    fit_kwargs = dict(
        data=dataFrame,
        stable_attributes=stable_attributes,
        flexible_attributes=flexible_attributes,
        target=target,
        target_undesired_state=undesired_state,
        target_desired_state=desired_state,
        # The original note here reads "Use cuDF"; dataFrame is still the
        # pandas frame loaded earlier -- TODO confirm what the flag selects.
        use_gpu=True,
    )
    action_rules.fit(**fit_kwargs)

### Count action rules

In [None]:
# Run one fit, then show how many rules the miner returns in AR notation.
measure_time_2()
len(action_rules.get_rules().get_ar_notation())

### Time

In [None]:
# Time repeated calls of measure_time_2().
%timeit measure_time_2()

### Show first five rules

In [None]:
# Print the first five rules in AR notation, each followed by an empty line.
for ar in action_rules.get_rules().get_ar_notation()[:5]:
    print(ar, end="\n\n")

## DEAR algorithm (actionRulesDiscovery)

In [None]:
from actionrules.actionRulesDiscovery import ActionRulesDiscovery

In [None]:
### Action Rules

In [None]:
def measure_time_3():
    """Mine action rules with ``ActionRulesDiscovery`` and return the miner.

    A new ``ActionRulesDiscovery`` object is created on every call, loaded
    with the module-level ``dataFrame`` and fitted with the notebook-level
    parameters. The fitted object is returned so the caller can read the
    rules from it.
    """
    miner = ActionRulesDiscovery()
    miner.load_pandas(dataFrame)
    miner.fit(
        stable_attributes=stable_attributes,
        flexible_attributes=flexible_attributes,
        consequent=target,
        # This API takes a single confidence for both the desired and the
        # undesired part; the 0-1 threshold is multiplied by 100 here.
        conf=min_undesired_confidence * 100,
        # Likewise a single support for both parts. The minus sign marks the
        # value as absolute support.
        supp=-min_undesired_support,
        desired_changes=[[undesired_state, desired_state]],
        is_nan=False,
        is_reduction=True,
        min_stable_attributes=min_stable_attributes,
        min_flexible_attributes=min_flexible_attributes,
        is_strict_flexible=False,
    )
    return miner

### Count action rules

In [None]:
# Run one fit, keep the returned miner for the cells below, and count its rules.
actionRulesDiscovery = measure_time_3()
len(actionRulesDiscovery.get_action_rules())

### Time

In [None]:
# Time repeated calls of measure_time_3().
%timeit measure_time_3()

### Show first five rules

In [None]:
# Print the first five rule representations, each followed by a separator line.
# The slice was [:20], which contradicted the "Show first five rules" heading
# and the [:5] used by the other sections of this notebook.
for rule in actionRulesDiscovery.get_action_rules_representation()[:5]:
    print(rule)
    print(" ")