# Performance Numpy vs. Cupy vs. ActionRulesDiscovery

## Parameters

In [1]:
stable_attributes = ["gender", "SeniorCitizen", "Partner"]
flexible_attributes = ["PhoneService", 
                       "InternetService", 
                       "OnlineSecurity", 
                       "DeviceProtection", 
                       "TechSupport",
                       "StreamingTV"]
target = 'Churn'
min_stable_attributes = 2
min_flexible_attributes = 1 #min 1
min_undesired_support = 50
min_undesired_confidence = 0.6
min_desired_support = 50
min_desired_confidence = 0.6
undesired_state = 'Yes'
desired_state = 'No'

## Pandas (action-rules)

In [2]:
import pandas as pd

### Data

Import data to Pandas DataFrame.

In [3]:
pd.set_option('display.max_columns', None)
dataFrame = pd.read_csv("data/telco.csv", sep=";")
dataFrame.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


### Action rules

In [4]:
from action_rules import ActionRules
# Action Rules Mining
action_rules = ActionRules(
    min_stable_attributes = min_stable_attributes, 
    min_flexible_attributes = min_flexible_attributes, 
    min_undesired_support = min_undesired_support, 
    min_undesired_confidence = min_undesired_confidence, 
    min_desired_support = min_desired_support,
    min_desired_confidence = min_desired_confidence, 
    verbose = False)

In [5]:
def measure_time():
    action_rules.fit(
        data = dataFrame, 
        stable_attributes = stable_attributes, 
        flexible_attributes = flexible_attributes, 
        target = target, 
        target_undesired_state = undesired_state,
        target_desired_state = desired_state, 
        use_gpu = False
    )

### Count action rules

In [6]:
measure_time()
len(action_rules.get_rules().get_ar_notation())

327

### Time

In [7]:
%timeit measure_time()

1.37 s ± 36 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Show first five rules

In [8]:
for ar in action_rules.get_rules().get_ar_notation()[:5]:
    print(ar)
    print()

{'undesired': {'itemset': (0, 3, 11, 14), 'support': 144, 'confidence': 0.602510460251046, 'target': 24}, 'desired': {'itemset': (0, 3, 11, 16), 'support': 102, 'confidence': 0.6415094339622641, 'target': 23}, 'uplift': 58.32075471698113}

{'undesired': {'itemset': (0, 3, 11, 14), 'support': 144, 'confidence': 0.602510460251046, 'target': 24}, 'desired': {'itemset': (0, 3, 13, 16), 'support': 66, 'confidence': 0.8571428571428571, 'target': 23}, 'uplift': 109.85714285714283}

{'undesired': {'itemset': (0, 3, 7, 11, 14), 'support': 130, 'confidence': 0.6074766355140186, 'target': 24}, 'desired': {'itemset': (0, 3, 7, 11, 16), 'support': 94, 'confidence': 0.6438356164383562, 'target': 23}, 'uplift': 53.780821917808225}

{'undesired': {'itemset': (0, 3, 7, 11, 14), 'support': 130, 'confidence': 0.6074766355140186, 'target': 24}, 'desired': {'itemset': (0, 3, 7, 13, 16), 'support': 62, 'confidence': 0.8493150684931506, 'target': 23}, 'uplift': 97.75342465753423}

{'undesired': {'itemset': (

## CuPy (action-rules)

In [21]:
import cudf

ModuleNotFoundError: No module named 'cudf'

Import data to cuDF dataframe

In [9]:
### Actio rules

In [10]:
from action_rules import ActionRules
# Action Rules Mining
action_rules = ActionRules(
    min_stable_attributes = min_stable_attributes, 
    min_flexible_attributes = min_flexible_attributes, 
    min_undesired_support = min_undesired_support, 
    min_undesired_confidence = min_undesired_confidence, 
    min_desired_support = min_desired_support,
    min_desired_confidence = min_desired_confidence, 
    verbose = False)

In [11]:
def measure_time_2():
    action_rules.fit(
        data = dataFrame, 
        stable_attributes = stable_attributes, 
        flexible_attributes = flexible_attributes, 
        target = target, 
        target_undesired_state = undesired_state,
        target_desired_state = desired_state, 
        use_gpu = True # Use cuDF
    )

### Count action rules

In [12]:
measure_time_2()
len(action_rules.get_rules().get_ar_notation())



327

### Time

In [13]:
%timeit measure_time_2()

5.47 s ± 141 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Show first five rules

In [14]:
for ar in action_rules.get_rules().get_ar_notation()[:5]:
    print(ar)
    print()

{'undesired': {'itemset': (0, 3, 11, 14), 'support': array(144, dtype=uint64), 'confidence': array(0.60251046), 'target': 24}, 'desired': {'itemset': (0, 3, 11, 16), 'support': array(102, dtype=uint64), 'confidence': array(0.64150943), 'target': 23}, 'uplift': array(58.32075472)}

{'undesired': {'itemset': (0, 3, 11, 14), 'support': array(144, dtype=uint64), 'confidence': array(0.60251046), 'target': 24}, 'desired': {'itemset': (0, 3, 13, 16), 'support': array(66, dtype=uint64), 'confidence': array(0.85714286), 'target': 23}, 'uplift': array(109.85714286)}

{'undesired': {'itemset': (0, 3, 7, 11, 14), 'support': array(130, dtype=uint64), 'confidence': array(0.60747664), 'target': 24}, 'desired': {'itemset': (0, 3, 7, 11, 16), 'support': array(94, dtype=uint64), 'confidence': array(0.64383562), 'target': 23}, 'uplift': array(53.78082192)}

{'undesired': {'itemset': (0, 3, 7, 11, 14), 'support': array(130, dtype=uint64), 'confidence': array(0.60747664), 'target': 24}, 'desired': {'itemse

## DEAR algorithm (actionRulesDiscovery)

In [15]:
from actionrules.actionRulesDiscovery import ActionRulesDiscovery

In [16]:
### Action Rules

In [17]:
def measure_time_3():
    actionRulesDiscovery = ActionRulesDiscovery()
    actionRulesDiscovery.load_pandas(dataFrame)
    actionRulesDiscovery.fit(stable_attributes = stable_attributes ,
                             flexible_attributes = flexible_attributes,
                             consequent = target,
                             conf = min_undesired_confidence * 100, # There is just one confidence for both desired and undesired part.
                             supp = -min_undesired_support, # There is just one support for both desired and undesired part. Minus means that it is the absolute support.
                             desired_changes = [[undesired_state, desired_state]],
                             is_nan=False,
                             is_reduction=True,
                             min_stable_attributes=min_stable_attributes,
                             min_flexible_attributes=min_flexible_attributes,
                             is_strict_flexible=False
                            )
    return actionRulesDiscovery

### Count action rules

In [18]:
actionRulesDiscovery = measure_time_3()
len(actionRulesDiscovery.get_action_rules())

  self.data = self.data.applymap(str)


327

### Time

In [19]:
%timeit measure_time_3()

  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)


2.16 s ± 25.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Show first five rules

In [20]:
for rule in actionRulesDiscovery.get_action_rules_representation()[:20]:
    print(rule)
    print(" ")

r = [(Partner: no) ∧ (SeniorCitizen: 0) ∧  (PhoneService: yes)  ∧  (TechSupport: no)  ∧  (OnlineSecurity: no)  ∧  (DeviceProtection: no)  ∧ (InternetService: fiber optic → dsl) ] ⇒ [Churn: Yes → No] with support: 0.021723697288087464, confidence: 0.3756265939671093, uplift: 0.01756678852282622.
 
r = [(Partner: no) ∧ (SeniorCitizen: 0) ∧  (PhoneService: yes)  ∧ (TechSupport: no → yes)  ∧  (OnlineSecurity: no)  ∧ (DeviceProtection: no → yes)  ∧  (InternetService: fiber optic) ] ⇒ [Churn: Yes → No] with support: 0.009371006673292631, confidence: 0.4292203758347569, uplift: 0.02438818081028092.
 
r = [(Partner: no) ∧ (SeniorCitizen: 0) ∧  (PhoneService: yes)  ∧ (TechSupport: no → yes)  ∧  (OnlineSecurity: no)  ∧  (DeviceProtection: no)  ∧ (InternetService: fiber optic → dsl) ] ⇒ [Churn: Yes → No] with support: 0.011216811018032088, confidence: 0.46887180214017427, uplift: 0.029434996395236473.
 
r = [(Partner: no) ∧ (SeniorCitizen: 0) ∧  (PhoneService: yes)  ∧ (TechSupport: no → yes)  ∧  