# Performance

## Setting

### Parameters

In [1]:
# Column lists handed to `fit` as the stable and flexible attribute sets.
stable_attributes = ['gender', 'SeniorCitizen', 'Partner']
flexible_attributes = [
    'PhoneService',
    'InternetService',
    'OnlineSecurity',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
]

# Target column and the two target values passed to `fit`.
target = 'Churn'
undesired_state = 'Yes'
desired_state = 'No'

# Thresholds forwarded to the ActionRules constructor.
min_stable_attributes = 2
min_flexible_attributes = 1  # must be at least 1
min_undesired_support, min_undesired_confidence = 50, 0.6
min_desired_support, min_desired_confidence = 50, 0.6

### Parameters Grid

In [2]:
# One list per benchmark axis: DataFrame backend label, GPU flag, sparse flag.
dataframe = ["pandas", "cudf"]
use_gpu = [True, False]
use_sparse_matrix = [True, False]

All combinations:

In [3]:
import itertools

# Cartesian product of the three grid axes, materialised as a list so it
# can be iterated more than once.
combinations = [*itertools.product(dataframe, use_gpu, use_sparse_matrix)]

# List every (backend, gpu, sparse) triple that will be benchmarked.
for combo in combinations:
    print('df: {}, gpu: {}, sparse: {}'.format(*combo))

df: pandas, gpu: True, sparse: True
df: pandas, gpu: True, sparse: False
df: pandas, gpu: False, sparse: True
df: pandas, gpu: False, sparse: False
df: cudf, gpu: True, sparse: True
df: cudf, gpu: True, sparse: False
df: cudf, gpu: False, sparse: True
df: cudf, gpu: False, sparse: False


## Data

In [4]:
import pandas as pd
import cudf

Load the data into a pandas DataFrame and replicate its rows tenfold.

In [5]:
# Do not truncate the column list when a frame is displayed.
pd.options.display.max_columns = None

# Read the semicolon-separated CSV and stack ten copies of it.
data_frame = pd.concat([pd.read_csv("data/telco.csv", sep=";")] * 10)
data_frame.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


Load the data into a cuDF DataFrame and replicate its rows tenfold.

In [6]:
# Read the same semicolon-separated CSV through cuDF and stack ten copies.
cudf_frame = cudf.concat([cudf.read_csv("data/telco.csv", sep=";")] * 10)
cudf_frame.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


## Action Rules

In [7]:
from action_rules import ActionRules

# Build the miner once; every threshold comes from the Parameters cell and
# verbose output is switched off.
action_rules = ActionRules(
    min_stable_attributes=min_stable_attributes,
    min_flexible_attributes=min_flexible_attributes,
    min_undesired_support=min_undesired_support,
    min_undesired_confidence=min_undesired_confidence,
    min_desired_support=min_desired_support,
    min_desired_confidence=min_desired_confidence,
    verbose=False,
)

## GPU

In [8]:
# Shell out to nvidia-smi to record the GPU, driver version and current usage.
! nvidia-smi

Mon Jul  8 15:27:22 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 555.42.03              Driver Version: 555.85         CUDA Version: 12.5     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        On  |   00000000:22:00.0 Off |                  N/A |
| 30%   31C    P2            104W /  350W |     276MiB /  24576MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [9]:
# Shell out to nvcc to record the installed CUDA compiler release.
! nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Thu_Nov_18_09:45:30_PST_2021
Cuda compilation tools, release 11.5, V11.5.119
Build cuda_11.5.r11.5/compiler.30672275_0


## Processing Time

Action rules discovery.

In [10]:
def measure_time(data, use_gpu, use_sparse_matrix):
    """Fit the notebook's ``action_rules`` miner on ``data`` and print the rule count.

    Parameters
    ----------
    data
        Frame passed unchanged to ``action_rules.fit``.
    use_gpu
        Forwarded to ``fit`` as ``use_gpu``.
    use_sparse_matrix
        Forwarded to ``fit`` as ``use_sparse_matrix``.

    The attribute lists, target column and target states are read from the
    notebook-level settings. Nothing is returned; the number of mined rules
    is printed.
    """
    fit_options = dict(
        data=data,
        stable_attributes=stable_attributes,
        flexible_attributes=flexible_attributes,
        target=target,
        target_undesired_state=undesired_state,
        target_desired_state=desired_state,
        use_gpu=use_gpu,
        use_sparse_matrix=use_sparse_matrix,
    )
    action_rules.fit(**fit_options)

    rule_count = len(action_rules.get_rules().get_ar_notation())
    print(f'Number of action rules: {rule_count}')

Measure the time for each parameter combination.

In [11]:
for combo in combinations:
    if combo[0] == 'pandas':
        data = data_frame
    else:
        data = cudf_frame
    print('df: ' + str(combo[0]) + ', gpu: ' + str(combo[1]) + ', sparse: ' + str(combo[2]))
    print('___________________________________')
    t = %timeit -n2 -r2 -o measure_time(data, combo[1], combo[2])
    print('Time: ' + str(t))
    print()

df: cudf, gpu: False, sparse: True
___________________________________
Number of action rules: 4045
Number of action rules: 4045
Number of action rules: 4045
Number of action rules: 4045
2min 2s ± 19.2 s per loop (mean ± std. dev. of 2 runs, 2 loops each)
Time: 2min 2s ± 19.2 s per loop (mean ± std. dev. of 2 runs, 2 loops each)

df: cudf, gpu: False, sparse: False
___________________________________


TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly.