# Performance

## Setting

### Parameters

In [1]:
stable_attributes = ["gender", "SeniorCitizen", "Partner"]
flexible_attributes = ["PhoneService", 
                       "InternetService", 
                       "OnlineSecurity", 
                       "DeviceProtection", 
                       "TechSupport",
                       "StreamingTV"]
target = 'Churn'
min_stable_attributes = 2
min_flexible_attributes = 1 #min 1
min_undesired_support = 50
min_undesired_confidence = 0.6
min_desired_support = 50
min_desired_confidence = 0.6
undesired_state = 'Yes'
desired_state = 'No'

### Parameters Grid

In [2]:
dataframe = ['pandas', 'cudf']
use_gpu = [True, False]
use_sparse_matrix = [True, False]

All combinations:

In [3]:
import itertools
# Get all combinations
combinations = list(itertools.product(dataframe, use_gpu, use_sparse_matrix))
# Print the combinations
for combo in combinations:
    print('df: ' + str(combo[0]) + ', gpu: ' + str(combo[1]) + ', sparse: ' + str(combo[2]))

df: pandas, gpu: True, sparse: True
df: pandas, gpu: True, sparse: False
df: pandas, gpu: False, sparse: True
df: pandas, gpu: False, sparse: False
df: cudf, gpu: True, sparse: True
df: cudf, gpu: True, sparse: False
df: cudf, gpu: False, sparse: True
df: cudf, gpu: False, sparse: False


## Data

In [4]:
import pandas as pd
import cudf

Import data to Pandas DataFrame and increase the table tenfold.

In [5]:
pd.set_option('display.max_columns', None)
data_frame = pd.read_csv("data/telco.csv", sep=";")
data_frame = pd.concat([data_frame] * 10)
data_frame.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


Import data to cuDF DataFrame and increase the table tenfold.

In [6]:
cudf_frame = cudf.read_csv("data/telco.csv", sep=";")
cudf_frame = cudf.concat([cudf_frame] * 10)
cudf_frame.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


## Action Rules

In [7]:
from action_rules import ActionRules
# Action Rules Mining
def init_model():
    action_rules = ActionRules(
        min_stable_attributes = min_stable_attributes, 
        min_flexible_attributes = min_flexible_attributes, 
        min_undesired_support = min_undesired_support, 
        min_undesired_confidence = min_undesired_confidence, 
        min_desired_support = min_desired_support,
        min_desired_confidence = min_desired_confidence, 
        verbose = False)
    return action_rules

## GPU + System Information

In [8]:
! nvidia-smi

Sun Jul 21 11:36:55 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA L40S                    Off | 00000000:03:00.0 Off |                    0 |
| N/A   50C    P0              83W / 350W |    906MiB / 46068MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [9]:
import platform,socket,re,uuid,json,psutil,logging

def getSystemInfo():
    info={}
    info['platform']=platform.system()
    info['platform-release']=platform.release()
    info['platform-version']=platform.version()
    info['architecture']=platform.machine()
    info['hostname']=socket.gethostname()
    info['ip-address']=socket.gethostbyname(socket.gethostname())
    info['mac-address']=':'.join(re.findall('..', '%012x' % uuid.getnode()))
    info['processor']=platform.processor()
    info['ram']=str(round(psutil.virtual_memory().total / (1024.0 **3)))+" GB"
    return json.dumps(info)
json.loads(getSystemInfo())

{'platform': 'Linux',
 'platform-release': '5.15.0-113-generic',
 'platform-version': '#123-Ubuntu SMP Mon Jun 10 08:16:17 UTC 2024',
 'architecture': 'x86_64',
 'hostname': 'jupytergpu',
 'ip-address': '127.0.1.1',
 'mac-address': '00:50:56:94:f5:54',
 'processor': 'x86_64',
 'ram': '393 GB'}

In [10]:
import cpuinfo
cpuinfo.get_cpu_info()

{'python_version': '3.10.10.final.0 (64 bit)',
 'cpuinfo_version': [9, 0, 0],
 'cpuinfo_version_string': '9.0.0',
 'arch': 'X86_64',
 'bits': 64,
 'count': 32,
 'arch_string_raw': 'x86_64',
 'vendor_id_raw': 'AuthenticAMD',
 'brand_raw': 'AMD EPYC 9354 32-Core Processor',
 'hz_advertised_friendly': '3.2451 GHz',
 'hz_actual_friendly': '3.2451 GHz',
 'hz_advertised': [3245124000, 0],
 'hz_actual': [3245124000, 0],
 'stepping': 1,
 'model': 17,
 'family': 25,
 'flags': ['3dnowext',
  '3dnowprefetch',
  'abm',
  'adx',
  'aes',
  'apic',
  'arat',
  'avx',
  'avx2',
  'avx512_bf16',
  'avx512_bitalg',
  'avx512_vbmi2',
  'avx512_vnni',
  'avx512_vpopcntdq',
  'avx512bitalg',
  'avx512bw',
  'avx512cd',
  'avx512dq',
  'avx512f',
  'avx512ifma',
  'avx512vbmi',
  'avx512vbmi2',
  'avx512vl',
  'avx512vnni',
  'avx512vpopcntdq',
  'bmi1',
  'bmi2',
  'clflush',
  'clflushopt',
  'clwb',
  'clzero',
  'cmov',
  'cmp_legacy',
  'constant_tsc',
  'cpuid',
  'cr8_legacy',
  'cx16',
  'cx8',
  '

## Processing Time

Action rules discovery.

In [11]:
def measure_time(data, use_gpu, use_sparse_matrix):
    action_rules = init_model()
    action_rules.fit(
        data = data, 
        stable_attributes = stable_attributes, 
        flexible_attributes = flexible_attributes, 
        target = target, 
        target_undesired_state = undesired_state,
        target_desired_state = desired_state, 
        use_gpu = use_gpu,
        use_sparse_matrix = use_sparse_matrix,
    )
    print('Number of action rules: ' + str(len(action_rules.get_rules().get_ar_notation())))

Measure time for each parameters combination.

In [12]:
import gc
for combo in combinations:
    if combo[0] == 'pandas':
        data = data_frame
    else:
        data = cudf_frame
    print('df: ' + str(combo[0]) + ', gpu: ' + str(combo[1]) + ', sparse: ' + str(combo[2]))
    print('___________________________________')
    t = %timeit -n2 -r2 -o measure_time(data, combo[1], combo[2])
    print('Time: ' + str(t))
    gc.collect()
    print()

df: pandas, gpu: True, sparse: True
___________________________________
Number of action rules: 4045
Number of action rules: 4045
Number of action rules: 4045
Number of action rules: 4045
54.2 s ± 3.3 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)
Time: 54.2 s ± 3.3 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)

df: pandas, gpu: True, sparse: False
___________________________________
Number of action rules: 4045
Number of action rules: 4045
Number of action rules: 4045
Number of action rules: 4045
13.3 s ± 17.7 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)
Time: 13.3 s ± 17.7 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)

df: pandas, gpu: False, sparse: True
___________________________________
Number of action rules: 4045
Number of action rules: 4045
Number of action rules: 4045
Number of action rules: 4045
1min 17s ± 207 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)
Time: 1min 17s ± 207 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)
