# Performance Pandas vs. Cudf

## Pandas

In [1]:
import pandas as pd

Import data to Pandas DataFrame.

In [2]:
pd.set_option('display.max_columns', None)
dataFrame = pd.read_csv("data/telco.csv", sep=";")
dataFrame.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


Action rules

In [4]:
from action_rules import ActionRules
# Parameters
stable_attributes = ["gender", "SeniorCitizen", "Partner"]
flexible_attributes = ["PhoneService", 
                       "InternetService", 
                       "OnlineSecurity", 
                       "DeviceProtection", 
                       "TechSupport",
                       "StreamingTV"]
target = 'Churn'
min_stable_attributes = 1
min_flexible_attributes = 1 #min 1
min_undesired_support = 4
min_undesired_confidence = 0.6
min_desired_support = 4
min_desired_confidence = 0.6
undesired_state = 'Yes'
desired_state = 'No'
# Action Rules Mining
action_rules = ActionRules(min_stable_attributes, min_flexible_attributes, min_undesired_support, min_undesired_confidence, min_desired_support,min_desired_confidence, verbose=False)

In [5]:
action_rules.fit(
    dataFrame, 
    stable_attributes, 
    flexible_attributes, 
    target, 
    undesired_state,
    desired_state, 
    False
)

In [6]:
# Count action rules
len(action_rules.get_rules().get_ar_notation())

5170

In [7]:
%timeit action_rules.fit(dataFrame, stable_attributes, flexible_attributes, target, undesired_state, desired_state, False)

19.2 s ± 121 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Cudf

In [None]:
from action_rules import ActionRules
# Parameters
stable_attributes = ["gender", "SeniorCitizen", "Partner"]
flexible_attributes = ["PhoneService", 
                       "InternetService", 
                       "OnlineSecurity", 
                       "DeviceProtection", 
                       "TechSupport",
                       "StreamingTV"]
target = 'Churn'
min_stable_attributes = 1
min_flexible_attributes = 1 #min 1
min_undesired_support = 4
min_undesired_confidence = 0.6
min_desired_support = 4
min_desired_confidence = 0.6
undesired_state = 'Yes'
desired_state = 'No'
# Action Rules Mining
action_rules = ActionRules(min_stable_attributes, min_flexible_attributes, min_undesired_support, min_undesired_confidence, min_desired_support,min_desired_confidence, verbose=False)

In [None]:
action_rules.fit(
    dataFrame, 
    stable_attributes, 
    flexible_attributes, 
    target, 
    undesired_state,
    desired_state, 
    True, # GPU Support
)

In [None]:
%timeit action_rules.fit(dataFrame, stable_attributes, flexible_attributes, target, undesired_state, desired_state, True)

In [15]:
# Count action rules
len(action_rules.get_rules().get_ar_notation())

5170

In [None]:
for ar in action_rules.get_rules().get_ar_notation():
    print(ar)
    print()

# DEAR

In [8]:
from actionrules.actionRulesDiscovery import ActionRulesDiscovery

In [9]:
actionRulesDiscovery = ActionRulesDiscovery()
actionRulesDiscovery.load_pandas(dataFrame)

  self.data = self.data.applymap(str)


In [10]:
actionRulesDiscovery.fit(stable_attributes = ["gender", "SeniorCitizen", "Partner"],
            flexible_attributes  = ["PhoneService", 
                                    "InternetService", 
                                    "OnlineSecurity", 
                                    "DeviceProtection", 
                                    "TechSupport",
                                    "StreamingTV",
                                   ],
            consequent = "Churn",
            conf=60,
            supp=(4/len(dataFrame))*100,
            desired_changes = [["Yes", "No"]],
            is_nan=False,
            is_reduction=True,
            min_stable_attributes=1,
            min_flexible_attributes=1,
            is_strict_flexible = False)

In [11]:
len(actionRulesDiscovery.get_action_rules())

5129

In [12]:
def timeit():
    actionRulesDiscovery = ActionRulesDiscovery()
    actionRulesDiscovery.load_pandas(dataFrame)
    actionRulesDiscovery.fit(stable_attributes = ["gender", "SeniorCitizen", "Partner"],
                flexible_attributes  = ["PhoneService", 
                                        "InternetService", 
                                        "OnlineSecurity", 
                                        "DeviceProtection", 
                                        "TechSupport",
                                        "StreamingTV",
                                       ],
                consequent = "Churn",
                conf=60,
                supp=4,
                desired_changes = [["Yes", "No"]],
                is_nan=False,
                is_reduction=True,
                min_stable_attributes=1,
                min_flexible_attributes=1)

In [13]:
%timeit timeit()

  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)
  self.data = self.data.applymap(str)


400 ms ± 8.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
for rule in actionRulesDiscovery.get_action_rules_representation()[:20]:
    print(rule)
    print(" ")

r = [(Partner: no) ∧  (DeviceProtection: no)  ∧  (TechSupport: no)  ∧  (PhoneService: yes)  ∧ (InternetService: fiber optic → dsl)  ∧  (OnlineSecurity: no) ] ⇒ [Churn: Yes → No] with support: 0.02413744143120829, confidence: 0.38748058774339983, uplift: 0.026471157693648106.
 
r = [(Partner: no) ∧ (DeviceProtection: no → yes)  ∧  (TechSupport: no)  ∧  (PhoneService: yes)  ∧ (InternetService: fiber optic → dsl)  ∧  (OnlineSecurity: no) ] ⇒ [Churn: Yes → No] with support: 0.006247337782195088, confidence: 0.45212296159065946, uplift: 0.03761437353214331.
 
r = [(Partner: no) ∧  (DeviceProtection: no)  ∧ (TechSupport: no → yes)  ∧  (PhoneService: yes)  ∧  (InternetService: fiber optic)  ∧  (OnlineSecurity: no) ] ⇒ [Churn: Yes → No] with support: 0.009229021723697288, confidence: 0.37724485326325014, uplift: 0.024706695905048884.
 
r = [(Partner: no) ∧ (DeviceProtection: no → yes)  ∧ (TechSupport: no → yes)  ∧  (PhoneService: yes)  ∧  (InternetService: fiber optic)  ∧  (OnlineSecurity: no)