In [18]:
from system_config_generation.create_new_configs import CreateNewConfigs
from system_config_generation.update_existing_configs import UpdateExistingConfigs
from rules.rules import Rules

import pandas as pd
import numpy as np
import random
import cProfile, pstats

In [2]:
# Pool of three template rule conditions, each written as an expression that
# indexes `X` by field name. The synthetic rule sets built later in the
# notebook pick entries from this list at random.
rule_logic_list = [
    "(X['account_number_avg_order_total_per_account_number_1day']>1)",
    "(X['sim_sc_ml']>0.5)",
    "(X['is_billing_shipping_city_same']==True)"
]

In [23]:
# Seed the RNG so the synthetic rule set (and therefore the workload being
# profiled) is identical on every Restart & Run All.
random.seed(0)

# Build 1,000,000 synthetic rules named 'Rule0' ... 'Rule999999', each with a
# randomly chosen condition and a random integer score in [-100, 0].
rule_strings = {}
rule_scores = {}
for i in range(1_000_000):
    rule_name = f'Rule{i}'
    # random.choice stays correct if rule_logic_list gains or loses entries,
    # unlike a hard-coded randint(0, 2) index.
    rule_strings[rule_name] = random.choice(rule_logic_list)
    rule_scores[rule_name] = random.randint(-100, 0)

First, we need to convert the rule conditions into the system-ready format. For this, we can use the Rules class - first instantiate the class with the rule conditions:

In [24]:
# Wrap the {rule name: condition string} mapping in a Rules object, which
# provides the conversion to the system-ready format used below.
rules = Rules(rule_strings=rule_strings)

Then convert the conditions to the system-ready format using the *.as_system_dicts()* method. Note that for this step, you'll need to provide the Cassandra datatypes and Cassandra field names of each field present in the rule set (use the *return_cassandra_dtypes* module in the *simility_requests* sub-package to gather this information):

In [25]:
# Cassandra datatype of every field referenced by the rule conditions.
field_datatypes = {
    'account_number_avg_order_total_per_account_number_1day': 'DOUBLE',
    'sim_sc_ml': 'FLOAT',
    'is_billing_shipping_city_same': 'BOOLEAN'
}
# Cassandra field name for each field as it appears in the rule strings.
# Only the first entry differs from its key: the first underscore after
# 'account_number' becomes a dot.
cassandra_field_names = {
    'account_number_avg_order_total_per_account_number_1day': 'account_number.avg_order_total_per_account_number_1day',
    'sim_sc_ml': 'sim_sc_ml',
    'is_billing_shipping_city_same': 'is_billing_shipping_city_same'
}

In [26]:
# Convert every rule condition to its system-ready form, using the
# datatype and field-name mappings declared above.
system_conditions = rules.as_system_dicts(
    field_datatypes=field_datatypes,
    cassandra_field_names=cassandra_field_names,
)

Now that we have our rule conditions in the system-ready format, we can use the CreateNewConfigs class to generate the system-ready configurations:

In [28]:
# Configure the config generator with the converted conditions and the
# per-rule scores built earlier.
cnc = CreateNewConfigs(
    conditions=system_conditions,
    scores=rule_scores,
    app_prefix='james_testing',
    entity='transaction',
    make_active=True,
)

In [29]:
# Generate the system-ready configurations for the rule set
# (this is the call profiled in the next section).
rule_configs = cnc.generate()

## OLD

In [30]:
# Base name (no extension) of the profile dump. The cells that write and read
# the dump append '.dat' themselves, so keeping the extension here produced
# 'system_config_gen_old.dat.dat'.
filename = 'system_config_gen_old'

In [31]:
# Re-instantiate the generator, then profile one full generate() call.
cnc = CreateNewConfigs(
    conditions=system_conditions,
    scores=rule_scores,
    app_prefix='james_testing',
    entity='transaction',
    make_active=True,
)
# The raw stats are dumped to f'{filename}.dat' and loaded with pstats below.
# NOTE(review): cProfile.run only applies `sort` when it prints the report
# itself; with `filename` given, the ordering comes from pstats instead.
cProfile.run(
    'rule_configs = cnc.generate()',
    sort='cumtime',
    filename=f'{filename}.dat',
)

In [32]:
# Load the dumped profile and print it ordered by cumulative time.
p = pstats.Stats(f'{filename}.dat')
# print_stats() returns the Stats object; the trailing ';' stops the notebook
# echoing it as a stray '<pstats.Stats at 0x...>' Out value.
p.sort_stats('cumtime').print_stats();

Wed Jan 13 15:22:03 2021    system_config_gen_old.dat.dat

         8000005 function calls in 10.080 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   10.080   10.080 {built-in method builtins.exec}
        1    0.489    0.489   10.080   10.080 <string>:1(<module>)
        1    0.705    0.705    9.591    9.591 /Users/jlaidler/Documents/tigress/tigress/argo/argo/system_config_generation/system_config_generation/create_new_configs.py:51(generate)
  1000000    1.533    0.000    8.886    0.000 /Users/jlaidler/Documents/tigress/tigress/argo/argo/system_config_generation/system_config_generation/create_new_configs.py:66(_create_config)
  1000000    0.536    0.000    7.118    0.000 //anaconda3/lib/python3.7/json/__init__.py:183(dumps)
  1000000    1.157    0.000    6.582    0.000 //anaconda3/lib/python3.7/json/encoder.py:182(encode)
  1000000    5.034    0.000    5.034    0.000 //anaconda3/lib/python3

<pstats.Stats at 0x11d6135c0>

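Can't really make this any quicker: of the 10.08 s total, about 7.1 s is spent inside the standard-library `json.dumps` call, which is outside our control.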

---

# Updating system configurations of existing rules

In [33]:
# Regenerate the baseline configurations; `rule_configs` is the input passed
# to UpdateExistingConfigs later in this section.
cnc = CreateNewConfigs(
    conditions=system_conditions,
    scores=rule_scores,
    app_prefix='james_testing',
    entity='transaction',
    make_active=True,
)
rule_configs = cnc.generate()

The optimised conditions and scores are:

In [34]:
# Seed the RNG so the synthetic "optimised" rule set is identical on every
# Restart & Run All.
random.seed(1)

# Build 1,000,000 "optimised" rules under the same names
# ('Rule0' ... 'Rule999999'), each with a randomly chosen condition and a
# random integer score in [-100, 0].
opt_rule_strings = {}
opt_rule_scores = {}
for i in range(1_000_000):
    rule_name = f'Rule{i}'
    # random.choice stays correct if rule_logic_list gains or loses entries,
    # unlike a hard-coded randint(0, 2) index.
    opt_rule_strings[rule_name] = random.choice(rule_logic_list)
    opt_rule_scores[rule_name] = random.randint(-100, 0)

First, we need to convert the optimised rule conditions into the system-ready format. For this, we can use the Rules class - first instantiate the class with the rule conditions:

In [35]:
# Rebind `rules` to a Rules object built from the optimised condition strings
# (this replaces the object created for the original rule set).
rules = Rules(rule_strings=opt_rule_strings)

Then convert the conditions to the system-ready format using the *.as_system_dicts()* method. Note that for this step, you'll need to provide the Cassandra datatypes and Cassandra field names of each field present in the rule set (use the *return_cassandra_dtypes* module in the *simility_requests* sub-package to gather this information):

In [36]:
# Re-declares the same two mappings used for the original rule set earlier in
# the notebook: Cassandra datatype per field ...
field_datatypes = {
    'account_number_avg_order_total_per_account_number_1day': 'DOUBLE',
    'sim_sc_ml': 'FLOAT',
    'is_billing_shipping_city_same': 'BOOLEAN'
}
# ... and Cassandra field name per field (only the first entry differs from
# its key).
cassandra_field_names = {
    'account_number_avg_order_total_per_account_number_1day': 'account_number.avg_order_total_per_account_number_1day',
    'sim_sc_ml': 'sim_sc_ml',
    'is_billing_shipping_city_same': 'is_billing_shipping_city_same'
}

In [37]:
# Convert the optimised rule conditions to their system-ready form.
opt_system_conditions = rules.as_system_dicts(
    field_datatypes=field_datatypes,
    cassandra_field_names=cassandra_field_names,
)

In [38]:
# Uncomment to inspect the converted conditions (left disabled to avoid
# dumping the whole structure into the notebook output):
# opt_system_conditions

Now that we have our rule conditions in the system-ready format, we can use the UpdateExistingConfigs class to generate the system-ready configurations:

In [39]:
# Pair the existing configurations with the optimised conditions and scores
# that should replace their current values.
uec = UpdateExistingConfigs(
    rule_configs=rule_configs,
    updated_conditions=opt_system_conditions,
    updated_scores=opt_rule_scores,
)

In [40]:
# Run the update and keep the result
# (this is the call profiled in the sections below).
updated_rule_configs = uec.update()

## OLD

In [41]:
# Base name (no extension) of the profile dump. The cells that write and read
# the dump append '.dat' themselves, so keeping the extension here produced
# 'system_config_update_old.dat.dat'.
filename = 'system_config_update_old'

In [42]:
# Re-instantiate the updater, then profile one full update() call
# (the return value is discarded here).
uec = UpdateExistingConfigs(
    rule_configs=rule_configs,
    updated_conditions=opt_system_conditions,
    updated_scores=opt_rule_scores,
)
# The raw stats are dumped to f'{filename}.dat' and loaded with pstats below.
# NOTE(review): cProfile.run only applies `sort` when it prints the report
# itself; with `filename` given, the ordering comes from pstats instead.
cProfile.run(
    'uec.update()',
    sort='cumtime',
    filename=f'{filename}.dat',
)

In [43]:
# Load the dumped profile and print it ordered by cumulative time.
p = pstats.Stats(f'{filename}.dat')
# print_stats() returns the Stats object; the trailing ';' stops the notebook
# echoing it as a stray '<pstats.Stats at 0x...>' Out value.
p.sort_stats('cumtime').print_stats();

Wed Jan 13 15:28:18 2021    system_config_update_old.dat.dat

         11000005 function calls in 11.183 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   11.183   11.183 {built-in method builtins.exec}
        1    0.000    0.000   11.183   11.183 <string>:1(<module>)
        1    0.723    0.723   11.183   11.183 /Users/jlaidler/Documents/tigress/tigress/argo/argo/system_config_generation/system_config_generation/update_existing_configs.py:44(update)
  1000000    3.408    0.000   10.460    0.000 /Users/jlaidler/Documents/tigress/tigress/argo/argo/system_config_generation/system_config_generation/update_existing_configs.py:60(_update_config)
  1000000    0.539    0.000    6.469    0.000 //anaconda3/lib/python3.7/json/__init__.py:183(dumps)
  1000000    1.120    0.000    5.930    0.000 //anaconda3/lib/python3.7/json/encoder.py:182(encode)
  1000000    4.410    0.000    4.410    0.000 //anaconda3

<pstats.Stats at 0x11d5d89b0>

## NEW

In [44]:
# Base name (no extension) of the profile dump. The cells that write and read
# the dump append '.dat' themselves, so keeping the extension here produced
# 'system_config_update_new.dat.dat'.
filename = 'system_config_update_new'

In [45]:
# Same profiling call as the OLD run above.
# NOTE(review): presumably the UpdateExistingConfigs implementation was
# changed between the two runs - confirm, since nothing in this cell differs.
uec = UpdateExistingConfigs(
    rule_configs=rule_configs,
    updated_conditions=opt_system_conditions,
    updated_scores=opt_rule_scores,
)
# The raw stats are dumped to f'{filename}.dat' and loaded with pstats below.
# NOTE(review): cProfile.run only applies `sort` when it prints the report
# itself; with `filename` given, the ordering comes from pstats instead.
cProfile.run(
    'uec.update()',
    sort='cumtime',
    filename=f'{filename}.dat',
)

In [46]:
# Load the dumped profile and print it ordered by cumulative time.
p = pstats.Stats(f'{filename}.dat')
# print_stats() returns the Stats object; the trailing ';' stops the notebook
# echoing it as a stray '<pstats.Stats at 0x...>' Out value.
p.sort_stats('cumtime').print_stats();

Wed Jan 13 15:31:40 2021    system_config_update_new.dat.dat

         11000005 function calls in 11.645 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   11.645   11.645 {built-in method builtins.exec}
        1    0.000    0.000   11.645   11.645 <string>:1(<module>)
        1    0.688    0.688   11.645   11.645 /Users/jlaidler/Documents/tigress/tigress/argo/argo/system_config_generation/system_config_generation/update_existing_configs.py:44(update)
  1000000    3.397    0.000   10.957    0.000 /Users/jlaidler/Documents/tigress/tigress/argo/argo/system_config_generation/system_config_generation/update_existing_configs.py:60(_update_config)
  1000000    0.524    0.000    6.999    0.000 //anaconda3/lib/python3.7/json/__init__.py:183(dumps)
  1000000    1.079    0.000    6.475    0.000 //anaconda3/lib/python3.7/json/encoder.py:182(encode)
  1000000    5.014    0.000    5.014    0.000 //anaconda3

<pstats.Stats at 0x11ee9e278>

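## Can't really improve runtime

The NEW run (11.645 s) is no faster than the OLD run (11.183 s). In both, `json.dumps` accounts for more than half of the cumulative time (roughly 6.5-7.0 s).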