# Dakar Rules

An experiment in using a rule based system to generate rally results fact statements.

Here's some set-up for working with my scraped Dakar results data.

In [11]:
# Stage number used to filter the rankings query.
STAGE = 3

# Upper bound on finishing position used to filter the rankings query.
MAX = 10

# Named presets. 'v' is the vehicle type; 'b' is a number stored alongside it
# (presumably the competitor's bib number -- TODO confirm).
setups = {
    'sunderland': {'v': 'moto', 'b': 3},
    'alo': {'v': 'car', 'b': 310},
    'sainz': {'v': 'car', 'b': 305},
    'attiyah': {'v': 'car', 'b': 300},
    'price': {'v': 'moto', 'b': 1},
}


def get_setup(n):
    """Return the `(v, b)` pair of the preset called `n`.

    Raises `KeyError` when `n` is not a key of `setups`.
    """
    preset = setups[n]
    return preset['v'], preset['b']


# Active preset for the rest of the notebook.
VTYPE, REBASER = get_setup('price')

And the database handler itself...

In [12]:
import sqlite3
from sqlite_utils import Database

# SQLite file to open; a relative path, so it is resolved against the
# current working directory.
dbname = 'dakar_2020.db'

# Plain DB-API connection; also handed to pandas.read_sql() further down.
conn = sqlite3.connect(dbname)
# sqlite_utils wrapper built on the same connection.
db = Database(conn)

The rules engine I'm going to use is [`durable_rules`](https://github.com/jruizgit/rules).

In [13]:
#https://github.com/jruizgit/rules/blob/master/docs/py/reference.md
#%pip install durable_rules
from durable.lang import *

We'll also be using `pandas`...

In [14]:
import pandas as pd

Let's grab a simple set of example rankings from the database, as a `pandas` dataframe...

In [15]:
# Bind the filter values as query parameters instead of interpolating them
# into the SQL text, so quoting/escaping is left to the SQLite driver.
q = """
SELECT * FROM ranking
WHERE VehicleType = ? AND Type = 'general' AND Stage = ? AND Pos <= ?
"""
# Missing values are replaced with 0 before the rows are used further on.
tmpq = pd.read_sql(q, conn, params=(VTYPE, STAGE, MAX)).fillna(0)
tmpq.head(3)

Unnamed: 0,Year,Stage,Type,Pos,Bib,VehicleType,Crew,Brand,Time_raw,TimeInS,Gap_raw,GapInS,Penalty_raw,PenaltyInS
0,2020,3,general,1,9,moto,R. BRABEC MONSTER ENERGY HONDA TEAM 2020,HONDA,10:39:04,38344,0:00:00,0.0,00:00:00,0.0
1,2020,3,general,2,7,moto,K. BENAVIDES MONSTER ENERGY HONDA TEAM 2020,HONDA,10:43:47,38627,0:04:43,283.0,00:00:00,0.0
2,2020,3,general,3,2,moto,M. WALKNER RED BULL KTM FACTORY TEAM,KTM,10:45:06,38706,0:06:02,362.0,00:00:00,0.0


The `inflect` package makes it easy to generate number words from numerics... and a whole host of other things...

In [16]:
#https://github.com/jazzband/inflect
import inflect

p = inflect.engine()

The following function is a simple handler for generating nice time strings...

In [17]:
# Based on: https://stackoverflow.com/a/24542445/454773
# (unit name, length of the unit in seconds), ordered from largest to smallest.
intervals = (
    ('weeks', 604800),  # 60 * 60 * 24 * 7
    ('days', 86400),    # 60 * 60 * 24
    ('hours', 3600),    # 60 * 60
    ('minutes', 60),
    ('seconds', 1),
    )


def display_time(t, granularity=3,
                 sep=',', andword='and',
                 units = 'seconds', intify=True):
    """Take a time in seconds and return a sensible
        natural language interpretation of it.

    Parameters:
        t: the duration to describe.
        granularity: maximum number of (non-zero) units to include,
            counted from the largest unit present.
        sep: separator placed between all parts except the last two.
        andword: word placed between the last two parts.
        units: unit `t` is expressed in; only 'seconds' is supported.
        intify: if true, `t` is truncated with `int()` first.

    Returns:
        A string such as '1 hour, 2 minutes and 3 seconds', or
        '0 seconds' when no unit has a non-zero count. For any `units`
        value other than 'seconds' the function returns None.

    Negative durations are not handled specially.
    """
    def nl_join(l):
        """Join parts as 'a, b and c' / 'a and b' / 'a'."""
        if not l:
            # Nothing to report (e.g. t == 0): avoid indexing an empty list.
            return '0 seconds'
        if len(l) == 1:
            return l[0]
        # Everything but the last part is separated by `sep`; the final
        # part is attached with `andword`.
        head = f'{sep} '.join(l[:-1])
        return f'{head} {andword} {l[-1]}'

    result = []

    if intify:
        t=int(t)

    #Need better handle for arbitrary time strings
    #Perhaps parse into a timedelta object
    # and then generate NL string from that?
    if units=='seconds':
        for name, count in intervals:
            value = t // count
            if value:
                t -= value * count
                if value == 1:
                    # Singular form: 'hours' -> 'hour'.
                    name = name.rstrip('s')
                result.append("{} {}".format(value, name))

        return nl_join(result[:granularity])

    # Unsupported unit: keep the historical behaviour of returning None.
    return None

I suspect there's a way of doing things "occasionally" via the rules engine, but at times it may be easier to have rules that create statements "occasionally" as part of the rule code. This adds variety to generated text.

The following functions help with that, returning strings probabilistically.

In [18]:
import random

def sometimes(t, p=0.5):
    """Return `t` with probability `p`, otherwise the empty string.

    `p` is the chance of *keeping* the text: `p=0` never returns it and
    `p=1` always does.
    """
    # random.random() is uniform on [0, 1), so this is true with probability p.
    if random.random() < p:
        return t
    return ''

def occasionally(t):
    """Return `t` about one time in five (p=0.2), otherwise ''."""
    return sometimes(t, p=0.2)

def rarely(t):
    """Return `t` about one time in twenty (p=0.05), otherwise ''."""
    return sometimes(t, p=0.05)

def pickone_equally(l, prefix='', suffix=''):
    """Pick one element of `l` uniformly at random and decorate it.

    A truthy pick is returned wrapped as prefix + pick + suffix.
    A falsy pick (e.g. '' or None) yields just `suffix`, without the prefix.
    """
    chosen = random.choice(l)
    if not chosen:
        return suffix
    return '{}{}{}'.format(prefix, chosen, suffix)

def pickfirst_prob(l, p=0.5):
    """Return l[0] with probability `p`; otherwise return one of the
       remaining items, each equally likely.

       A single-item list always yields that item.
    """
    has_alternatives = len(l) > 1
    if has_alternatives:
        draw = random.random()
        if draw >= p:
            return random.choice(l[1:])
    return l[0]


Create a simple test ruleset for commenting on a simple results table.

Rather than printing out statements in each rule, as the demos show, let's instead append generated text elements to an ordered list, and then render that at the end.

(We could also return a tuple from a rule, eg `(POS, TXT)` that would allow us to re-order statements when generating the final text rendering.)

In [19]:
from durable.lang import *

# Generated sentences are collected here, in the order the rules fire.
txts = []

with ruleset('test1'):
    
    #Display something about the crew in first place
    @when_all(m.Pos == 1)
    def whos_in_first(c):
        """Generate a sentence to report on the first placed vehicle."""
        #We can add additional state, accessible from other rules
        #In this case, record the Crew and Brand for the first placed crew
        c.s.first_crew = c.m.Crew
        c.s.first_brand = c.m.Brand
        
        #Python f-strings make it easy to generate text sentences that include data elements
        txts.append(f'{c.m.Crew} were in first in their {c.m.Brand} with a time of {c.m.Time_raw}.')
    
    #This just checks whether we get multiple rule fires...
    @when_all(m.Pos == 1)
    def whos_in_first2(c):
        """Second rule with the same condition as `whos_in_first`; appends a marker line."""
        txts.append('we got another first...')
        
    #We can be a bit more creative in the other results
    @when_all(m.Pos>1)
    def whos_where(c):
        """Generate a sentence to describe the position of each other placed vehicle."""
        
        #Use the inflect package to natural language textify position numbers...
        #(`p` here is the module-level inflect engine)
        nth = p.number_to_words(p.ordinal(c.m.Pos))
        
        #Use various probabilistic text generators to make a comment for each other result
        #Relies on first_crew / first_brand having been stored by whos_in_first
        first_opts = [c.s.first_crew, 'the stage winner']
        if c.m.Brand==c.s.first_brand:
            first_opts.append(f'the first placed {c.m.Brand}')
        #Either report the raw time, or (sometimes) the gap to the leader;
        #if the gap option is picked but sometimes() returns '', no clause is added at all
        t = pickone_equally([f'with a time of {c.m.Time_raw}',
                             f'{sometimes(f"{display_time(c.m.GapInS)} behind {pickone_equally(first_opts)}")}'],
                           prefix=', ')
        
        #And add even more variation possibilities into the returned generated sentence
        txts.append(f'{c.m.Crew} were in {nth}{sometimes(" position")}{sometimes(f" representing {c.m.Brand}")}{t}.')
    

The rules handler doesn't seem to like the `numpy` typed numerical objects that the `pandas` dataframe provides, but if we cast the dataframe values to JSON and then back to a Python `dict`, everything seems to work fine.

In [41]:
type(tmpq.iloc[0].to_dict()['Pos'])

numpy.int64

In [42]:
tmpq[['Pos', 'Crew','Brand']].iloc[0].to_dict()

{'Pos': 1,
 'Crew': 'R. BRABEC MONSTER ENERGY HONDA TEAM 2020',
 'Brand': 'HONDA'}

In [40]:

post('test1',tmpq[['Pos', 'Crew','Brand']].iloc[0].to_dict())
txts

TypeError: Object of type int64 is not JSON serializable

In [10]:
import json
#This handles numpy types that ruleset json serialiser doesn't like
tmp = json.loads(tmpq.iloc[0].to_json())

If we post as an event, then only a single rule can be fired from it


In [11]:
post('test1',tmp)
print(''.join(txts))

R. BRABEC MONSTER ENERGY HONDA TEAM 2020 were in first in their HONDA with a time of 10:39:04.


We can create a function that can be applied to each row of a `pandas` dataframe that will run the contents of the row through the ruleset:

In [12]:
def rulesbyrow(row, ruleset):
    """Post a single dataframe row to `ruleset` as an event.

    Parameters:
        row: a pandas Series (e.g. as supplied by `DataFrame.apply(axis=1)`).
        ruleset: name of the durable_rules ruleset to post to.
    """
    # Let pandas do the serialising: Series.to_json() copes with numpy
    # scalar types, whereas json.dumps(row.to_dict()) raises TypeError
    # whenever the row still holds values such as numpy.int64.
    event = json.loads(row.to_json())
    post(ruleset, event)

Capture the text results generated from the ruleset into a list, and then display the results. 

In [14]:
# Start from an empty list so only sentences from this run are shown.
txts=[]
# Run every row (axis=1) through the 'test1' ruleset as an event.
tmpq.apply(rulesbyrow, ruleset='test1', axis=1)

# One generated sentence per paragraph.
print('\n\n'.join(txts))

R. BRABEC MONSTER ENERGY HONDA TEAM 2020 were in first in their HONDA with a time of 10:39:04.

K. BENAVIDES MONSTER ENERGY HONDA TEAM 2020 were in second representing HONDA.

M. WALKNER RED BULL KTM FACTORY TEAM were in third.

J. BARREDA BORT MONSTER ENERGY HONDA TEAM 2020 were in fourth, with a time of 10:50:06.

JI. CORNEJO FLORIMO MONSTER ENERGY HONDA TEAM 2020 were in fifth, 11 minutes and 19 seconds behind the stage winner.

T. PRICE RED BULL KTM FACTORY TEAM were in sixth representing KTM, with a time of 10:51:02.

L. BENAVIDES RED BULL KTM FACTORY TEAM were in seventh position, with a time of 10:53:24.

P. QUINTANILLA ROCKSTAR ENERGY HUSQVARNA FACTORY RACING were in eighth representing HUSQVARNA.

S. SUNDERLAND RED BULL KTM FACTORY TEAM were in ninth position.

X. DE SOULTRAIT MONSTER ENERGY YAMAHA RALLY TEAM were in tenth position, with a time of 10:58:59.


We can evaluate a whole set of events passed as list of events using the `post_batch(RULESET,EVENTS)` function. It's easy enough to convert a `pandas` dataframe into a list of palatable `dict`s... 

In [16]:
def df_json(df):
    """Turn a pandas dataframe into a list of plain dicts, one per row.

    The frame is serialised to a JSON string (records orientation) and
    parsed straight back, so the resulting values are palatable to the
    rules engine.
    """
    records_json = df.to_json(orient='records')
    return json.loads(records_json)

Unfortunately, the `post_batch()` route doesn't look like it necessarily commits the rows to the ruleset in the provided row order? (Has the `dict` lost its ordering?)

In [19]:
# Start from an empty list so only sentences from this run are shown.
txts=[]

# Submit all rows to the 'test1' ruleset in a single batch call.
post_batch('test1', df_json(tmpq))
print('\n\n'.join(txts))

R. BRABEC MONSTER ENERGY HONDA TEAM 2020 were in first in their HONDA with a time of 10:39:04.

X. DE SOULTRAIT MONSTER ENERGY YAMAHA RALLY TEAM were in tenth position, with a time of 10:58:59.

S. SUNDERLAND RED BULL KTM FACTORY TEAM were in ninth, with a time of 10:56:14.

P. QUINTANILLA ROCKSTAR ENERGY HUSQVARNA FACTORY RACING were in eighth position representing HUSQVARNA, 15 minutes and 40 seconds behind R. BRABEC MONSTER ENERGY HONDA TEAM 2020.

L. BENAVIDES RED BULL KTM FACTORY TEAM were in seventh, with a time of 10:53:24.

T. PRICE RED BULL KTM FACTORY TEAM were in sixth position representing KTM, with a time of 10:51:02.

JI. CORNEJO FLORIMO MONSTER ENERGY HONDA TEAM 2020 were in fifth position, with a time of 10:50:23.

J. BARREDA BORT MONSTER ENERGY HONDA TEAM 2020 were in fourth representing HONDA.

M. WALKNER RED BULL KTM FACTORY TEAM were in third, with a time of 10:45:06.

K. BENAVIDES MONSTER ENERGY HONDA TEAM 2020 were in second, with a time of 10:43:47.


We can also assert the rows as `facts` rather than running them through the ruleset as `events`.

In [21]:
def factsbyrow(row, ruleset):
    """Assert a single dataframe row into `ruleset` as a fact.

    Parameters:
        row: a pandas Series (e.g. as supplied by `DataFrame.apply(axis=1)`).
        ruleset: name of the durable_rules ruleset to assert into.
    """
    # Let pandas do the serialising: Series.to_json() copes with numpy
    # scalar types, whereas json.dumps(row.to_dict()) raises TypeError
    # whenever the row still holds values such as numpy.int64.
    fact = json.loads(row.to_json())
    assert_fact(ruleset, fact)

The fact is retained even if it matches a rule, so it gets a chance to match other rules too...

In [22]:
# Start from an empty list so only sentences from this run are shown.
txts=[]
# Assert every row (axis=1) into 'test1' as a fact; the trailing ';' hides the cell's return value.
tmpq.apply(factsbyrow, ruleset='test1', axis=1);
print('\n\n'.join(txts))

R. BRABEC MONSTER ENERGY HONDA TEAM 2020 were in first in their HONDA with a time of 10:39:04.

we got another first...

K. BENAVIDES MONSTER ENERGY HONDA TEAM 2020 were in second, with a time of 10:43:47.

M. WALKNER RED BULL KTM FACTORY TEAM were in third representing KTM.

J. BARREDA BORT MONSTER ENERGY HONDA TEAM 2020 were in fourth representing HONDA, with a time of 10:50:06.

JI. CORNEJO FLORIMO MONSTER ENERGY HONDA TEAM 2020 were in fifth position, 11 minutes and 19 seconds behind the first placed HONDA.

T. PRICE RED BULL KTM FACTORY TEAM were in sixth representing KTM, with a time of 10:51:02.

L. BENAVIDES RED BULL KTM FACTORY TEAM were in seventh position, 14 minutes and 20 seconds behind the stage winner.

P. QUINTANILLA ROCKSTAR ENERGY HUSQVARNA FACTORY RACING were in eighth position, 15 minutes and 40 seconds behind R. BRABEC MONSTER ENERGY HONDA TEAM 2020.

S. SUNDERLAND RED BULL KTM FACTORY TEAM were in ninth position, 17 minutes and 10 seconds behind R. BRABEC MONSTER 

However, if we apply the same facts multiple times, I think we get an error and bork the ruleset...

In [2]:
from durable.lang import *
with ruleset('flow4'):
    
    @when_all(m.action == 'start')
    def first(c):
        """Always raise, to see what the engine does with an exception thrown inside a rule."""
        raise Exception('Unhandled Exception!')

    # when the exception property exists
    @when_all(+s.exception)
    def second(c):
        """Print the exception found on the ruleset state, then clear it."""
        print(c.s.exception)
        c.s.exception = None
            
# Matches `first`, which raises.
post('flow4', { 'action': 'start' })
# No rule has a condition on action == 'stop' / 'stops'; the recorded output
# shows MessageNotHandledException being raised for the 'stop' message.
post('flow4', { 'action': 'stop' })
post('flow4', { 'action': 'stops' })

exception caught Unhandled Exception!, traceback ['  File "/usr/local/lib/python3.7/site-packages/durable/engine.py", line 233, in run\n    self._func(c)\n', '  File "<ipython-input-2-f66570ecb6de>", line 6, in first\n    raise Exception(\'Unhandled Exception!\')\n']


MessageNotHandledException: {'action': 'stop'}

In [46]:
import pandas as pd
import numpy as np
df=pd.DataFrame({'intval':[1], 'strval':['a']})
df['intval'] = df['intval'].astype(np.int64)
df.dtypes

intval     int64
strval    object
dtype: object

In [38]:
type(df.iloc[0]['intval'])

numpy.int64

In [47]:
df.iloc[0].to_dict()

{'intval': 1, 'strval': 'a'}

In [52]:
from durable.lang import *
# Minimal ruleset for checking how numpy integer values are handled when posted.
with ruleset('_npint_test'):
    
    @when_all(m.intval >0)
    def testint(c):
        """Print a marker whenever a message with a positive `intval` is matched."""
        print('works')
            


In [53]:
post('_npint_test', df.iloc[0].to_dict())

TypeError: Object of type int64 is not JSON serializable

In [55]:
post('_npint_test', df[['intval']].iloc[0].to_dict())

works


{'sid': '0', 'id': 'sid-0', '$s': 1}

In [56]:
df.iloc[0].to_dict()

{'intval': 1, 'strval': 'a'}

In [57]:
df[['intval']].iloc[0].to_dict()

{'intval': 1}

In [29]:
from durable.lang import *

# Name of the scratch ruleset used to explore `matches()` patterns.
TEST = 'test819'

# NOTE(review): nothing in this cell appends to `capture`.
capture=[]
with ruleset(TEST):
    
    # An 'F' followed by any number of 'T's.
    @when_all(m.test.matches('F[T]*'))
    def trailT(c):
        print('F[T]*',c.m.test)
        
    # Any mix of 'T'/'F' followed by three 'T's.
    @when_all(m.test.matches('[TF]*[T]{3}'))
    def trail3T(c):
        print('[TF]*[T]{3}',c.m.test)
        
    # Anything followed by four 'T's (the printed label omits the leading '.*').
    @when_all(m.test.matches('.*[T]{4}'))
    def trail4T(c):
        print('[T]{4}', c.m.test)
        
    # Any mix of 'T'/'F' followed by a 'T'.
    @when_all(m.test.matches('[TF]*T'))
    def shouldfail(c):
        print('[TF]*T', c.m.test)
        
    # Same pattern as `shouldfail`; the recorded output shows both rules firing.
    @when_all(m.test.matches('[TF]*T'))
    def finalt(c):
        print('[TF]*T',c.m.test)
        
        
    # Catch-all: '.*' is applied to every message that has a `test` value.
    @when_all(m.test.matches('.*'))
    def catcher(c):
        print(f'missed {c.m.test}')

            


In [32]:
from durable.engine import MessageNotHandledException

# Sample facts; the last one has no 'test' key, so none of the rules above
# (which all inspect m.test) has anything to match against.
facts = [{'test':'FTFT'}, {'test':'FFFF'}, {'test':'FFFT'}, {'test':'FTTTT'}, 
         {'test2':[1,2,3,1]}]

for fact in facts:
    try:
        # Assert, then retract, so each fact is evaluated on its own.
        assert_fact(TEST, fact )
        retract_fact(TEST, fact )
    except MessageNotHandledException as error:
        # Deliberately ignored: a fact that no rule handles should not stop the loop.
        pass
    

[TF]*T FTFT
[TF]*T FTFT
missed FTFT
missed FFFF
[TF]*T FFFT
[TF]*T FFFT
missed FFFT
F[T]* FTTTT
[TF]*[T]{3} FTTTT
[T]{4} FTTTT
[TF]*T FTTTT
[TF]*T FTTTT
missed FTTTT


In [34]:
"TTTT".count("T"), "TFTT".count("T")

(4, 3)

In [7]:
# NOTE(review): `capture` is created empty and nothing in the visible cells
# appends to it, so this cell depends on leftover kernel state; with an
# empty list `capture[0]` raises IndexError.
dir(capture[0])

['__add__',
 '__and__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__or__',
 '__pos__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '_left',
 '_op',
 '_right',
 '_type',
 'alias',
 'allItems',
 'anyItem',
 'define',
 'imatches',
 'matches']

In [21]:
dir(capture[0].matches)

['__call__',
 '__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__func__',
 '__ge__',
 '__get__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__self__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__']

In [11]:
capture.value

AttributeError: 'list' object has no attribute 'value'

In [None]:
capture[0].allItems