In [1]:
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))

# setup stuff.  The cell just supports the workbook, you can ignore it

EXAMPLE_DO_FOLDER = os.path.join(os.getcwd(), "example_do_folder")
INSERTION_FOLDER = EXAMPLE_DO_FOLDER

def show_loadable(at):
    with open(os.path.join(INSERTION_FOLDER, at)) as file:
        contents = file.read()
    with_bar = "\n  | ".join(contents.split("\n"))
    print(F"\n### SHOWING MODULE {'.../'+at!r}\n  | {with_bar}\n\n")

def run_do(*args, **kwargs):
    parts = [repr(x) for x in args] + [F"{k}={v!r}" for k, v in kwargs.items()]
    print(F"do({', '.join(parts)})")
    result = do(*args, **kwargs)
    print(F"--> {result!r}")
    print()


print(F"### INSERTION FOLDER        = {INSERTION_FOLDER!r}")
print(F"### DO FOLDER (in Jupyter)  = {EXAMPLE_DO_FOLDER}")
sys.path.append(os.path.dirname(os.path.dirname(EXAMPLE_DO_FOLDER)))
from ml_dat import do, DoManager, dat_config   # Add all loadables BEFORE loading this module
do.set_do_folder(dat_config.do_folder)
print(F"## DO FOLDER (in do module) = {do.do_folder!r}")
if EXAMPLE_DO_FOLDER != do.do_folder:
    print(f"WARNING: EXAMPLE_DO_FOLDER used by jupyter = {EXAMPLE_DO_FOLDER} does not match {do.do_folder}")
if not os.path.exists(INSERTION_FOLDER):
    input(f"WARNING: INSERTION_FOLDER {INSERTION_FOLDER!r} not found.")
    os.makedirs(INSERTION_FOLDER)
print("\n\n\n\n")




### INSERTION FOLDER        = '/Users/oblinger/ob/proj/ml-dat/examples/example_do_folder'
### DO FOLDER (in Jupyter)  = /Users/oblinger/ob/proj/ml-dat/examples/example_do_folder
## DO FOLDER (in do module) = '/Users/oblinger/ob/proj/ml-dat/examples/example_do_folder'







# Mapping dotted-strings to python source code objects.
The "do.load" function provides a simple way to load functions and data directly from
python source modules.


### Registering and accessing python objects

In [2]:
do.register_value("foo.bar.baz", [11, "two"])

In [3]:
do.load("foo.bar.baz")

[11, 'two']

.
##### And you can see semantically this is just a dict-tree of values:

In [5]:
do.load("foo.bar")   

Exception: DO: Base 'foo' is not defined.

### Registering python modules
Here we register a python module, then load data a functions from it via dotted.names.

In [6]:
path = f"{EXAMPLE_DO_FOLDER}/some_python_file.py"
print(f"here we are registering 'params' as {path!r}.")
do.register_module("params", path)

here we are registering 'params' as '/Users/oblinger/ob/proj/ml-dat/examples/example_do_folder/some_python_file.py'.


In [7]:
do.load("params.a_value")

{'alpha': 111}

In [8]:
fn = do.load("params.a_function")
fn()

'Hello from a function in a python file'

In [9]:
# And we can see that the do function is just a wrapper around the do.load.  This performs the same function as the above cell.
do("params.a_function")

'Hello from a function in a python file'

### DATCONFIG - Implicitly registered modules
'Do' will scan from CWD to find '.datconfig' and use the do_folder it specifies.  At load time this folder tree is scanned and all .py .json and .yaml files found are implicitly registered according to the basename of each file.

In [13]:
show_loadable("../.datconfig")


### SHOWING MODULE '.../../.datconfig'
  | {
  |     "do_folder": "example_do_folder",
  |     "inst_data_folder": "example_inst_data_folder"
  | }
  | 




In [18]:
show_loadable("hello/do_examples/my_data.py")



### SHOWING MODULE '.../hello/do_examples/my_data.py'
  | 
  | from ml_dat import do
  | 
  | my_data = [111, 222, 333]
  | 
  | message = "Hello from my_data!"
  | 
  | 
  | def my_function():
  |     print("Hello from my_function!")
  |     return 123
  | 
  | 
  | a_tree = {
  |     "a": do.load("my_yaml_data"),
  |     "b": 2,
  |     "c": {
  |         "d": my_function,
  |         "e": 4,
  |         "f": {
  |             "g": 5,
  |             "h": 6,
  |             "i": 7
  |         }
  |     }
  | }
  | 
  | 




#### Loading from implicitly defined modules
These values are all loaded from python and yaml files implicitly registered since they are contained under the do_folder.

In [19]:
do.load("my_data.message")    # Returns the global variable 'message' from the my_data.py file.

'Hello from my_data!'

In [20]:
do.load("my_data.a_tree.b")   # Returns the value of the nested variable 'b' from the 'a_tree' dictionary in my_data.py

2

In [21]:
#
# But under the covers, this tree of values is really still just data contained in some python module.  This module can be accessed directly if needed:
do.load("my_data")

[111, 222, 333]

In [22]:
show_loadable("my_yaml_data.yaml")

FileNotFoundError: [Errno 2] No such file or directory: '/Users/oblinger/ob/proj/ml-dat/examples/example_do_folder/my_yaml_data.yaml'

In [23]:
do.load("my_yaml_data.two")   # loading within a sub-structure

['gamma', 'delta']

In [24]:
do.load("my_data.a_tree.a.two")  # Same data included into another structure

['gamma', 'delta']

# CORE "DO" FUNCTIONALITY -- Dynamically Loaded Function

The do function provides efficient access to dynamically searched and loaded python functions:
1. that are referenced by a naming string
2. that are dynmaically loaded from a python module
3. that accept fixed & keyword arguments and return results as any function does

.
## EXAMPLE -- A SIMPLEST "DO" CALL
This "do" loads hello_world.py and runs the function hello_world from it.

In [25]:
show_loadable("hello_world.py")

FileNotFoundError: [Errno 2] No such file or directory: '/Users/oblinger/ob/proj/ml-dat/examples/example_do_folder/hello_world.py'

In [26]:
do("hello_world")

   hello world!


.
## EXAMPLE -- MULTIPLE DO FUNCTIONS DEFINED IN ONE MODULE
One can put multiple do functions in one file and reference them with a dot notion as shown here.

In [29]:
show_loadable("hello/do_examples/hello_again.py")


### SHOWING MODULE '.../hello/do_examples/hello_again.py'
  | def hello_again():
  |     print("   Hello World again!")
  | 
  | 
  | def hella():
  |     print("   HELLA Hello World!!!  Hello world again.")
  | 
  | 
  | def salutation(name="Hello", *, emphasis=False, lucky_number=999):
  |     line = F"   {name}, My lucky number is {lucky_number}"
  |     print(F"{line.upper()}!" if emphasis else line)
  |     return lucky_number
  | 




In [31]:
do("hello_again.hella")

   HELLA Hello World!!!  Hello world again.


.
## EXAMPLE -- PASSING ARGS AND RESULTS
Here we see fixed and keyword args being forwarded by do to the underlying function.
And likewise its result is forward back to be the result of the do call.

In [32]:
do("hello_again.salutation", "Michael", emphasis=True)

   MICHAEL, MY LUCKY NUMBER IS 999!


999

.
## EXAMPLE -- ALL SUB-FOLDERS ARE SCANNED FOR "DO" FUNCTION
Here we see "deep_hello" is called even when it occurs deeply within the folder tree.

In [37]:
show_loadable("hello/do_examples/deep/deep/deep/deep_hello.py")


### SHOWING MODULE '.../hello/do_examples/deep/deep/deep/deep_hello.py'
  | def deep_hello():
  |     print("hello echoing out from deep in the filesystem!")
  | 




In [38]:
do("deep_hello")

hello echoing out from deep in the filesystem!


.
.
# >>> CALLING A CONFIGURATION <<<
In addition to invoking a simple function, "do" can also invoke a configuration dict.
In this case:
1. The dict is expanded by recursively looking up "main.base" and using its values tree as defaults
2. Then finally calling the function associated with "main.do"
3. The expanded dict is passed as the first arg followed by args passed to do

.
## EXAMPLE -- CALLING A CONFIG
Here 'hello_config' loads a json file instead of a python function.
In this case the "main.do" value of "hello config action" is loaded and called.

In [39]:
show_loadable("hello_config.json")

FileNotFoundError: [Errno 2] No such file or directory: '/Users/oblinger/ob/proj/ml-dat/examples/example_do_folder/hello_config.json'

In [40]:
show_loadable("hello/do_examples/configurable_salutation.py")


### SHOWING MODULE '.../hello/do_examples/configurable_salutation.py'
  | def configurable_salutation(spec, name=None, *, emphasis=False, lucky_number=None):
  |     name = spec.get("name") if name is None else name
  |     emphasis = spec.get("emphasis") or emphasis
  |     lucky_number = spec.get("lucky_number") or lucky_number
  |     line = F"   {name}, My lucky number is {lucky_number}"
  |     print(F"{line.upper()}!" if emphasis else line)
  |     return lucky_number
  | 




In [41]:
do("hello_config", "Martin")

   Martin, My lucky number is 7


7

.
## EXAMPLE -- CONFIG INHERITANCE
Here 'hello_shadowing_config' sets lucky_number to 777 and inherits function to call and other parameters from 'hello_config'.

In [42]:
show_loadable("hello/do_examples/hello_shadowed_config.json")


### SHOWING MODULE '.../hello/do_examples/hello_shadowed_config.json'
  | {
  |   "main": {
  |     "base": "hello_config" },
  |   "lucky_number":  777
  | }
  | 




In [43]:
do("hello_shadowed_config")

   Hello, My lucky number is 777


777

_
## EXAMPLE -- COMBINING CONFIGS AND CODE
Complex tools (including nearly a visualizers/report generators) naturally have simple config info best expressed as a config dict,
and complex config best expressed in python.  Forcing these to be separate loadables will generate confusing sea of many tiny 
separate 2-line loadable files.

To address this "do" allows config data (normally stored in .json) to be stored in a variable in a .py file.  This allows that
config info to be bundled with functions that are referenced by that same config in the same module.  

The example below shows a silly complex tool that applies a sequence to text transformation rules to a sequence of letters.
The first loadable provides a config with the base parameters and the rule engine itself.  The second loadable configures the tool and 
provides a couple of small python rule functions that are used by the configuration all nicely wrapped up in a single .py file.

In [45]:
show_loadable("hello/do_examples/letterator.py")


### SHOWING MODULE '.../hello/do_examples/letterator.py'
  | from ml_dat import do
  | 
  | """Silly configurable tool for applying rules to a sequence of letters."""
  | letterator = {
  |     "main": {
  |       "do": "letterator.run",      # example of a complex tool config
  |       "title": "The Letterator"
  |     },   
  |     "start": 48,
  |     "end": 122
  | }
  | 
  | 
  | def run(spec):
  |     results = []
  |     for idx in range(spec["start"], spec["end"]):
  |         text = chr(idx)
  |         for step, rule_name in spec["rules"]:
  |             fn = do.load(rule_name)
  |             if idx % step == 0:
  |                 text = fn(idx, text)
  |         results.append(text)
  |     print(spec["main"]["title"])
  |     return "  ".join(results)
  | 




In [48]:
show_loadable("hello/do_examples/my_letters.py")


### SHOWING MODULE '.../hello/do_examples/my_letters.py'
  | my_letters = {
  |   "main": {"base": "letterator"},
  |   "start": 97,
  |   "rules": [
  |     (7, "my_letters.jackpot"),
  |     (3, "my_letters.triple_it"),
  |     (5, "my_letters.all_caps_it")]
  | }
  | 
  | 
  | def triple_it(_idx, text):
  |     return F"{text}{text}{text}"
  | 
  | 
  | def all_caps_it(_idx, text):
  |     return text.upper()
  | 
  | 
  | def jackpot(_idx, _text):
  |     return "jackpot "
  | 




In [49]:
do("my_letters")

The Letterator


'a  jackpot   ccc  D  e  fff  g  h  JACKPOT JACKPOT JACKPOT   j  k  lll  m  N  ooo  jackpot   q  rrr  S  t  uuu  v  jackpot   XXX  y'

.
.
# USE CASE - SELF DOCUMENTING PROCESSES
When possible we can use simple verisoned object to help us execute coding processes, and 
track/maintain those processes.  

.
## EXAMPLE -- Loadable constant
Here we show that a loadable can be any python constant data value.
In this example we have a set of named lists that are used to track 
our supported dataset, metrics, and tools.

This versioned data structure is used as input by the 'naughtly_list' script that scans
supported components to see that each has (1) a doc string, both quick and full regression tests, etc.


In [50]:
show_loadable("hello/do_examples/supported.yaml")


### SHOWING MODULE '.../hello/do_examples/supported.yaml'
  | datasets:
  | - regression_games # Any game referenced by any regression test MUST be listed here
  | - baller10  # Default dataset use by all basketball metrics
  | - volley10
  | - arron4    # Examples of higher resolution games
  | metrics:
  | - team_highlight.money  # Jason agreed money metric for team highlights 
  | - team_highlight.precision # Just the precision portion of this metric
  | - player_highlight.money # Jason agreed, include player ID
  | - basket_stats.money # Jason agree, metric for points, player, make-miss stats
  | - p_metric # used in 2022
  | 




In [51]:
show_loadable("hello/do_examples/team_highlight.py")


### SHOWING MODULE '.../hello/do_examples/team_highlight.py'
  | """
  | Team highlight money is the F1 where correctness is tied to correctly assessing shot 
  | attempt and make-miss, without consideration of player-ID nor number of points scored.
  | 
  | This is the Jason approved metric associated with our team highlights product, and we
  | have agreed 80% is the minimum approved threshold required for product ship.
  | """
  | 
  | from ml_dat import do, Inst
  | 
  | 
  | def reg_quick_test():
  |     run_result = Inst.load("reg1_latest")   # Reg1 pickle for a special 5-min snipit
  |     assert do("team_highlight_money", run_result) > .65
  | 
  | 
  | reg_full_test = "std_full1"  # indicates full regression testing is part of 'std_full1'
  | 
  | 
  | def money(_run_result: Inst) -> float:
  |     return -1  # implementation goes here
  | 
  |     
  | def precision(_run_result: Inst) -> float:
  |     return -1  # implementation goes here
  | 




In [52]:
show_loadable("hello/do_examples/naughty_list.py")


### SHOWING MODULE '.../hello/do_examples/naughty_list.py'
  | """
  | The "naughty list" scans all supported datasets, metrics, visualization/debugging tools 
  | and verifies they are (1) properly documented, (2) they execute their full regressions, 
  | (3) The continue to run against representative games, metrics, tools.
  | 
  | Any metric, tool, dataset that is not fully compliant is indicated on the naughty list.
  | """
  | 
  | from ml_dat import load_inst
  | 
  | 
  | def naughty_list():
  |     # this double for loop checks docs exist, regression test exists, and passes etc.
  |     for section, supported_insts in load_inst("supported").items():
  |         for name in supported_insts:
  |             inst = load_inst(name)
  |             if not hasattr(inst, "__DOC__"):
  |                 print(F"   {section} {name} does not have a valid doc string")
  |             # if not hasattr(inst, ):
  |             #     print(F"   {section} {name} does not have a valid doc str

In [53]:
# Note this code presently does not run


from dat.inst import Inst
from dat.do import load

reg1_name = load("supported")["datasets"][0]   # Gets then name of a mcproc result to use
reg1 = Inst(spec={}, path=".")     # This should be Inst.load(reg1) but that inst does not exist here
score = do("team_highlight.money", reg1)  # computes money metric on reg1

# do("naughty_list")   # runs our checking code

ModuleNotFoundError: No module named 'dat'

.
.
# USING DO FROM THE COMMAND LINE
Do encapsulates execution as a self describing building block.  The do function is designed to be easily 
embedded within larger execution scripts.  In some cases it is convenient for a user to directly invoke do
as a toplevel command.  The do commandline interface provide command line support "for free" for any such 
do function. It defines a simple mapping from expected --arguments and -a argument onto Python fixed and kwargs.
This probably best shown using a series of examples:

.
### EXAMPLE -- Showing default usage command for 

In [54]:
!./do --usage


SYNOPSIS
    do CMD_NAME FIXED_ARGS ... KEYWORD_ARG ...
    do KEY_WORD_ARGS  ...  CMD_NAME FIXED_ARGS ...

    do --usage
    do --get DOTTED.KEY
    do --set DOTTED.KEY=VALUE
    do --sets "DOTTED.KEY1=VALUE1, DOTTED.KEY2=VALUE2"

DESCRIPTION
    Executes the do command named by CMD_NAME.
    
    --usage     Prints the command-specific usage info if it exists
    
    --USAGE     Prints this usage message
    
    --print     Prints the python do call with args, but does not call it.
    
    --get DOTTED.NAME
                Expands the config for a command and returns an arg from it
    
    --set DOTTED.NAME=VALUE
    --sets DOTTED.NAME1=VALUE1,DOTTED.NAME2=VALUE2,...
                Expands the config for a command and updates the indicated
                config parameters before invoking the indicated command

NOTES
    Per standard UNIX 'getopt' parameter parsing two dashes ("--")
    can be used to terminate keyword arguments and cause all remai

.
### EXAMPLE -- INVOKING A DO FUNCTION FROM THE COMMAND LINE
Earlier we had hello salutation that took fixed and keyword args.
Without additonal configuration we can invoke it from the command line
using UNIX style args and flags as shown here:

In [55]:
!./do hello_again.salutation Maxim --emphasis

   MAXIM, MY LUCKY NUMBER IS 999!
999


.
### EXAMPLE -- INVOKING A CONFIGURED TOOL FROM THE COMMAND LINE
In this example we show one also can invoke a do configuration from the command line as well.
Here we have the same configurable "letterator" tool invoked as a do function above:

In [56]:
show_loadable("hello/do_examples/my_letters.py")


### SHOWING MODULE '.../hello/do_examples/my_letters.py'
  | my_letters = {
  |   "main": {"base": "letterator"},
  |   "start": 97,
  |   "rules": [
  |     (7, "my_letters.jackpot"),
  |     (3, "my_letters.triple_it"),
  |     (5, "my_letters.all_caps_it")]
  | }
  | 
  | 
  | def triple_it(_idx, text):
  |     return F"{text}{text}{text}"
  | 
  | 
  | def all_caps_it(_idx, text):
  |     return text.upper()
  | 
  | 
  | def jackpot(_idx, _text):
  |     return "jackpot "
  | 




In [57]:
!./do my_letters

The Letterator
a  jackpot   ccc  D  e  fff  g  h  JACKPOT JACKPOT JACKPOT   j  k  lll  m  N  ooo  jackpot   q  rrr  S  t  uuu  v  jackpot   XXX  y


### EXAMPLE -- TWEAK CONFIG FROM COMMANDLINE
Often we script and configure a complex test, but then we want to tweak one or two parameters over and over and check our results.
(This becomes especially powerful when intermediate results are cached, so retesting is fast.)

In [58]:
!./do my_letters --set main.title "Re-configured letterator" --json rules '[[2, "my_letters.triple_it"]]'

Re-configured letterator
a  bbb  c  ddd  e  fff  g  hhh  i  jjj  k  lll  m  nnn  o  ppp  q  rrr  s  ttt  u  vvv  w  xxx  y


.
### EXAMPLE -- SETTING MULTIPLE PARAMETERS AT ONCE
The --sets keyword can perform multiple simple assigments at once

In [59]:
!./do my_letters --sets main.title=Quickie,start=100,end=110

Quickie
D  e  fff  g  h  JACKPOT JACKPOT JACKPOT   j  k  lll  m
