In [34]:
import pandas as pd
from pathlib import Path
import sys

# Make the sibling ``common`` directory (one level up from the notebook's
# working directory) importable; the ``audit`` module is imported from there.
common_dir = str(Path.cwd().parent / "common")
if common_dir not in sys.path:  # re-running the cell must not stack duplicate entries
    sys.path.insert(0, common_dir)

from audit import AuditLogReader, AuditLogRenderer

# Load IPython's autoreload extension and select mode 2, so imported modules
# (e.g. ``audit``) are reloaded automatically before each cell executes.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# Workspace directory to audit. NOTE(review): absolute path on the author's
# machine — adjust before running anywhere else.
log_dir = Path("/home/mike/workspace/soa-ilec/soa-ilec/data/workspaces/test_agent_7/")
# Reader bound to that workspace; the cells below query it for the audit logs.
log_rdr = AuditLogReader(log_dir)

In [38]:
import json 

# Fetch the SQL audit log from the reader.
sql_log = log_rdr.traverse_sql_audit_log()
# Fetch the model audit log; the call returns three values. Presumably these are
# the final model's log, the logs of every model, and all entries in time order
# (inferred from the names — TODO confirm against ``audit.py``).
final_model_log, all_model_logs, all_by_time = log_rdr.traverse_model_audit_log()


In [40]:
# Build a renderer for the same workspace directory and produce the audit
# report; the result is HTML markup (see the printed output).
au = AuditLogRenderer(log_dir)
audit_html = au.render()
# Print the raw markup rather than the rendered page, so the generated HTML
# itself can be inspected.
print(audit_html)

<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.6.0/css/fontawesome.min.css" integrity="sha384-NvKbDTEnL+A8F/AA5Tc5kmMLSJHUO868P+lDtTpJIeQdGYaUIuLr4lVGOEA1OcMy" crossorigin="anonymous">

<h2>SQL Log</h2>

<details>
    <summary>Basic dataset size and exposure/death totals; check for nonpositive exposure</summary>
    <pre>select 
  count(*) as n_rows,
  sum(case when ExpDeathQx2015VBTwMI_byPol &lt;= 0 or ExpDeathQx2015VBTwMI_byPol is null then 1 else 0 end) as n_bad_exposure,
  sum(Number_Of_Deaths) as total_deaths,
  sum(ExpDeathQx2015VBTwMI_byPol) as total_exposure
from UL_MODEL_DATA_SMALL
LIMIT 1000;</pre>
</details>

<details>
    <summary>Counts and totals by DATASET, Gender, Smoker_Status to check balance and coverage</summary>
    <pre>select 
  DATASET, Gender, Smoker_Status,
  count(*) as n,
  sum(ExpDeathQx2015VBTwMI_byPol) as exposure,
  sum(Number_Of_Deaths) as deaths
from UL_MODEL_DATA_SMALL
group by DATASET, Gender, Smoker_Status


In [41]:
# Imported in this cell so it works on a fresh kernel (Restart & Run All):
# the only other import of these names sits in a cell further down.
from IPython.display import HTML, display

# Render the audit report inline as rich HTML output.
display(HTML(audit_html))

In [24]:
from IPython.display import display, HTML
from jinja2 import Environment, FileSystemLoader, select_autoescape


# Folder that holds the project's Jinja templates.
template_dir = "/home/mike/workspace/soa-ilec/soa-ilec/common/templates/"

# Jinja environment: templates are loaded from ``template_dir`` and
# autoescaping is enabled for html/xml templates.
env = Environment(
    loader=FileSystemLoader(template_dir),
    autoescape=select_autoescape(["html", "xml"]),
)

# Fetch the audit-log template and fill it with the SQL log plus the
# ``all_by_time`` entries (exposed to the template as ``cmd_log``).
audit_template = env.get_template("audit_log.html")
audit_context = {"sql_log": sql_log, "cmd_log": all_by_time}
audit_html = audit_template.render(**audit_context)

# Show the rendered report inline.
display(HTML(audit_html))

In [25]:
# Dump the raw HTML source produced by the template render, for inspection.
print(audit_html)

<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.6.0/css/fontawesome.min.css" integrity="sha384-NvKbDTEnL+A8F/AA5Tc5kmMLSJHUO868P+lDtTpJIeQdGYaUIuLr4lVGOEA1OcMy" crossorigin="anonymous">

<h2>SQL Log</h2>

<details>
    <summary>Count rows and TRAIN/TEST split</summary>
    <pre>select count(*) as n_rows, sum(case when DATASET=&#39;TRAIN&#39; then 1 else 0 end) as n_train, sum(case when DATASET=&#39;TEST&#39; then 1 else 0 end) as n_test from UL_MODEL_DATA_SMALL limit 1000</pre>
</details>

<details>
    <summary>Check exposure quality and summary stats</summary>
    <pre>select 
  sum(case when ExpDeathQx2015VBTwMI_byPol &lt;= 0 or ExpDeathQx2015VBTwMI_byPol is null then 1 else 0 end) as n_bad_exposure,
  min(ExpDeathQx2015VBTwMI_byPol) as min_exposure,
  max(ExpDeathQx2015VBTwMI_byPol) as max_exposure,
  sum(ExpDeathQx2015VBTwMI_byPol) as tot_exposure,
  sum(Number_Of_Deaths) as tot_deaths,
  sum(Number_Of_Deaths)/nullif(sum(ExpDeathQx2015VBTw

In [19]:
# Inspect the ``all_by_time`` entries; per the output, each is a dict with
# ``workspace_id``, ``type`` and ``entry`` keys.
all_by_time

[{'workspace_id': '845c4a85-8599-4af7-a95d-1b619b953af2',
  'type': 1,
  'entry': None},
 {'workspace_id': 'a34c210b-7558-4fe3-963f-1afdb70bad2d',
  'type': 2,
  'entry': {'last_workspace_id': '845c4a85-8599-4af7-a95d-1b619b953af2',
   'workspace_id': 'a34c210b-7558-4fe3-963f-1afdb70bad2d',
   'tool_name': 'cmd_create_dataset',
   'args': ['model_data_train',
    "select \n  DATASET,\n  Gender as GENDER,\n  case \n    when Smoker_Status is null or upper(Smoker_Status)='UNKNOWN' then 'Unknown'\n    when upper(Smoker_Status)='SMOKER' then 'Smoker'\n    when upper(Smoker_Status)='NONSMOKER' then 'NonSmoker'\n    else Smoker_Status end as SMOKER_STATUS,\n  Attained_Age as ATTAINED_AGE,\n  Number_Of_Deaths as NUMBER_OF_DEATHS,\n  ExpDeathQx2015VBTwMI_byPol as EXPDEATHQX2015VBTWMI_BYPOL\nfrom UL_MODEL_DATA_SMALL\nwhere DATASET='TRAIN' and ExpDeathQx2015VBTwMI_byPol > 0"],
   'result': {'success': True, 'result': {'n_rows': 716.0}}}},
 {'workspace_id': '4ddd428d-5fbc-46d5-9fc0-2268847fe91a',


In [24]:
import shutil

# Gather every PNG found under the workspace tree into one flat ``plots``
# folder, naming each copy after the id of the directory it came from.
# NOTE(review): absolute path on the author's machine — adjust when running elsewhere.
workspace_dir = Path("/home/mike/workspace/soa-ilec/soa-ilec/data/workspaces/ul_model_data/")
img_dir = workspace_dir / "plots"

# ``exist_ok=True`` already covers the "directory is there" case, so no
# separate ``exists()`` check is needed.
img_dir.mkdir(exist_ok=True)

# The listing is materialised (and sorted, for a deterministic copy order)
# before anything is copied, so files written during the loop are never
# picked up by the same traversal.
for png_path in sorted(workspace_dir.rglob("*.png")):

    # ``img_dir`` sits inside ``workspace_dir``: skip copies made by an earlier
    # run, otherwise re-running the cell would trip over the "plots" folder
    # name and try to copy files onto themselves.
    if img_dir in png_path.parents:
        continue

    ws_dir = png_path.parent.name
    # Source folders are named "workspace_<id>"; split on the first underscore
    # only, and skip folders that do not follow that pattern.
    prefix, sep, ws_id = ws_dir.partition("_")
    if not sep or not ws_id:
        continue

    # One output file per workspace id: if a folder holds several PNGs, the
    # last one in sorted order wins.
    img_path = img_dir / f"{ws_id}.png"
    shutil.copy(png_path, img_path)



In [22]:
# Debug check: ``ws_dir`` is whatever the plot-copy loop left behind on its
# last iteration; this shows how that directory name splits on "_".
ws_dir.split("_")

['workspace', 'cd8f0950-99b7-4095-8b33-0d6869eada2a']