In [5]:
import pandas as pd
from pathlib import Path
import sys

# Put the "common" and "mcp" directories that sit next to this notebook's
# parent-relative working directory at the front of sys.path so the local
# imports that follow can resolve.
# NOTE(review): both paths are derived from Path.cwd(), so this only works when
# the kernel's working directory is a sibling of those folders.
sys.path.insert(0, str(Path.cwd().parent / "common"))  # <cwd>/../common
sys.path.insert(0, str(Path.cwd().parent / "mcp"))  # <cwd>/../mcp

from agent import AssumptionsAgent
from prompt import ModelingPrompt
from app_shared import Database, AppSession
from ilec_mcp_server import create_REnv
from ilec_r_lib import AgentRCommands as RCmd
from env_vars import DEFAULT_AGENT_WORK_DIR

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Single agent instance reused by the canary and modeling prompt cells below.
agent = AssumptionsAgent()

## Canary

In [8]:
# Canary check: send a simple question under the agent name "canary_agent"
# before running the heavier modeling prompt. print() is used so newlines in
# the reply are rendered instead of shown as escaped "\n" in a string repr.
# (Fixed the "differerent" typo in the question text.)
print(agent.prompt("canary_agent", "How many different insurance plan types are there, list them"))

INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp/ "HTTP/1.1 307 Temporary Redirect"
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp "HTTP/1.1 200 OK"
INFO:mcp.client.streamable_http:Received session ID: 8abec0b9d536437eb41ab358072659e3
INFO:mcp.client.streamable_http:Negotiated protocol version: 2025-06-18
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp/ "HTTP/1.1 307 Temporary Redirect"
INFO:httpx:HTTP Request: GET http://127.0.0.1:9090/mcp/ "HTTP/1.1 307 Temporary Redirect"
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp "HTTP/1.1 202 Accepted"
INFO:httpx:HTTP Request: GET http://127.0.0.1:9090/mcp "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp/ "HTTP/1.1 307 Temporary Redirect"
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/traces/ingest "HTTP/1.1 204 No Content"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK

There are 7 distinct insurance plan types across the datasets (ILEC_DATA and UL_MODEL_DATA):
- Perm
- VLSG
- Term
- UL
- VL
- Other
- ULSG


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/traces/ingest "HTTP/1.1 204 No Content"


## Modeling

### Prepare Modeling Data

In [7]:
# List the columns of ILEC_DATA (one tuple per column) so the modeling
# columns chosen in the next cell can be checked against the real schema.
TABLE_INFO_SQL = "PRAGMA table_info(ILEC_DATA)"
with Database.get_duckdb_conn() as conn:
    cursor = conn.execute(TABLE_INFO_SQL)
    res = cursor.fetchall()
res

[(0, 'Observation_Year', 'INTEGER', False, None, False),
 (1, 'Preferred_Indicator', 'VARCHAR', False, None, False),
 (2, 'Gender', 'VARCHAR', False, None, False),
 (3, 'Smoker_Status', 'VARCHAR', False, None, False),
 (4, 'Insurance_Plan', 'VARCHAR', False, None, False),
 (5, 'Issue_Age', 'INTEGER', False, None, False),
 (6, 'Duration', 'INTEGER', False, None, False),
 (7, 'Attained_Age', 'INTEGER', False, None, False),
 (8, 'Age_Basis', 'VARCHAR', False, None, False),
 (9, 'Face_Amount_Band', 'VARCHAR', False, None, False),
 (10, 'Issue_Year', 'INTEGER', False, None, False),
 (11, 'Number_Of_Preferred_Classes', 'DECIMAL(11,1)', False, None, False),
 (12, 'Preferred_Class', 'VARCHAR', False, None, False),
 (13, 'SOA_Anticipated_Level_Term_Period', 'VARCHAR', False, None, False),
 (14, 'SOA_Guaranteed_Level_Term_Period', 'VARCHAR', False, None, False),
 (15, 'SOA_Post_level_Term_Indicator', 'VARCHAR', False, None, False),
 (16, 'Select_Ultimate_Indicator', 'VARCHAR', False, None, False

In [8]:
# Response and exposure columns; the view definition and the modeling prompt
# in later cells are both built from these names.
target_column = "Number_Of_Deaths"
offset_column = "ExpDeathQx2015VBTwMI_byPol"

# Small hand-picked feature set. A wider set can be derived from the
# table_info rows instead:
#   predictors = [row[1] for row in res[1:13]]
predictors = ["Gender", "Attained_Age", "Smoker_Status"]

# Comma-separated form, ready to splice into a SQL select / group by list.
predictor_columns = ",".join(predictors)

predictor_columns

'Gender,Attained_Age,Smoker_Status'

In [9]:
# View definition for the modeling data set:
#   * keeps only ILEC_DATA rows where Insurance_Plan = 'UL'
#   * labels rows with Observation_Year < 2016 as 'TRAIN', everything else 'TEST'
#   * aggregates target and offset (nulls counted as 0) per DATASET + predictors
MODEL_DATA_QUERY = f"""
create or replace view UL_MODEL_DATA_SMALL as (
    with obs_data as (
        select
            {predictor_columns},
            {target_column},
            {offset_column},
            case 
                when Observation_Year < 2016 then 'TRAIN'
                else 'TEST'
            end as DATASET
        from ILEC_DATA
        where Insurance_Plan = 'UL'
    )
    select
        DATASET,
        {predictor_columns},
        sum(coalesce({target_column}, 0)) as {target_column},
        sum(coalesce({offset_column}, 0)) as {offset_column}
    from obs_data
    group by DATASET, {predictor_columns}
)
"""

# The statement (re)creates a view, so the connection is opened with
# read_only=False, unlike the read-only lookups in the other cells.
with Database.get_duckdb_conn(read_only=False) as conn:
    cursor = conn.execute(MODEL_DATA_QUERY)
    vw_res = cursor.fetchall()
vw_res



[]

In [10]:
# Fetch a single row from the view to confirm it exists and to see its
# column order (DATASET, predictors, target, offset).
PREVIEW_SQL = "select * from UL_MODEL_DATA_SMALL limit 1"
with Database.get_duckdb_conn() as conn:
    vw_res = conn.execute(PREVIEW_SQL).fetchall()
vw_res

[('TRAIN', 'Male', 8, 'NonSmoker', 3.0, 1.934050844552496)]

### Run Modeling Prompt

In [11]:
# Build the modeling instructions for the view. Positional arguments, in
# order: data view name, feature list, target column, offset column.
modeling_prompt = ModelingPrompt("UL_MODEL_DATA_SMALL", predictors, target_column, offset_column)

# Display the rendered prompt text that gets sent to the agent.
str(modeling_prompt)

'The goal is to create a model to predict mortality on the sql table \'UL_MODEL_DATA_SMALL\'.First, call sql_schema() with \'UL_MODEL_DATA_SMALL\' as the table_name argument.Use the column \'Number_Of_Deaths\' as the target (y_var) and column \'ExpDeathQx2015VBTwMI_byPol\' as the offset (offset_var), including in calls to cmd_rpart() and cmd_glmnet().If either the target or y_var columns are not present in \'UL_MODEL_DATA_SMALL\', fail and report your findings.Perform exploratory data analysis on UL_MODEL_DATA_SMALL using sql_query(). Use the EDA results in model design when possible.These columns in UL_MODEL_DATA_SMALL are all valid model features: Gender,Attained_Age,Smoker_Status.You may perform basic feature engineering via binning continuous variables as categorical or ordinal, but nothing else.Ensure any basic feature engineering tasks are included in sql argument for cmd_create_dataset().But when possible, rely on the design_matrix_vars argument in cmd_glmnet() for feature engin

In [12]:
# Send the rendered modeling prompt under the agent name "ul_data_small_agent"
# and keep the reply; the following cell writes it to response.md.
prompt_text = str(modeling_prompt)
agent_response = agent.prompt("ul_data_small_agent", prompt_text)

INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp/ "HTTP/1.1 307 Temporary Redirect"
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp "HTTP/1.1 200 OK"
INFO:mcp.client.streamable_http:Received session ID: 33b9ac91f8fe4837859730d4e7eb1ec4
INFO:mcp.client.streamable_http:Negotiated protocol version: 2025-06-18
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp/ "HTTP/1.1 307 Temporary Redirect"
INFO:httpx:HTTP Request: GET http://127.0.0.1:9090/mcp/ "HTTP/1.1 307 Temporary Redirect"
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp "HTTP/1.1 202 Accepted"
INFO:httpx:HTTP Request: GET http://127.0.0.1:9090/mcp "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp/ "HTTP/1.1 307 Temporary Redirect"
INFO:httpx:HTTP Request: POST http://127.0.0.1:9090/mcp "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/traces/ingest "HTTP/1.1 204 No Content"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/traces/ingest "HTTP/1.1 204 No Content"


In [13]:
# Save the agent's reply as response.md inside the directory stored under the
# session key "MCP_WORK_DIR".
with Database.get_session_conn() as conn:
    session = AppSession(conn)
    work_dir = Path(session["MCP_WORK_DIR"])
    # Explicit UTF-8: the reply is model-generated markdown and may contain
    # characters the platform's default encoding cannot represent, which would
    # otherwise raise UnicodeEncodeError or produce a mis-encoded file.
    with open(work_dir / "response.md", "w", encoding="utf-8") as fh:
        fh.write(agent_response)


### Test Web Handler

In [28]:
# Smoke-test the web handler: GET /start_agent on the local service at port
# 8085, passing the same agent name, data view, target and offset column names
# used in the modeling cells above. The handler replies with a JSON status.
! curl "http://127.0.0.1:8085/start_agent?agent_name=ul_data_small_agent&model_data_view=UL_MODEL_DATA_SMALL&target_var=Number_Of_Deaths&offset_var=ExpDeathQx2015VBTwMI_byPol"

{"agent_name":"ul_data_small_agent","agent_status":"COMPLETE","model_data_view":"UL_MODEL_DATA_SMALL","target_var":"Number_Of_Deaths","offset_var":"ExpDeathQx2015VBTwMI_byPol"}