In [1]:
import pandas as pd
from pathlib import Path
import sys

sys.path.insert(0, str(Path.cwd().parent))  # adds parent directory
from experiments_lib import prompt_ilec_data, set_context_window_size, get_context_window

%load_ext autoreload
%autoreload 2

## Setup LLM params

In [2]:
set_context_window_size(20)

In [3]:
get_context_window()

deque([], maxlen=20)

# Actuary

An actuary is trying to determine the best expected basis across all plan types, and is aware that there are different 

* Expected_Death_QX7580E_by_Amount
* Expected_Death_QX2001VBT_by_Amount
* Expected_Death_QX2008VBT_by_Amount
* Expected_Death_QX2008VBTLU_by_Amount
* Expected_Death_QX2015VBT_by_Amount
* Expected_Death_QX7580E_by_Policy
* Expected_Death_QX2001VBT_by_Policy
* Expected_Death_QX2008VBT_by_Policy
* Expected_Death_QX2008VBTLU_by_Policy
* Expected_Death_QX2015VBT_by_Policy
* ExpDeathQx2015VBTwMI_byPol
* ExpDeathQx2015VBTwMI_byAmt


In [4]:
prompt = "Provide a list of preferred class structures for each plan type using Number_of_preferred classes. "\
    "Split out smoker vs. non-smoker classes. Provide total exposure for each split."\
    " The final table should have PlanType, Smoker_Status, Number Of Pref Classes, and Exposure as columns."

print(prompt_ilec_data(prompt))

Running request...
I can do that. Quick clarification before I run the query:

1. Which exposure measure do you want summed into the Exposure column — Policies_Exposed (number of policies) or Amount_Exposed (dollar exposure)?  
2. Do you want to include rows where Number_Of_Preferred_Classes is NULL (or zero), or only positive/non-null values?

If you’re fine with defaults, I will:
- Use Insurance_Plan as PlanType,
- Use Smoker_Status for smoker split,
- Use Number_Of_Preferred_Classes for the class count,
- Sum Policies_Exposed as Exposure,
- Exclude rows where Number_Of_Preferred_Classes is NULL.

Tell me if you prefer Amount_Exposed instead or want NULL/zero class counts included.


PlanType | Smoker_Status | Number Of Pref Classes | Exposure (Policies_Exposed)
--- | ---: | ---: | ---:
Other | NonSmoker | 0 | 63,559.65
Other | NonSmoker | 2 | 36,913.74
Other | NonSmoker | 3 | 136,995.01
Other | NonSmoker | 4 | 31,971.91
Other | Smoker | 0 | 8,257.71
Other | Smoker | 2 | 12,705.66
Perm | NonSmoker | 0 | 55,329,569.24
Perm | NonSmoker | 2 | 10,043,440.40
Perm | NonSmoker | 3 | 6,543,448.22
Perm | NonSmoker | 4 | 227,007.41
Perm | Smoker | 0 | 12,154,543.89
Perm | Smoker | 2 | 1,425,305.74
Term | NonSmoker | 0 | 23,743,212.10
Term | NonSmoker | 2 | 33,706,067.68
Term | NonSmoker | 3 | 48,064,637.20
Term | NonSmoker | 4 | 55,388,798.23
Term | Smoker | 0 | 4,700,383.48
Term | Smoker | 2 | 5,888,067.39
UL | NonSmoker | 0 | 32,383,736.77
UL | NonSmoker | 2 | 7,429,450.89
UL | NonSmoker | 3 | 4,117,782.92
UL | NonSmoker | 4 | 875,172.89
UL | Smoker | 0 | 6,225,292.19
UL | Smoker | 2 | 924,686.06
ULSG | NonSmoker | 0 | 2,797,162.94
ULSG | NonSmoker | 2 | 1,418,931.99
ULSG | NonSmoker | 3 | 7,505,527.11
ULSG | NonSmoker | 4 | 4,176,088.01
ULSG | Smoker | 0 | 707,734.14
ULSG | Smoker | 2 | 658,868.10
VL | NonSmoker | 0 | 8,212,160.59
VL | NonSmoker | 2 | 4,855,840.23
VL | NonSmoker | 3 | 1,686,071.14
VL | NonSmoker | 4 | 348,179.99
VL | Smoker | 0 | 1,518,753.95
VL | Smoker | 2 | 442,772.71
VLSG | NonSmoker | 0 | 1,797,637.65
VLSG | NonSmoker | 2 | 2,218,322.83
VLSG | NonSmoker | 3 | 1,510,817.00
VLSG | NonSmoker | 4 | 828,724.39
VLSG | Smoker | 0 | 318,889.24
VLSG | Smoker | 2 | 299,731.27

In [5]:
prompt = "Modify the previous table to also produce an A/E by amount for each expected basis that ends with by_Amount or byAmt."
print(prompt_ilec_data(prompt))

Running request...
I added A/E (actual/expected)-by-amount columns for every expected column in the table whose name ends with _by_Amount or _byAmt. Each A/E is computed as SUM(Death_Claim_Amount) / SUM(<expected_column>) (NULL when the summed expected = 0). I kept your previous defaults: grouping by Insurance_Plan (PlanType), Smoker_Status, Number_Of_Preferred_Classes, summing Policies_Exposed as Exposure, and excluding rows with NULL Number_Of_Preferred_Classes. If you want Amount_Exposed instead, or to include NULL/zero class counts, tell me and I’ll adjust.

SQL I ran:

SELECT
  Insurance_Plan AS PlanType,
  Smoker_Status,
  Number_Of_Preferred_Classes AS Number_Of_Pref_Classes,
  SUM(Policies_Exposed) AS Exposure,
  CASE WHEN SUM(Expected_Death_QX7580E_by_Amount) > 0 THEN SUM(Death_Claim_Amount)/SUM(Expected_Death_QX7580E_by_Amount) END AS AE_Expected_Death_QX7580E_by_Amount,
  CASE WHEN SUM(Expected_Death_QX2001VBT_by_Amount) > 0 THEN SUM(Death_Claim_Amount)/SUM(Expected_Death_QX

In [6]:
prompt = "Generate the full table as a CSV, with AE values rounded to 4 decimals, and no filters applied. Include the Number of Deaths.  The format should be Plan_Type, Smoker_Status, Number_of_Pref_classes, Number_Of_Deaths, Actual_Death_Amount, Exposure_Amount and the list of A/E ratios."
print(prompt_ilec_data(prompt))

Running request...
Plan_Type,Smoker_Status,Number_of_Pref_classes,Number_Of_Deaths,Actual_Death_Amount,Exposure_Amount,AE_Expected_Death_QX7580E_by_Amount,AE_Expected_Death_QX2001VBT_by_Amount,AE_Expected_Death_QX2008VBT_by_Amount,AE_Expected_Death_QX2008VBTLU_by_Amount,AE_Expected_Death_QX2015VBT_by_Amount,AE_ExpDeathQx2015VBTwMI_byAmt,AE_Cen2MomP1wMI_byAmt,AE_Cen2MomP2wMI_byAmt,AE_Cen3MomP1wMI_byAmt,AE_Cen3MomP2wMI_byAmt,AE_Cen3MomP3wMI_byAmt,AE_Cen2MomP1_byAmt,AE_Cen2MomP2_byAmt,AE_Cen3MomP1_byAmt,AE_Cen3MomP2_byAmt,AE_Cen3MomP3_byAmt
UL,NonSmoker,3.0,18218.0,12612486302.0,1534057566384.047,0.5327,0.6114,0.6837,0.5619,0.7983,0.7952,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
VL,NonSmoker,2.0,18614.0,5193021148.0,1419808855154.0479,0.4293,0.5962,0.8686,0.6209,0.9913,0.9848,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000
Other,NonSmoker,2.0,140.0,39941733.0,13300417839.775484,0.5019,0.6743,0.9669,0.6527,1.1437,1.1387,0.0000,0.0000,0.0000

In [8]:
df = pd.read_csv("actuary_results.csv")

df

Unnamed: 0,Plan_Type,Smoker_Status,Number_of_Pref_classes,Number_Of_Deaths,Actual_Death_Amount,Exposure_Amount,AE_Expected_Death_QX7580E_by_Amount,AE_Expected_Death_QX2001VBT_by_Amount,AE_Expected_Death_QX2008VBT_by_Amount,AE_Expected_Death_QX2008VBTLU_by_Amount,...,AE_Cen2MomP1wMI_byAmt,AE_Cen2MomP2wMI_byAmt,AE_Cen3MomP1wMI_byAmt,AE_Cen3MomP2wMI_byAmt,AE_Cen3MomP3wMI_byAmt,AE_Cen2MomP1_byAmt,AE_Cen2MomP2_byAmt,AE_Cen3MomP1_byAmt,AE_Cen3MomP2_byAmt,AE_Cen3MomP3_byAmt
0,UL,NonSmoker,3.0,18218.0,12612490000.0,1534058000000.0,0.5327,0.6114,0.6837,0.5619,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,VL,NonSmoker,2.0,18614.0,5193021000.0,1419809000000.0,0.4293,0.5962,0.8686,0.6209,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Other,NonSmoker,2.0,140.0,39941730.0,13300420000.0,0.5019,0.6743,0.9669,0.6527,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Term,NonSmoker,3.0,63292.0,22183760000.0,21304110000000.0,0.289,0.4502,0.6949,0.4126,...,0.0,0.0002,0.0,0.0,0.0,0.0,0.0002,0.0,0.0,0.0
4,VL,,0.0,2842.0,212430200.0,151839100000.0,0.6856,0.8852,1.0606,0.878,...,0.0,0.0002,0.0,0.0,0.0,0.0,0.0002,0.0,0.0,0.0
5,VL,NonSmoker,4.0,382.0,180081500.0,168357000000.0,0.3347,0.5137,0.7274,0.4329,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Other,NonSmoker,4.0,23.0,20541400.0,77460630000.0,0.0946,0.1552,0.2241,0.1229,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,ULSG,Smoker,2.0,4967.0,983941500.0,138318700000.0,1.0634,0.7747,0.8158,0.6696,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,ULSG,UNKNOWN,0.0,5.0,450000.0,1386607000.0,0.3781,0.5137,0.5736,0.4726,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Perm,NonSmoker,3.0,9114.0,2244537000.0,2219754000000.0,0.3583,0.5518,0.7984,0.4763,...,0.0,0.0001,0.0,0.0,0.0,0.0,0.0001,0.0,0.0,0.0


## Addl Notes

Missed Data quality for NonSmoker / Smoker