## MST Reserving Model v0.2

Date: 12/20/2024<br>
<br>
Test model for testing potential methodology using sample data.  The model is considered ready for testing with MST data now that it can meet the following requirements:
1. Utilize manually input loss development factors
2. Utilize manually input a' priori loss ratios
3. Perform `Chainladder` method using the manual ldfs
4. Perform `BornhuetterFerguson` method using manual ldfs and a' prior loss ratios
5. Perform `CapeCod` method using manual ldfs
6. Perform a Least Squares method
7. Balance to excel model output (MST_reserve_modelv0.1.xlsx)
 

## Importing libraries and data

### Library dependicies
`chainladder` for building chainladder, BF, and GCC models. Also useful for building triangles.<br> 
`pandas` data munipulation<br>
`matplotlib` for plotting (duh) <br>
`sklearn.linear_model.LinearRegression` for the Least Squares method

### Data sources:
1. clrd_lob.csv
    - Sample triangle dataset
2. cdf_constants.csv
    - Manually selected cumulative loss development factors
3. ELR.xlsx
    - Manually selected expected loss ratios

In [1]:
#import libraries
import chainladder as cl #building reserve model and triangles
import pandas as pd #Dataframe manipulation
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
print("Chainladder version: "+cl.__version__)

Chainladder version: 0.8.22


In [2]:
#Importing data
df_data = pd.read_csv("clrd_lob.csv")
df_cdf = pd.read_csv("cdf_constants.csv")
df_elr = pd.read_excel("ELR.xlsx")

In [3]:
df_data.dtypes

LOB                object
origin             object
valuation          object
development         int64
IncurLoss           int64
CumPaidLoss         int64
BulkLoss            int64
EarnedPremDIR       int64
EarnedPremCeded     int64
EarnedPremNet       int64
dtype: object

In [4]:
df_elr.head()

Unnamed: 0,LOB,origin,ELR
0,comauto,1988-01-01,0.65
1,comauto,1989-01-01,0.65
2,comauto,1990-01-01,0.65
3,comauto,1991-01-01,0.65
4,comauto,1992-01-01,0.65


In [5]:
df_elr.dtypes

LOB               object
origin    datetime64[ns]
ELR              float64
dtype: object

In [6]:
#Convert columns to pandas datetime
date_columns = ['origin', 'valuation']
df_data[date_columns] = df_data[date_columns].apply(pd.to_datetime)

In [7]:
#merging df_elr with df_data to create UltExpectedDIRLoss
df_data = pd.merge(df_data, df_elr, on=['LOB', 'origin'], how='left')

In [8]:
df_data.head()

Unnamed: 0,LOB,origin,valuation,development,IncurLoss,CumPaidLoss,BulkLoss,EarnedPremDIR,EarnedPremCeded,EarnedPremNet,ELR
0,comauto,1988-01-01,1988-12-31,12,600839,154058,182604,1099729,186093,913636,0.65
1,comauto,1988-01-01,1989-12-31,24,632654,326916,90656,1099729,186093,913636,0.65
2,comauto,1988-01-01,1990-12-31,36,645663,447963,54262,1099729,186093,913636,0.65
3,comauto,1988-01-01,1991-12-31,48,646526,528167,27431,1099729,186093,913636,0.65
4,comauto,1988-01-01,1992-12-31,60,642177,574471,14819,1099729,186093,913636,0.65


In [9]:
#Creating Expected Loss column using pandas instead of using chainladder.
#Easier to perform the operation here.
df_data['UltExpectedDIRLoss'] = df_data['EarnedPremDIR'] * df_data['ELR']

In [10]:
tri = cl.Triangle(data=df_data,
                  origin='origin',
                  development='valuation',
                  index='LOB',
                  columns=['IncurLoss', 'CumPaidLoss', 'BulkLoss', 'EarnedPremDIR', 'EarnedPremCeded', 'EarnedPremNet', 'UltExpectedDIRLoss'],
                  cumulative=True

)

In [11]:
tri

Unnamed: 0,Triangle Summary
Valuation:,1997-12
Grain:,OYDY
Shape:,"(6, 7, 10, 10)"
Index:,[LOB]
Columns:,"[IncurLoss, CumPaidLoss, BulkLoss, EarnedPremDIR, EarnedPremCeded, EarnedPremNet, UltExpectedDIRLoss]"


In [12]:
# Create a dictionary of CDFs for each Line of Business (LOB)
cdf_dict = (
    df_cdf.groupby('LOB')
    .apply(lambda group: dict(zip(group['age'], group['cdf'])))
    .to_dict()
)

  .apply(lambda group: dict(zip(group['age'], group['cdf'])))


In [13]:
def get_paid_cdfs(row):
    """
    Returns the appropriate CDFs based on the LOB specified in the input row.
    """
    lob = row['LOB']
    return cdf_dict.get(lob, {})

In [14]:
# Apply the function to the triangle index
cdf_patterns = tri.index.to_frame().apply(get_paid_cdfs, axis=1)

AttributeError: 'DataFrame' object has no attribute 'to_frame'

In [None]:
# Use the patterns in the DevelopmentConstant
custom_patterns = cl.DevelopmentConstant(
    patterns=get_paid_cdfs,  # Use the refactored function directly
    callable_axis=1,
    style='cdf'
).fit_transform(tri["CumPaidLoss"])

In [None]:
plt.style.use('ggplot')
%config InlineBackend.figure_format = 'retina'

ax = custom_patterns.cdf_.T.plot(kind='bar', title='CDFs by LOB')

In [15]:
CL_model = cl.Chainladder().fit(X=custom_patterns)
#The 'UltExpectedDIRLoss' column in the triangle is EP * ELR, so setting the apriori = 1.000
BF_model = cl.BornhuetterFerguson(apriori=1.0).fit(X=custom_patterns, sample_weight=tri['UltExpectedDIRLoss'].latest_diagonal)
CC_model = cl.CapeCod(trend=0.05, decay=0.95).fit(X=custom_patterns, sample_weight=tri['EarnedPremDIR'].latest_diagonal)

In [None]:
CL_model.ultimate_.to_frame().transpose().style.format('{:,.0f}')

In [None]:
BF_model.ultimate_.to_frame().transpose().style.format('{:,.0f}')

In [None]:
CC_model.ultimate_.to_frame().transpose().style.format('{:,.0f}')

In [None]:
CC_model.detrended_apriori_.to_frame().transpose().style.format('{:.2%}')

In [20]:
df_CL_model = CL_model.ultimate_.to_frame().transpose().unstack().reset_index()
df_CL_model.rename(columns={0: 'CL_Ult'}, inplace=True)

df_BF_model = BF_model.ultimate_.to_frame().transpose().unstack().reset_index()
df_BF_model.rename(columns={0: 'BF_Ult'}, inplace=True)

df_CC_model = CC_model.ultimate_.to_frame().transpose().unstack().reset_index()
df_CC_model.rename(columns={0: 'CC_Ult'}, inplace=True)

df_ult = df_CL_model.merge(df_BF_model, on=['LOB', 'origin'], how='outer').merge(df_CC_model, on=['LOB', 'origin'], how='outer')

In [None]:
df_ult.loc[:, 'Selected_Ult'] = df_ult['BF_Ult']
df_ult

## Least Squares Method..............

In [22]:
df_data['origin'] = df_data['origin'].dt.year
df_ult['origin'] = df_ult['origin'].dt.year

In [23]:
df_data = pd.merge(df_data, df_ult[['LOB', 'origin', 'Selected_Ult']], on=['LOB', 'origin'], how='left')

In [24]:
current_eval = '1997-12-31'
prediction_data = df_data[df_data['valuation'] == current_eval]
fitting_data = df_data[df_data['valuation'] != current_eval]

In [25]:
models = {}
predictions = []

for (lob, development), group in fitting_data.groupby(['LOB', 'development']):
    if group.shape[0] > 1:
        X = group[['CumPaidLoss']]
        y = group['Selected_Ult']

        #fit a linear regression model
        model = LinearRegression()
        model.fit(X, y)

        #Store the model for reference
        models[(lob, development)] = model

        target_subset = prediction_data[(prediction_data['LOB'] == lob) & (prediction_data['development'] == development)].copy()
        if not target_subset.empty:
            X_new = target_subset[['CumPaidLoss']]
            target_subset['Least_Squares_Ult'] = model.predict(X_new)
            predictions.append(target_subset)

predictions_df = pd.concat(predictions, ignore_index=True)


In [26]:
Least_squares_df = predictions_df[['LOB', 'origin', 'Least_Squares_Ult']]
df_ult = df_ult.merge(Least_squares_df, on=['LOB', 'origin'], how='left')

In [27]:
df_ult.to_clipboard()

## View of the triangles

In [28]:
from IPython.display import display, HTML
def formatted_tri(lob):
    display(HTML(f"<h3>{lob} CumPaidLoss Triangle</h3>"))
    display(tri.loc[lob, 'CumPaidLoss'])
    display(HTML(f"<h3>{lob} Link Ratios</h3>"))
    display(tri.loc[lob, 'CumPaidLoss'].link_ratio)

In [None]:
for i in list(df_ult['LOB'].unique()):
    formatted_tri(i)