In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
sys.path.append('src')

from src.data import load_and_preprocess_data
from src.models import decisionTree, FFNNRegressor, rmse

<h2>Prep Code</h2>

In [None]:
# Load the train/test claim files and unpack every object the later cells use:
# the two frames, the feature/target splits, and the *_std feature matrices
# (presumably standardised features — confirm in src.data).
(
    df_train,
    df_test,
    X_train,
    X_test,
    y_train,
    y_test,
    X_train_std,
    X_test_std,
) = load_and_preprocess_data('claims_train.csv', 'claims_test.csv')

<h2>Decision Tree</h2>

<h2>RMSE</h2>

In [None]:
# M1: fit the project's decisionTree on the raw feature matrices and report
# train/test RMSE. The hyperparameters are kept together so they are easy to tune.
tree_params = dict(max_depth=12, min_samples_split=200, min_samples_leaf=10)

tree = decisionTree(**tree_params)
tree.fit(X_train, y_train)

# These predictions are reused by the per-segment RMSE cells further down.
y_train_pred_m1 = tree.predict(X_train)
y_test_pred_m1 = tree.predict(X_test)

print("M1 scratch tree:")
for label, y_true, y_hat in (
    ("Train RMSE:", y_train, y_train_pred_m1),
    ("Test  RMSE:", y_test, y_test_pred_m1),
):
    print(label, rmse(y_true, y_hat))

<h2>RMSE by area</h2>

In [None]:
# Break the M1 error down by the 'Area' column, first on the training split
# and then on the test split. Each report is a header followed by one line per
# distinct area value (sorted), with a single blank line between the reports.
for position, (header, frame, y_true, y_pred) in enumerate((
    ("RMSE by Area (train):", df_train, y_train, y_train_pred_m1),
    ("RMSE by Area (test):", df_test, y_test, y_test_pred_m1),
)):
    if position > 0:
        print()  # spacer between the train and test reports

    print(header)
    for a in sorted(frame['Area'].unique()):
        # Boolean row selector for the current area; applied to both the
        # targets and the predictions of the same split.
        mask = (frame['Area'] == a)
        area_rmse = rmse(y_true[mask], y_pred[mask])
        print(f"Area {a}: {area_rmse:.4f}")


<h2>RMSE by density quartile</h2>

In [None]:
# Break the M1 error down by quartile of the 'Density' column for each split.
# NOTE(review): pd.qcut runs on each frame separately, so the Q1..Q4 cut
# points are derived per split and the same label can cover different density
# ranges in train and test — confirm this is intended.
for position, (header, frame, y_true, y_hat) in enumerate((
    ("\nRMSE by Density Quartile (train):", df_train, y_train, y_train_pred_m1),
    ("\nRMSE by Density Quartile (test):", df_test, y_test, y_test_pred_m1),
)):
    if position > 0:
        print()  # spacer between the train and test reports

    # The quartile label is stored on the frame itself, adding (or
    # overwriting) its 'DensityQuartile' column.
    frame['DensityQuartile'] = pd.qcut(
        frame['Density'], 4, labels=['Q1', 'Q2', 'Q3', 'Q4']
    )

    print(header)
    for q in ['Q1', 'Q2', 'Q3', 'Q4']:
        mask = (frame['DensityQuartile'] == q)
        quartile_rmse = rmse(y_true[mask], y_hat[mask])
        print(f"{q}: {quartile_rmse:.4f}")



<h1>M2</h1>

In [None]:
# M2: train the project's FFNNRegressor on the *_std feature matrices and
# report RMSE and R² on both splits using the model's own metric methods.
input_dim = X_train_std.shape[1]  # number of input features (columns)

nn_params = dict(
    input_dim=input_dim,
    hidden_dim=32,
    lr=0.01,
    epochs=80,
    batch_size=2048,
    l2=1e-4,
    random_state=42,
    verbose=True,
)
nn = FFNNRegressor(**nn_params)
nn.fit(X_train_std, y_train)

y_train_pred_m2 = nn.predict(X_train_std)
y_test_pred_m2 = nn.predict(X_test_std)

# (truth, prediction) pairs, named once and reused for both metrics.
train_pair = (y_train, y_train_pred_m2)
test_pair = (y_test, y_test_pred_m2)

print("M2 scratch NN:")
print("Train RMSE:", nn.rmse(*train_pair))
print("Test  RMSE:", nn.rmse(*test_pair))
print("Train R²:  ", nn.r2_score(*train_pair))
print("Test  R²:  ", nn.r2_score(*test_pair))
