In [3]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
import joblib

# Train a gradient-boosting regressor that predicts 'Monthly Premium Auto' from
# the index/gender features, then persist the fitted model and the feature order.

# One seed shared by the split and the model so a fresh-kernel re-run reproduces
# the same train/test partition, the same fitted model and the same saved artifact.
SEED = 42

#import data
columns_used = [
    'Coverage Index',
    'Education Index',
    'Employment Status Index',
    'Location Index',
    'Gender',
    'Marital Status Index',
    'Vehicle Class Index',
    'Vehicle Size Index',
    'Monthly Premium Auto',
]
df = pd.read_csv('AutoInsuranceClaims2024.csv', usecols=columns_used)

#encoding categorical variables
gender_map = {
    'F':1,
    'M':0,
}
gender_encoded = df['Gender'].map(gender_map)
# Series.map yields NaN for every value that is not a key of gender_map
# (including blanks). Fail here with the offending values instead of letting
# NaN flow silently into the feature matrix.
unmapped = gender_encoded.isna()
if unmapped.any():
    raise ValueError(
        "Unexpected values in 'Gender' (not in gender_map): "
        f"{df.loc[unmapped, 'Gender'].unique().tolist()}"
    )
df['Gender'] = gender_encoded

#split data into X and y (input and output)
X = df.drop(columns=['Monthly Premium Auto'])
y = df['Monthly Premium Auto']

#split data into train and test sets (80% train / 20% held out)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

#build model
model = GradientBoostingRegressor(random_state=SEED)
model.fit(X_train, y_train)
# Predictions on the held-out rows; the evaluation cell scores these against y_test.
predictions = model.predict(X_test)

# Persist the fitted model together with the training column order so that
# whoever loads the model can arrange incoming features the same way.
joblib.dump(model, 'premium-predictor.joblib')
joblib.dump(X.columns.tolist(), 'feature_names.joblib')



['feature_names.joblib']

In [2]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Evaluate the fitted model and probe how 'Gender' alone changes the prediction.
# Depends on names created by the training cell (y_test, predictions, df, model,
# X, gender_map) -- run that cell first on a fresh kernel.

#determine accuracy
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
mses = rmse  # original (misleading) name for the RMSE, kept in case later cells use it
mae = mean_absolute_error(y_test, predictions)
# Only the last expression of a cell is rendered automatically, so the error
# metrics have to be displayed explicitly or they are never seen.
display(pd.Series({'MSE': mse, 'RMSE': rmse, 'MAE': mae}, name='hold-out error'))

#testing patterns
correlation_matrix = df.corr()
# A bare expression in the middle of a cell is discarded; show it explicitly.
display(correlation_matrix['Monthly Premium Auto'])

# Two rows that are identical except for 'Gender', so any difference between
# the two predictions is attributable to that feature alone.
test_data = {
    'Coverage Index': [1, 1],
    'Education Index': [2, 2],
    'Employment Status Index': [1, 1],
    'Location Index': [1, 1],
    'Gender': ['M', 'F'],
    'Marital Status Index': [0, 0],
    'Vehicle Class Index': [1, 1],
    'Vehicle Size Index': [1, 1]
}
##prepare test data
test = (
    pd.DataFrame(test_data)
    .assign(Gender=lambda frame: frame['Gender'].map(gender_map))
    # Put the columns in training order; a feature absent from test_data is
    # added as a column of 0, and a column unknown to the model is dropped.
    .reindex(columns=X.columns, fill_value=0)
)
# map() returns NaN for labels missing from gender_map; catch typos in test_data here.
if test['Gender'].isna().any():
    raise ValueError("test_data['Gender'] contains a value that is not in gender_map")

test, model.predict(test)


(   Coverage Index  Education Index  Employment Status Index  Gender  \
 0               1                2                        1       1   
 1               1                2                        1       0   
 
    Location Index  Marital Status Index  Vehicle Class Index  \
 0               1                     0                    1   
 1               1                     0                    1   
 
    Vehicle Size Index  
 0                   1  
 1                   1  ,
 array([117.8243205 , 118.01447253]))