In [6]:
import warnings

import pandas as pd
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPRegressor

# Silence only scikit-learn's ConvergenceWarning (emitted when the optimizer
# stops at max_iter without converging). Every other warning category stays
# visible so real problems are not hidden.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Candidate hyperparameter values sampled by the RandomizedSearchCV calls below:
#   max_iter           - iteration cap, 500..1000 in steps of 100
#   hidden_layer_sizes - a single hidden layer with 2..7 units
#   activation         - hidden-layer activation function
#   alpha              - 0.0001..0.0009 in steps of 0.0001
params = dict(
    max_iter = [500, 600, 700, 800, 900, 1000],
    hidden_layer_sizes = [(2,), (3,), (4,), (5,), (6,), (7,)], 
    activation = ["tanh", "relu"], 
    alpha = [(i / 10000) for i in range(1, 10)]
)

# Base estimator shared by every search; random_state is fixed to 0.
model = MLPRegressor(solver = "lbfgs", random_state = 0)

In [7]:
### Predict insurance costs for all charges ###

# Read the train/test CSVs, then split each frame into features and the
# "charges" target column.
trainInsurance = pd.read_csv("../../data/sets/insurance_train.csv")
testInsurance = pd.read_csv("../../data/sets/insurance_test.csv")

trainX, trainY = trainInsurance.drop(columns=["charges"]), trainInsurance["charges"]
testX, testY = testInsurance.drop(columns=["charges"]), testInsurance["charges"]

# Randomized search over `params` (300 sampled candidates) on the shared `model`.
randomSearch = RandomizedSearchCV(
    estimator=model,
    param_distributions=params,
    n_iter=300,
    random_state=0,
)
randomSearch.fit(trainX, trainY)

# Evaluate the fitted search on the held-out test set.
predictions = randomSearch.predict(testX)

# NOTE: best_score_ is the mean cross-validated score of the best candidate,
# not a score computed on the full training set.
print(f"Best Training Score: {randomSearch.best_score_}")
print(f"Best Params: {randomSearch.best_params_}")
print(f"R2 score: {r2_score(testY, predictions)}")
print(f"MSE: {mean_squared_error(testY, predictions)}")

Best Training Score: 0.85010579584171
Best Params: {'max_iter': 500, 'hidden_layer_sizes': (4,), 'alpha': 0.0009, 'activation': 'relu'}
R2 score: 0.8574886827464966
MSE: 0.16359218320289326


In [8]:
### Predict insurance costs for high charges ###

# Read the high-charge train/test CSVs, then split each frame into features
# and the "charges" target column.
highTrain = pd.read_csv("../../data/sets/insurance_high_train.csv")
highTest = pd.read_csv("../../data/sets/insurance_high_test.csv")

highTrainX, highTrainY = highTrain.drop(columns=["charges"]), highTrain["charges"]
highTestX, highTestY = highTest.drop(columns=["charges"]), highTest["charges"]

# Randomized search over `params` (300 sampled candidates) on the shared `model`.
randomSearch = RandomizedSearchCV(
    estimator=model,
    param_distributions=params,
    n_iter=300,
    random_state=0,
)
randomSearch.fit(highTrainX, highTrainY)

# Evaluate the fitted search on the held-out high-charge test set.
predictions = randomSearch.predict(highTestX)

# NOTE: best_score_ is the mean cross-validated score of the best candidate,
# not a score computed on the full training set.
print(f"Best Training Score: {randomSearch.best_score_}")
print(f"Best Params: {randomSearch.best_params_}")
print(f"R2 score: {r2_score(highTestY, predictions)}")
print(f"MSE: {mean_squared_error(highTestY, predictions)}")


Best Training Score: 0.7481430361551966
Best Params: {'max_iter': 1000, 'hidden_layer_sizes': (5,), 'alpha': 0.0007, 'activation': 'relu'}
R2 score: 0.7542930265423176
MSE: 0.20771169750075644


In [9]:
### Predict insurance costs for low-to-medium charges ###

# Read the low-to-medium-charge train/test CSVs, then split each frame into
# features and the "charges" target column.
lowTrain = pd.read_csv("../../data/sets/insurance_low_train.csv")
lowTest = pd.read_csv("../../data/sets/insurance_low_test.csv")

lowTrainX, lowTrainY = lowTrain.drop(columns=["charges"]), lowTrain["charges"]
lowTestX, lowTestY = lowTest.drop(columns=["charges"]), lowTest["charges"]

# Randomized search over `params` (300 sampled candidates) on the shared `model`.
randomSearch = RandomizedSearchCV(
    estimator=model,
    param_distributions=params,
    n_iter=300,
    random_state=0,
)
randomSearch.fit(lowTrainX, lowTrainY)

# Evaluate the fitted search on the held-out low-to-medium-charge test set.
predictions = randomSearch.predict(lowTestX)

# NOTE: best_score_ is the mean cross-validated score of the best candidate,
# not a score computed on the full training set.
print(f"Best Training Score: {randomSearch.best_score_}")
print(f"Best Params: {randomSearch.best_params_}")
print(f"R2 score: {r2_score(lowTestY, predictions)}")
print(f"MSE: {mean_squared_error(lowTestY, predictions)}")

Best Training Score: 0.9113791516225207
Best Params: {'max_iter': 500, 'hidden_layer_sizes': (3,), 'alpha': 0.0003, 'activation': 'tanh'}
R2 score: 0.8925623035502314
MSE: 0.11445256206767189
