# Model Validation

In [None]:
import sys
import warnings
import pandas as pd
import joblib
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from yellowbrick.regressor import ResidualsPlot, PredictionError

# Notebook-wide setup.
# Make the directory two levels above this notebook importable.
sys.path.append("../../")

# Remove the cap on how many DataFrame columns pandas displays.
pd.options.display.max_columns = None

# Silence every warning so library noise does not clutter the cell outputs.
warnings.filterwarnings(action="ignore")

## Load Data

In [None]:
# Read the training and testing CSV files from ../../data into DataFrames.
training_set, testing_set = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../../data/training_set.csv",
        "../../data/testing_set.csv",
    )
)

In [None]:
# Column names: the value being predicted and the inputs used to predict it.
target_column = "charges"
feature_columns = ["age", "sex", "bmi", "children", "smoker", "region"]

# Slice each dataset into its feature columns (X) and its target column (y).
X_train, y_train = training_set[feature_columns], training_set[target_column]
X_test, y_test = testing_set[feature_columns], testing_set[target_column]

## Load Model

In [None]:
# Deserialize the saved model from "model.joblib" (path is relative to the
# notebook's working directory). joblib.load unpickles the file, so only
# load artifacts that come from a trusted source.
model = joblib.load(filename="model.joblib")

## Calculating Model Metrics

In [None]:
# Run the loaded model on the test-set features; the resulting predictions
# are compared against y_test by the metric calculations.
predictions = model.predict(X_test)

In [None]:
# Score the test-set predictions with each regression metric in turn.
r2, mse, mae = (
    metric(y_test, predictions)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)

# Report the results. The labels keep their trailing space so the printed
# output is unchanged.
for label, value in (
    ("r2 score: ", r2),
    ("mean squared error: ", mse),
    ("mean absolute error: ", mae),
):
    print(label, value)

## Generate Residuals Plot

In [None]:
# Wrap the already-loaded model in a Yellowbrick ResidualsPlot visualizer
# (the model is loaded from disk earlier in the notebook, not created here).
visualizer = ResidualsPlot(model)

# fit() is given the training split and score() the testing split.
# NOTE(review): depending on the Yellowbrick version / `is_fitted` setting,
# fit() may re-train the wrapped estimator on X_train — confirm the loaded
# model is not being refit unintentionally.
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
# Render the finished figure.
visualizer.show()

## Generate Prediction Error Plot

In [None]:
# Wrap the already-loaded model in a Yellowbrick PredictionError visualizer;
# this rebinds `visualizer`, replacing the residuals-plot instance.
visualizer = PredictionError(model)

# fit() is given the training split and score() the testing split.
# NOTE(review): depending on the Yellowbrick version / `is_fitted` setting,
# fit() may re-train the wrapped estimator on X_train — confirm the loaded
# model is not being refit unintentionally.
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
# Render the finished figure.
visualizer.show()