# Model evaluation

This notebook contains code for evaluating the model. The idea is to evaluate the trained model on a validation dataset using the *evaluate* method, which produces statistics describing how well the model performs on that dataset.

In [None]:
# Add titanic library to path.
from pathlib import Path
import sys

# Directory one level above the current working directory, as a string
# (sys.path holds strings, not Path objects).
lib_path = str(Path('').absolute().parent)

# Register it only once so re-running this cell does not grow sys.path.
already_on_path = lib_path in sys.path
if not already_on_path:
    sys.path.append(lib_path)

In [None]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import cross_validate

from titanic.features import preprocess


# Define input/model paths (relative to the current working directory).
input_path = Path("../data/validation.csv")
model_path = Path("../outputs/model.pkl")

# Load dataset and split it into features and the "Survived" target column.
dataset = pd.read_csv(input_path)

X_eval = preprocess(dataset.drop("Survived", axis=1))
y_eval = dataset["Survived"]

# Load model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files from a trusted source.
model = joblib.load(model_path)

# Calculate metrics.
# NOTE(review): cross_validate clones the estimator and refits it on each of
# the 5 training folds, so the score below reflects the model's configuration
# re-trained on the validation data, not the fitted state loaded above.
# Confirm this is the intended evaluation.
scorer = metrics.make_scorer(metrics.mean_squared_error)
cv_results = cross_validate(model, X=X_eval, y=y_eval, scoring=scorer, cv=5)

# Mean of the per-fold scores. Stored under a name distinct from the imported
# sklearn `metrics` module so that the module is not shadowed for later code.
eval_metrics = {"mse": cv_results["test_score"].mean()}
print(eval_metrics)