# Regression Metrics

* Notes from DataCamp's [XGBoost Course](https://campus.datacamp.com/courses/extreme-gradient-boosting-with-xgboost/regression-with-xgboost?ex=1)
* [MAE and RMSE](https://medium.com/human-in-a-machine-world/mae-and-rmse-which-metric-is-better-e60ac3bde13d)

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [23]:
# Toy data set shared by every metric below: three observations,
# each with its actual value and the value a model predicted for it.
original = pd.DataFrame(
    [(10, 20), (3, 8), (6, 1)],
    columns=["actual", "predicted"],
)

display(original)

Unnamed: 0,actual,predicted
0,10,20
1,3,8
2,6,1


## Root Mean Squared Error

In [None]:
# RMSE punishes large differences between actual and predicted more heavily than smaller ones,
# because every error is squared before the errors are averaged.

df = original.copy()
df["error"] = df["actual"] - df["predicted"]
df["squared_error"] = np.power(df["error"], 2)

display(df)

# Manual calculation: sum of squared errors -> mean (MSE) -> square root (RMSE).
total_error = df["squared_error"].sum()
mse = total_error / len(df)
rmse = np.sqrt(mse)

print("Calculated manually:")
print("Total Error:", total_error)
print("MSE:", mse)
print("RMSE: {:.2f}".format(rmse))

# Same result, but letting scikit-learn compute the MSE.
# rmse is recomputed from sklearn's MSE so this section does not silently
# reuse the manually calculated value from above.
print("\nCalculated using mean_squared_error function:")
mse = mean_squared_error(df["actual"], df["predicted"])
rmse = np.sqrt(mse)
print("MSE:", mse)
print("RMSE: {:.2f}".format(rmse))

# One-call RMSE. The `squared=False` flag of mean_squared_error was deprecated
# in scikit-learn 1.4 and removed in 1.6 in favour of root_mean_squared_error,
# so prefer the dedicated function and only fall back on older installations.
print("\nCalculated directly using mean_squared_error function:")
try:
    from sklearn.metrics import root_mean_squared_error  # scikit-learn >= 1.4
except ImportError:
    rmse = mean_squared_error(df["actual"], df["predicted"], squared=False)
else:
    rmse = root_mean_squared_error(df["actual"], df["predicted"])
print("RMSE: {:.2f}".format(rmse))

## Mean Absolute Error

In [29]:
# MAE is the mean of the absolute differences between actual and predicted.
# Less frequently used than RMSE because it lacks certain mathematical properties

# Build the working frame in one step: the original columns plus the absolute error.
df = original.assign(error=(original["actual"] - original["predicted"]).abs())

display(df)

# Sum of absolute errors, then averaged over the number of observations.
total_error = df["error"].sum()
mae = total_error / len(df)

print("Total Error:", total_error)
print("MAE: {:.2f}".format(mae))

Unnamed: 0,actual,predicted,error
0,10,20,10
1,3,8,5
2,6,1,5


Total Error: 20
MAE: 6.67
