In [1]:
# Turn off Jedi-based tab completion for this IPython session.
# NOTE(review): presumably a workaround for slow or broken completions — confirm it is still needed.
%config Completer.use_jedi=False

In [2]:
import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt 
import seaborn as sns

# Apply seaborn's "darkgrid" theme to every figure drawn after this point.
sns.set_style(style="darkgrid")

## 4.2.2 Regression model evaluation metrics

Model evaluation metrics documentation - https://scikit-learn.org/stable/modules/model_evaluation.html

1. $R^2$ (R-squared), also called the coefficient of determination
2. Mean absolute error (MAE)
3. Mean squared error (MSE)

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston

In [4]:
# Load the Boston housing dataset through scikit-learn's dataset loader.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell (and its import) only runs on older releases — confirm the pinned
# version or migrate to another regression dataset.
boston = load_boston()
# The trailing semicolon suppresses the cell output, so nothing is displayed here.
boston;

In [5]:
# Build one DataFrame holding the named feature columns plus a "target" column
# with the labels, so features and labels can be inspected side by side.
boston_df = pd.DataFrame(
    data=boston["data"],
    columns=boston["feature_names"],
).assign(target=pd.Series(boston["target"]))

# Preview the first three rows.
boston_df.head(n=3)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7


In [6]:
# Seed NumPy's global RNG; train_test_split is called without random_state,
# so the shuffle below draws from this global state.
np.random.seed(42)

# Labels are the "target" column; features are every other column.
y = boston_df["target"]
X = boston_df.drop(columns="target")

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [7]:
# Re-seed the global RNG so the forest is built from the same random stream
# no matter what ran before this cell.
np.random.seed(42)

# Default-hyperparameter random forest, trained on the training split.
# fit() is left as the final expression so the fitted estimator is displayed.
model = RandomForestRegressor()
model.fit(X=X_train, y=y_train)

RandomForestRegressor()

In [8]:
# Default regressor score on the held-out split; this is the coefficient of
# determination (R^2), the same value r2_score returns for these predictions.
model.score(X=X_test, y=y_test)

0.8922527442109116

In [9]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [11]:
# Predictions for the held-out features; reused by the metric cells below.
y_preds = model.predict(X=X_test)

In [12]:
# R^2 (coefficient of determination) of the predictions against the true test targets.
r2_score(y_true=y_test, y_pred=y_preds)

0.8922527442109116

In [13]:
# MAE: the average absolute difference between predicted and true targets.
mean_absolute_error(y_true=y_test, y_pred=y_preds)

2.0395392156862746

In [14]:
# MSE: the average squared difference between predicted and true targets,
# so large errors weigh more heavily than in MAE.
mean_squared_error(y_true=y_test, y_pred=y_preds)

7.901513892156864

### 4.3 Using different evaluation metrics as Scikit-Learn functions

In [24]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the heart-disease table (path is relative to the notebook's working directory).
heart_disease = pd.read_csv("..//data//heart-disease.csv")

# Seed the global RNG once; the split and then the forest both draw from it, in that order.
np.random.seed(42)

# NOTE(review): X, y, the train/test splits and y_preds are rebound here and
# replace the regression variables defined in earlier cells.
y = heart_disease["target"]
X = heart_disease.drop(columns="target")

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train a default random-forest classifier, then predict labels for the test split.
clf = RandomForestClassifier().fit(X_train, y_train)
y_preds = clf.predict(X_test)


In [25]:
# Report the standard classification metrics for the test-set predictions.
print("Classifier metrics on the test set")
# Accuracy is the fraction of correct predictions, shown as a percentage with two decimals.
# The format spec is ":.2f" — the previous ": .2f" used the space sign flag,
# which printed a stray extra space before the number.
print(f"Accuracy: {accuracy_score(y_test, y_preds) * 100:.2f}%")
# Precision: share of predicted positives that are truly positive.
print(f"Precision: {precision_score(y_test, y_preds)}")
# Recall: share of true positives that the classifier found.
print(f"Recall: {recall_score(y_test, y_preds)}")
# F1: harmonic mean of precision and recall.
print(f"F1: {f1_score(y_test, y_preds)}")

Classifier metrics on the test set
Accuracy:  85.25%
Precision: 0.8484848484848485
Recall: 0.875
F1: 0.8615384615384615


In [23]:
from sklearn.metrics import confusion_matrix