# Supervised Learning Model Evaluation Lab

Complete the exercises below to solidify your knowledge and understanding of supervised learning model evaluation.

In [1]:
# Import pandas library
import pandas as pd

## Regression Model Evaluation

Load the Boston dataset using sklearn and build the datasets `X` (the feature variables) and `y` (the target).

In [2]:
# Load the Boston housing dataset into the `boston` variable.
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# importing it raises ImportError. On those versions, rebuild the same arrays
# from the original source so that `boston.data` / `boston.target` still work.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    from types import SimpleNamespace

    import numpy as np

    boston_url = "http://lib.stat.cmu.edu/datasets/boston"
    # Skip the 22-line text header; each record is wrapped over two physical
    # lines, so even rows and odd rows are stitched back together below.
    boston_raw = pd.read_csv(boston_url, sep=r"\s+", skiprows=22, header=None)
    boston = SimpleNamespace(
        data=np.hstack([boston_raw.values[::2, :], boston_raw.values[1::2, :2]]),
        target=boston_raw.values[1::2, 2],
    )

In [3]:
# Features go in X, the target goes in y
X, y = boston.data, boston.target

### Split this data set into training (80%) and testing (20%) sets.

The `MEDV` field represents the median value of owner-occupied homes (in $1000's) and is the target variable that we will want to predict.

In [4]:
# Import method train_test_split from sklearn
from sklearn.model_selection import train_test_split

In [5]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Train a `LinearRegression` model on this data set and generate predictions on both the training and the testing set.

In [6]:
# Import method LinearRegression from sklearn
from sklearn.linear_model import LinearRegression

In [7]:
# Create the linear regression model and fit it on the training split only
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

LinearRegression()

In [8]:
# Predict on the training split first, then on the testing split
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)

### Calculate and print R-squared for both the training and the testing set.

In [9]:
# Import method r2_score from sklearn
from sklearn.metrics import r2_score

In [10]:
# R-squared of the predictions on the training split
r2_score(y_true=y_train, y_pred=y_train_pred)

0.7508856358979673

In [11]:
# R-squared of the predictions on the testing split
r2_score(y_true=y_test, y_pred=y_test_pred)

0.6687594935356298

### Calculate and print mean squared error for both the training and the testing set.

In [12]:
# Import method mean_squared_error from sklearn
from sklearn.metrics import mean_squared_error

In [13]:
# Mean squared error of the predictions on the training split
mean_squared_error(y_true=y_train, y_pred=y_train_pred)

21.641412753226312

In [14]:
# Mean squared error of the predictions on the testing split
mean_squared_error(y_true=y_test, y_pred=y_test_pred)

24.291119474973677

### Calculate and print mean absolute error for both the training and the testing set.

In [15]:
# Import method mean_absolute_error from sklearn
from sklearn.metrics import mean_absolute_error

In [16]:
# Mean absolute error of the predictions on the training split
mean_absolute_error(y_true=y_train, y_pred=y_train_pred)

3.3147716267832252

In [17]:
# Mean absolute error of the predictions on the testing split
mean_absolute_error(y_true=y_test, y_pred=y_test_pred)

3.1890919658878567

## Classification Model Evaluation

Load the iris dataset using sklearn and build the datasets `X` (the feature variables) and `y` (the target).

In [18]:
# Import the iris loader and store the loaded dataset in the `iris` variable
from sklearn.datasets import load_iris
iris = load_iris()

### Split this data set into training (80%) and testing (20%) sets.

The `class` field represents the type of flower and is the target variable that we will want to predict.

In [19]:
# Features go in X, the class labels go in y
# (this rebinds the X and y used in the regression section above)
X, y = iris.data, iris.target

In [20]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Train a `LogisticRegression` model on this data set and generate predictions on both the training and the testing set.

In [21]:
# Import method LogisticRegression from sklearn
from sklearn.linear_model import LogisticRegression

In [22]:
# Create the logistic regression model (iteration cap set to 1000) and fit it
# on the training split only. Note that `lr` now refers to the classifier,
# no longer to the linear regression model from the previous section.
lr = LogisticRegression(max_iter=1000)
lr.fit(X=X_train, y=y_train)

LogisticRegression(max_iter=1000)

In [23]:
# Predict class labels on the training split first, then on the testing split
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)

### Calculate and print the accuracy score for both the training and the testing set.

In [24]:
# Import method accuracy_score from sklearn
from sklearn.metrics import accuracy_score

In [25]:
# Accuracy of the predictions on the training split
accuracy_score(y_true=y_train, y_pred=y_train_pred)

0.975

In [26]:
# Accuracy of the predictions on the testing split
accuracy_score(y_true=y_test, y_pred=y_test_pred)

1.0

### Calculate and print the balanced accuracy score for both the training and the testing set.

In [27]:
# Import method balanced_accuracy_score from sklearn
from sklearn.metrics import balanced_accuracy_score

In [28]:
# Balanced accuracy of the predictions on the training split
balanced_accuracy_score(y_true=y_train, y_pred=y_train_pred)

0.975609756097561

In [29]:
# Balanced accuracy of the predictions on the testing split
balanced_accuracy_score(y_true=y_test, y_pred=y_test_pred)

1.0

### Calculate and print the precision score for both the training and the testing set.

In [30]:
# Import method precision_score from sklearn
from sklearn.metrics import precision_score

In [31]:
# Macro-averaged precision on the training split.
# An explicit `average` is passed because the default ('binary') only applies
# to two-class targets, and this target has three classes.
precision_score(y_true=y_train, y_pred=y_train_pred, average='macro')

0.9761904761904763

In [32]:
# Macro-averaged precision on the testing split
precision_score(y_true=y_test, y_pred=y_test_pred, average='macro')

1.0

### Calculate and print the recall score for both the training and the testing set.

In [33]:
# Import method recall_score from sklearn
from sklearn.metrics import recall_score

In [34]:
# Macro-averaged recall on the training split
recall_score(y_true=y_train, y_pred=y_train_pred, average='macro')

0.975609756097561

In [35]:
# Macro-averaged recall on the testing split
recall_score(y_true=y_test, y_pred=y_test_pred, average='macro')

1.0

### Calculate and print the F1 score for both the training and the testing set.

In [36]:
# Import method f1_score from sklearn
from sklearn.metrics import f1_score

In [37]:
# Macro-averaged F1 score on the training split
f1_score(y_true=y_train, y_pred=y_train_pred, average='macro')

0.9749960931395533

In [38]:
# Macro-averaged F1 score on the testing split
f1_score(y_true=y_test, y_pred=y_test_pred, average='macro')

1.0

### Generate confusion matrices for both the training and the testing set.

In [39]:
# I would also take a look at the classification reports

In [40]:
# Import confusion_matrix and classification_report from sklearn
from sklearn.metrics import confusion_matrix, classification_report

In [41]:
# Confusion matrix for the training split (true labels first, predictions second)
confusion_matrix(y_true=y_train, y_pred=y_train_pred)

array([[40,  0,  0],
       [ 0, 38,  3],
       [ 0,  0, 39]], dtype=int64)

In [42]:
# Classification report for the training split, printed as plain text
print(classification_report(y_true=y_train, y_pred=y_train_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       1.00      0.93      0.96        41
           2       0.93      1.00      0.96        39

    accuracy                           0.97       120
   macro avg       0.98      0.98      0.97       120
weighted avg       0.98      0.97      0.97       120



In [43]:
# Confusion matrix for the testing split (true labels first, predictions second)
confusion_matrix(y_true=y_test, y_pred=y_test_pred)

array([[10,  0,  0],
       [ 0,  9,  0],
       [ 0,  0, 11]], dtype=int64)

In [44]:
# Classification report for the testing split, printed as plain text
print(classification_report(y_true=y_test, y_pred=y_test_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

