**Assessing Model Performance for Classification Models**

In [0]:
# import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [0]:
# data doesn't have headers, so let's create headers
_headers = ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'car']

In [3]:
# read in cars dataset
df = pd.read_csv('https://raw.githubusercontent.com/PacktWorkshops/The-Data-Science-Workshop/master/Chapter06/Dataset/car.data', names=_headers, index_col=None)
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,car
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [4]:
# encode categorical variables
_df = pd.get_dummies(df, columns=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'])
_df.head()

Unnamed: 0,car,buying_high,buying_low,buying_med,buying_vhigh,maint_high,maint_low,maint_med,maint_vhigh,doors_2,doors_3,doors_4,doors_5more,persons_2,persons_4,persons_more,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med
0,unacc,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0
1,unacc,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,1
2,unacc,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,0
3,unacc,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,1,0
4,unacc,0,0,0,1,0,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1


In this above, you convert categorical columns into numeric columns using a technique called one-hot encoding.

In [0]:
features = _df.drop('car', axis=1).values
labels = _df['car'].values

In [0]:
# split data into training and evaluation datasets
X_train, X_eval, y_train, y_eval = train_test_split(features, labels, test_size=0.3, random_state=0)
X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)

In [7]:
# train a Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [0]:
# make predictions for the validation set
y_pred = model.predict(X_val)

In [9]:
print(y_pred)

['unacc' 'acc' 'unacc' 'unacc' 'acc' 'acc' 'vgood' 'unacc' 'unacc' 'unacc'
 'acc' 'acc' 'unacc' 'unacc' 'unacc' 'acc' 'unacc' 'unacc' 'acc' 'unacc'
 'acc' 'unacc' 'unacc' 'vgood' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc'
 'vgood' 'acc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc'
 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'vgood' 'unacc'
 'unacc' 'unacc' 'unacc' 'acc' 'unacc' 'acc' 'unacc' 'unacc' 'unacc' 'acc'
 'unacc' 'vgood' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'acc'
 'unacc' 'acc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc'
 'unacc' 'acc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'acc' 'unacc'
 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'acc' 'unacc'
 'unacc' 'unacc' 'unacc' 'acc' 'good' 'acc' 'unacc' 'unacc' 'unacc'
 'unacc' 'unacc' 'acc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc'
 'vgood' 'unacc' 'acc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc' 'unacc'
 'acc' 'acc' 'acc' 'unacc' 'unacc' 'unacc' 'acc' 'unacc' 'unacc' 'unac

In [0]:
from sklearn.metrics import confusion_matrix

In [11]:
confusion_matrix(y_val, y_pred)

array([[ 41,   1,   9,   0],
       [  7,   2,   0,   1],
       [  7,   0, 178,   0],
       [  1,   0,   0,  12]])

In [0]:
from sklearn.metrics import precision_score

In [13]:
precision_score(y_val, y_pred, average='macro')

0.8184395261601145

In [0]:
from sklearn.metrics import recall_score

In [15]:
recall_score(y_val, y_pred, average='macro')

0.7222901634666341

The F1 score ranges from 0 to 1, with 1 being the best possible score. You compute the F1 score using f1_score from sklearn.metrics.

In [0]:
from sklearn.metrics import f1_score

In [17]:
f1_score(y_val, y_pred, average='macro')

0.7385284045669938

In [0]:
from sklearn.metrics import accuracy_score

In [19]:
_accuracy = accuracy_score(y_val, y_pred)
print(_accuracy)

0.8996138996138996


**Logarithmic Loss**
The logarithmic loss (or log loss) is the loss function for categorical models. It is also called categorical cross-entropy. It seeks to penalize incorrect predictions. The sklearn documentation defines it as "the negative log-likelihood of the true values given your model predictions."

In [0]:
from sklearn.metrics import log_loss

In [21]:
_loss = log_loss(y_val, model.predict_proba(X_val))
print(_loss)

0.22578836752298448


**Receiver Operating Characteristic Curve**
Recall the True Positive Rate, It is also called sensitivity. Also recall that what we try to do with a logistic regression model is find a threshold value such that above that threshold value, we predict that our input falls into a certain class, and below that threshold, we predict that it doesn't.

The Receiver Operating Characteristic (ROC) curve is a plot that shows how the true positive and false positive rates vary for a model as the threshold is changed.

In [0]:
# data doesn't have headers, so let's create headers
_headers = ['Age', 'Delivery_Nbr', 'Delivery_Time', 'Blood_Pressure', 'Heart_Problem', 'Caesarian']

In [0]:
# read in cars dataset
df1 = pd.read_csv('https://raw.githubusercontent.com/PacktWorkshops/The-Data-Science-Workshop/master/Chapter06/Dataset/caesarian.csv.arff', names=_headers, index_col=None, skiprows=15)

In [24]:
df1.head()

Unnamed: 0,Age,Delivery_Nbr,Delivery_Time,Blood_Pressure,Heart_Problem,Caesarian
0,22,1,0,2,0,0
1,26,2,0,1,0,1
2,26,2,1,1,0,0
3,28,1,0,2,0,0
4,22,2,0,1,0,1


In [0]:
features1 = df1.drop(['Caesarian'], axis=1).values
labels1 = df1[['Caesarian']].values

In [0]:
# split 80% for training and 20% into an evaluation set
X_train, X_eval, y_train, y_eval = train_test_split(features1, labels1, test_size=0.2, random_state=0)
# further split the evaluation set into validation and test sets of 10% each
X_val, X_test, y_val, y_test = train_test_split(X_eval, y_eval, test_size=0.5, random_state=0)

In [27]:
model = LogisticRegression()
model.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [0]:
y_proba = model.predict_proba(X_val)

In [30]:
print(y_proba)

[[0.32085037 0.67914963]
 [0.44452429 0.55547571]
 [0.55005086 0.44994914]
 [0.19657847 0.80342153]
 [0.55519917 0.44480083]
 [0.22083119 0.77916881]
 [0.32614894 0.67385106]
 [0.48008433 0.51991567]]
