  # Diagnosing the Model

  Evaluate the accuracy and health of the logistic regression model by creating a confusion matrix and a classification report that describe its performance.

In [1]:
from pathlib import Path
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import pandas as pd

  ### Create, train, and execute the Logistic Regression model

In [2]:
# Load the encoded employee dataset from disk
data = Path('emp_data_encoded.csv')
hr_df = pd.read_csv(data)

# Separate the target column ("left") from the remaining feature columns
X = hr_df.drop(columns="left")
y = hr_df["left"]

# Hold out a test subset. stratify=y splits in a stratified fashion using the
# target as class labels; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)

# Build the logistic regression estimator, then fit it on the training subset
classifier = LogisticRegression(random_state=1, max_iter=500, solver='lbfgs')
classifier.fit(X_train, y_train)

# Predict on the held-out rows and pair each prediction with its true label
predictions = classifier.predict(X_test)
results = pd.DataFrame({"Prediction": predictions, "Actual": y_test})
results = results.reset_index(drop=True)  # discard the original row labels
results.head()

Unnamed: 0,Prediction,Actual
0,0,0
1,0,0
2,0,0
3,0,1
4,0,1


  ### Confusion Matrix

In [3]:
# Create a confusion matrix.
# The label order is passed explicitly so the layout is unambiguous:
# rows are the actual classes, columns are the predicted classes.
class_labels = classifier.classes_
cm = confusion_matrix(y_test, predictions, labels=class_labels)
cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)

# Calculate the accuracy score (printed together with the classification report)
acc_score = accuracy_score(y_test, predictions)

# Jupyter only renders a cell's final expression, so the matrix goes last;
# a bare confusion_matrix(...) call earlier in the cell would be discarded.
cm_df

  ### Classification Report

In [4]:
# Human-readable names for the two classes, given in label order
target_names = ["Stay", "Leave"]

# Build the per-class precision / recall / f1 report as text
report = classification_report(y_test, predictions, target_names=target_names)

# Emit the report, then the overall accuracy on its own line
print(report, f"Accuracy Score : {acc_score}", sep="\n")

              precision    recall  f1-score   support

        Stay       0.82      0.93      0.87      2857
       Leave       0.61      0.33      0.43       893

    accuracy                           0.79      3750
   macro avg       0.71      0.63      0.65      3750
weighted avg       0.77      0.79      0.77      3750

Accuracy Score : 0.7893333333333333
