# Model Evaluation

In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import httpimport
import joblib

from pathlib import Path
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler

In [25]:
# Import personal library: fetch the `jcds` package straight from the GitHub
# repository junclemente/jcds and bind its metrics module to `jm`.
# This cell needs network access every time it runs.
# NOTE(review): ref="master" is a moving branch, so results can change between
# runs; consider pinning a tag or commit hash for reproducibility and to limit
# the risk of executing unreviewed remote code.
with httpimport.github_repo("junclemente", "jcds", ref="master"):
    import jcds.metrics as jm

In [26]:
# Load the train / validation / test splits from the shared datasets folder
# and preview the first rows of each.
# NOTE(review): the previews show an "Unnamed: 0" column, which looks like a
# saved index; consider `index_col=0` (or re-exporting with index=False) if
# nothing downstream relies on that column.
datasets = Path("../datasets")
train_data, val_data, test_data = (
    "training_data.csv",
    "validation_data.csv",
    "testing_data.csv",
)
train_df, val_df, test_df = (
    pd.read_csv(datasets / csv_name)
    for csv_name in (train_data, val_data, test_data)
)
display(train_df.head(), val_df.head(), test_df.head())

Unnamed: 0,Undergrad_Degree,Work_Experience,Employability_Before,Status,Status_enc
0,Computer Science,No,185.174286,Placed,1
1,Engineering,No,206.867959,Not Placed,0
2,Art,No,234.881837,Not Placed,0
3,Finance,No,173.900408,Placed,1
4,Art,No,184.06398,Not Placed,0


Unnamed: 0,Undergrad_Degree,Work_Experience,Employability_Before,Status,Status_enc
0,Business,Yes,261.272959,Placed,1
1,Engineering,No,173.558776,Not Placed,0
2,Finance,No,205.074388,Placed,1
3,Business,Yes,230.52602,Placed,1
4,Business,No,229.0,Not Placed,0


Unnamed: 0,Undergrad_Degree,Work_Experience,Employability_Before,Status,Status_enc
0,Finance,No,168.775918,Placed,1
1,Business,Yes,195.508673,Placed,1
2,Computer Science,No,260.76051,Placed,1
3,Art,No,231.892551,Not Placed,0
4,Computer Science,Yes,400.0,Placed,1


## Set Up Training, Validation, and Test Dataframes

In [27]:
# Predictor columns fed to the models, and the encoded outcome column
# they are evaluated against.
variables = [
    "Undergrad_Degree",
    "Work_Experience",
    "Employability_Before",
]
target = "Status_enc"

In [33]:
# Set up the train, validation and test feature matrices / target vectors
X_train = train_df[variables]
y_train = train_df[target]
X_val = val_df[variables]
y_val = val_df[target]
X_test = test_df[variables]
y_test = test_df[target]


def encode_features(features, categories):
    """One-hot encode ``features`` against a fixed set of category levels.

    Parameters
    ----------
    features : pandas.DataFrame
        Frame holding the raw predictor columns.
    categories : dict
        Maps each categorical column name to the ordered levels it may take.

    Returns
    -------
    pandas.DataFrame
        Copy of ``features`` in which every column listed in ``categories`` is
        replaced by dummy columns (first level dropped). Because the levels
        are fixed up front, every split gets the same columns in the same
        order, and the same reference level is dropped each time. A value that
        is not among the supplied levels becomes missing and is therefore
        encoded as all zeros, i.e. it is treated like the reference level.
    """
    encoded = features.copy()
    for col, levels in categories.items():
        encoded[col] = pd.Categorical(encoded[col], categories=levels)
    return pd.get_dummies(encoded, columns=list(categories), drop_first=True)


# One-hot encode categorical variables.
# The levels are learned from the training split only and reused for the
# validation and test splits. Calling get_dummies on each split independently
# would yield different columns (or a different dropped baseline) whenever a
# split happens to lack one of the categories.
cat_cols = ["Undergrad_Degree", "Work_Experience"]
train_levels = {col: pd.Categorical(X_train[col]).categories for col in cat_cols}
X_train = encode_features(X_train, train_levels)
X_val = encode_features(X_val, train_levels)
X_test = encode_features(X_test, train_levels)

# Standardize the continuous feature: fit the scaler on the training split
# only, then apply the same transformation to validation and test so no
# information from those splits leaks into the scaling parameters.
scaler = StandardScaler()
std_cols = ["Employability_Before"]
Xs_train = X_train.copy()
Xs_val = X_val.copy()
Xs_test = X_test.copy()
Xs_train[std_cols] = scaler.fit_transform(Xs_train[std_cols])
Xs_val[std_cols] = scaler.transform(Xs_val[std_cols])
Xs_test[std_cols] = scaler.transform(Xs_test[std_cols])

# Invariant: every split must expose exactly the same feature columns.
assert list(X_val.columns) == list(X_train.columns)
assert list(X_test.columns) == list(X_train.columns)

## Import Predictive Models

In [29]:
# Load the three persisted models from the models folder.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load artifacts from a trusted source.
models = Path("../models")
dt_model, kn_model, lr_model = (
    joblib.load(models / model_file)
    for model_file in (
        "decision_tree_model.pkl",
        "k_nearest_neighbor_model.pkl",
        "logistic_regression_model.pkl",
    )
)

In [30]:
# Show the object loaded from decision_tree_model.pkl (rich repr of its settings).
dt_model

In [31]:
# Show the object loaded from k_nearest_neighbor_model.pkl (rich repr of its settings).
kn_model

In [32]:
# Show the object loaded from logistic_regression_model.pkl (rich repr of its settings).
lr_model

In [15]:
# Class balance of the training target: the sum of the encoded target
# (rows with Status_enc == 1) followed by the total number of training rows.
n_positive = y_train.sum()
n_rows = y_train.shape[0]
display(n_positive)
display(n_rows)

348

576