In [None]:
import sys
# Show the path of the Python interpreter running this kernel, to confirm the
# intended environment is active.
sys.executable

# Import Libraries

In [2]:
import numpy as np  # For numerical operations and arrays.	
import pandas as pd  # For data manipulation and analysis.	
import matplotlib.pyplot as plt  # For basic plotting.	
import seaborn as sns  # For enhanced plotting.	
from sklearn.preprocessing import StandardScaler  # For creating scaler instances for standardization purposes.
from sklearn.model_selection import train_test_split  # For splitting the data into sets avoiding overfitting.
from sklearn.linear_model import LogisticRegression  # For creating LogisticRegression instances.
from sklearn import metrics  # For evaluating the model
from absenteeism_scripts import *
from sklearn.model_selection import GridSearchCV  # For searching the best parameters over specified parameter values
import joblib  # For saving models

# Load Cleaned New Data, Model and Scaler

In [3]:
# File names of the persisted artefacts (plain strings; nothing is read yet).
filename = 'abs_log_model.joblib'
scalername = 'abs_log_scaler.joblib'

# Read the cleaned CSV data file into a DataFrame.
new_data = pd.read_csv('cleaned_new_data.csv')

# Restore the saved model, then the saved scaler.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only load artefacts that come from a trusted source.
model = joblib.load(filename)
scaler = joblib.load(scalername)

In [4]:
# Remove the column display limit so wide frames are shown in full.
pd.set_option('display.max_columns', None)

# Properly Use Saved Scaler

In [5]:
# Columns that get scaled; every other column is left untouched.
# These should match the features the scaler was fitted on during training
# (not verifiable from this notebook -- confirm against the training code).
scale_not_all = [
    'Transportation Expense',
    'Age',
    'Body Mass Index',
]

In [6]:
# Scale only the selected columns with the loaded scaler (transform only; no
# re-fitting happens here) and write the result back over the same columns.
# NOTE: this overwrites new_data in place, so re-running this cell without
# reloading new_data would scale values that have already been scaled.
columns_to_scale = new_data[scale_not_all]
scaled_values = scaler.transform(columns_to_scale)
new_data[scale_not_all] = scaled_values

# Predictions

In [7]:
# Predict using the loaded model.
# The whole new_data frame is passed in, so every column it holds is handed to
# the model as input.
predictions = model.predict(new_data)

In [8]:
# Quick sanity check: display the shape of the prediction result.
predictions.shape

(40,)

In [9]:
# Predicted probabilities from the same model and inputs. These are required
# later: they are passed to summary_metrics_on_new_data as pred_probabilities.
predicted_probabilities = model.predict_proba(new_data)

In [13]:
# Build the results table from the new data, the predictions and the predicted
# probabilities. summary_metrics_on_new_data comes from absenteeism_scripts;
# judging by the displayed output it appends the prediction and probability
# columns -- confirm against that module.
test_results_df = summary_metrics_on_new_data(
    new_data_df=new_data,
    predictions=predictions,
    pred_probabilities=predicted_probabilities,
)

# Preview the first ten rows.
test_results_df.head(10)

Unnamed: 0,Weekday Absence Occurred,Disease Absence,Other Factor Absence,Not-Major Issue Absence,Education,Has 2 Children,Has More than 2 Children,Has 2 Pets,Has More than 2 Pets,Transportation Expense,Age,Body Mass Index,Predictions,Not Extended Absenteeism Probability,Extended Absenteeism Probability
0,5,0,0,1,1,0,0,0,0,-0.689734,-0.971415,-1.81653,0,0.828847,0.171153
1,1,1,0,0,0,0,0,0,1,2.048718,-1.283169,0.079672,1,0.143791,0.856209
2,3,0,0,1,0,1,0,0,0,-1.050848,-0.347907,-0.394378,0,0.637463,0.362537
3,5,0,0,1,1,1,0,0,0,-0.689734,0.587355,-1.105454,0,0.786097,0.213903
4,5,1,0,0,0,1,0,0,0,-1.050848,-0.347907,-0.394378,1,0.370016,0.629984
5,1,1,0,0,0,0,0,1,0,0.002402,-1.283169,-0.631403,1,0.488628,0.511372
6,3,1,0,0,0,1,0,0,0,-1.607567,1.522616,-0.394378,0,0.505511,0.494489
7,3,0,0,1,1,0,0,0,0,-0.689734,-0.971415,-1.81653,0,0.79315,0.20685
8,5,0,0,1,0,0,0,0,0,-1.607567,0.119724,0.316698,0,0.829838,0.170162
9,3,1,0,0,0,0,0,0,0,-1.607567,0.119724,0.316698,0,0.505259,0.494741


# Save the Results

In [15]:
# Write the results table to disk; index=False keeps the row index out of the file.
results_csv_path = 'test_results.csv'
test_results_df.to_csv(results_csv_path, index=False)