# Testing the ONNX model
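This notebook tests the ONNX model exported from the scikit-learn model: it loads the processed test set, runs one hand-written record through the saved preprocessor and the ONNX runner, and then reports classification metrics for the full test set.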

In [1]:
%load_ext autoreload
%autoreload 2

from app.util import ONNXRunner
from stroke_prediction.config import PROJ_ROOT

# Point the runner at the exported ONNX file under the project's models/ directory.
onnx_model_path = PROJ_ROOT / "models" / "model.onnx"
runner = ONNXRunner(model_path=onnx_model_path)

## Prepare test data

In [2]:
import pandas as pd

from stroke_prediction.config import PROCESSED_DATA_DIR

# Read the processed test split from the processed-data directory.
test_parquet_path = PROCESSED_DATA_DIR / "test-stroke-data.parquet"
test_data = pd.read_parquet(test_parquet_path)

# Separate the "stroke" target from the feature columns.
y_test = test_data["stroke"]
X_test = test_data.drop(columns=["stroke"])

# `test_sample` is the full feature frame; later cells index and score it.
test_sample = X_test
test_sample

Unnamed: 0,ever_married_Yes,work_type_Never_worked,work_type_Private,work_type_Self-employed,work_type_children,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes,age,hypertension,heart_disease,avg_glucose_level,bmi
0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,79.0,0.0,0.0,65.59,28.1
1,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,45.0,0.0,0.0,90.35,22.3
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,94.92,20.4
3,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,49.0,0.0,0.0,125.63,57.2
4,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,50.0,0.0,0.0,120.44,30.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,54.0,0.0,0.0,206.52,35.4
763,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,78.0,0.0,0.0,243.50,26.1
764,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,64.0,0.0,0.0,239.64,34.6
765,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,10.0,0.0,0.0,117.03,21.1


In [3]:
# Show every feature of the row at position 101 as a Series.
test_sample.iloc[101]

ever_married_Yes                   1.00
work_type_Never_worked             0.00
work_type_Private                  0.00
work_type_Self-employed            0.00
work_type_children                 0.00
smoking_status_formerly smoked     0.00
smoking_status_never smoked        0.00
smoking_status_smokes              0.00
age                               62.00
hypertension                       0.00
heart_disease                      0.00
avg_glucose_level                 73.44
bmi                               23.40
Name: 101, dtype: float64

In [23]:
from app.util import load_preprocessor

# Columns to feed the model, in the same order as the processed test frame
# displayed earlier in this notebook (the preprocessor emits `ever_married`
# where that frame has `ever_married_Yes`).
feature_columns = [
    "ever_married",
    "work_type_Never_worked",
    "work_type_Private",
    "work_type_Self-employed",
    "work_type_children",
    "smoking_status_formerly smoked",
    "smoking_status_never smoked",
    "smoking_status_smokes",
    "age",
    "hypertension",
    "heart_disease",
    "avg_glucose_level",
    "bmi",
]

# One hand-written raw record. Its age, avg_glucose_level and bmi match the
# row at position 101 of the test frame shown above.
# NOTE(review): that row has ever_married_Yes == 1.0 while this record sets
# ever_married to False -- confirm whether the difference is intentional.
raw_record = {
    "age": 62,
    "gender": "Male",
    "work_type": "Govt_job",
    "residence_type": "Urban",
    "hypertension": False,
    "heart_disease": False,
    "ever_married": False,
    "avg_glucose_level": 73.44,
    "bmi": 23.4,
    "smoking_status": "Unknown",
}
raw_frame = pd.DataFrame([raw_record])

preprocessor = load_preprocessor(PROJ_ROOT / "models" / "preprocessor.pkl")
# Request pandas output so the transformed columns can be selected by name.
preprocessor.set_output(transform="pandas")

# Assign the column selection back to `test`, so the frame later passed to
# `runner.predict` is exactly the frame displayed by this cell.
test = preprocessor.transform(raw_frame)[feature_columns]
test

Unnamed: 0,ever_married,work_type_Never_worked,work_type_Private,work_type_Self-employed,work_type_children,smoking_status_formerly smoked,smoking_status_never smoked,smoking_status_smokes,age,hypertension,heart_disease,avg_glucose_level,bmi
0,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62,False,False,73.44,23.4


In [17]:
# Predict for the single preprocessed record. The result gets its own name so
# that `y_pred` always refers to the full test-set predictions read by the
# metrics cell, whatever order the cells are executed in.
single_pred = runner.predict(test)
single_pred

0    1
Name: output_label, dtype: int64

## Run the model

In [None]:
# Score the whole test feature frame with the ONNX runner. `y_pred` is read by
# the classification-report cell below, so it must line up with `y_test`.
y_pred = runner.predict(test_sample)
y_pred

0      0
1      0
2      0
3      0
4      0
      ..
762    0
763    0
764    0
765    0
766    0
Name: output_label, Length: 767, dtype: int64

## Check the model metrics

In [5]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 of the ONNX predictions against the true labels.
class_labels = ["No Stroke", "Stroke"]
report_text = classification_report(y_test, y_pred, target_names=class_labels)
print(report_text)

              precision    recall  f1-score   support

   No Stroke       0.96      0.98      0.97       729
      Stroke       0.22      0.13      0.16        38

    accuracy                           0.93       767
   macro avg       0.59      0.55      0.56       767
weighted avg       0.92      0.93      0.93       767

