# Running ModelScan on a XGBoost Model

## Import statements

In [None]:
import pickle
from pathlib import Path
import os
import numpy as np
from utils.pickle_codeinjection import generate_unsafe_file
from utils.xgboost_diabetes_model import train_model, get_predictions

## Train and save the model

The model is trained on a diabetes dataset, and predicts whether a person has diabetes or not (https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database). The model is saved at ./XGBoostModels/safe_model.pkl

In [None]:
# Directory under the current working directory that holds the pickled models.
model_directory = os.path.join(os.getcwd(), "XGBoostModels")
# exist_ok=True keeps this cell safe to re-run and avoids a check-then-create race.
os.makedirs(model_directory, exist_ok=True)

safe_model_path_pickle = os.path.join(model_directory, "safe_model.pkl")
# train_model is imported from utils.xgboost_diabetes_model; the returned object
# stays in memory as `model` for later cells and is also pickled to disk.
model = train_model()
with open(safe_model_path_pickle, "wb") as fo:
    pickle.dump(model, fo)

## Run the model

Run the safe model to verify that it has been trained correctly.

In [None]:
# Number of predictions to request; the same value is reused when the unsafe model is run.
number_of_predictions = 3
# get_predictions is imported from utils.xgboost_diabetes_model; here it receives the in-memory model.
get_predictions(number_of_predictions, model)

## Run ModelScan on the safe model

Now run the ModelScan tool using the `modelscan` command. Remember that we installed modelscan in our virtualenv.

**The scan results include information on the files scanned, and any issues if found. For the safe model scanned, modelscan finds no model serialization attacks.**

In [None]:
# Scan the pickled safe model; the path is relative to the notebook's working directory.
!modelscan -p XGBoostModels/safe_model.pkl

## Model Serialization Attack

Here, malicious code is injected into the safe model to read AWS secret keys. The unsafe model is saved at `./XGBoostModels/unsafe_model.pkl`

In [None]:
# Payload for the demonstration attack, consumed by generate_unsafe_file below.
# NOTE(review): "system" is presumably resolved to os.system by
# utils.pickle_codeinjection — confirm against that helper.
command = "system"
# Shell text to execute; the trailing newline and four spaces are part of the
# original payload and are preserved exactly.
malicious_code = "cat ~/.aws/secrets\n    "

In [None]:
# Reload the safe model from disk so the injection starts from the saved artifact.
# This file was written by this notebook, so unpickling it is trusted here;
# never call pickle.load on a file from an untrusted source.
with open(safe_model_path_pickle, "rb") as fo:
    safe_model_pickle = pickle.load(fo)

unsafe_model_path = os.path.join(model_directory, "unsafe_model.pkl")
# Pass the model reloaded from disk rather than the in-memory `model`, so the
# reload above is actually used and the attack targets the saved safe model.
generate_unsafe_file(safe_model_pickle, command, malicious_code, unsafe_model_path)

## Run the unsafe model

The malicious code injected in the unsafe model gets executed when it is loaded. The AWS secret keys are displayed.

Also, the unsafe model predicts diabetes outcomes just as well as the safe model, i.e., the code injection attack does not impact model performance. The unaffected performance of unsafe models makes ML models an effective attack vector.

In [None]:
# WARNING: this intentionally unpickles a file containing injected code; the
# payload executes during pickle.load. Run only in a disposable environment.
with open(unsafe_model_path, "rb") as unsafe_file:
    unsafe_model = pickle.load(unsafe_file)

# Same prediction call as for the safe model, so the outputs can be compared.
get_predictions(number_of_predictions, unsafe_model)

## Run ModelScan on the unsafe model

The scan results include information on the files scanned, and any issues if found. In this case, a critical severity level issue is found in the unsafe model scanned.

modelscan also outlines the found operator(s) and module(s) deemed unsafe.

In [None]:
# Scan the pickled unsafe model; the path is relative to the notebook's working directory.
!modelscan -p XGBoostModels/unsafe_model.pkl

## Change the reporting format of output

This will save the scan results in file: xgboost-model-scan-results.json

In [None]:
# Scan the unsafe model again, with JSON reporting (-r json) written to the output file given by -o.
!modelscan --path  XGBoostModels/unsafe_model.pkl -r json -o xgboost-model-scan-results.json