# Model Inference Pipeline Development
- Author: Marcellinus Aditya Witarsah
- Date: 06 June 2024

In [1]:
# Imports
%load_ext autoreload
%autoreload 2
%matplotlib inline
import pandas as pd
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import logging
import time
import pickle
import os
from pathlib import Path
from abc import ABC
from abc import abstractmethod
from scipy import stats
from typing import Tuple
from typing import Union
from dataclasses import dataclass
from src.utils.common import logger
from src.utils.common import read_yaml, create_directories
from src.constants import CONFIG_FILE_PATH, SCHEMA_FILE_PATH, PARAMS_FILE_PATH
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator
# from optbinning import Scorecard
# from optbinning import BinningProcess

In [2]:
# # run once only
# os.chdir("..")

# Configuration

In [None]:
from dataclasses import dataclass
from pathlib import Path

# NOTE(review): a second `ModelInferenceConfig` (with only `root_dir` and
# `model_path`) is defined further down in this notebook. Once that cell runs,
# it replaces this class in the kernel namespace.
@dataclass(frozen=True)
class ModelInferenceConfig:
    """
    Immutable data class for storing model inference configuration.

    The field descriptions below are inferred from the field names; this
    class is not instantiated anywhere in this notebook.

    Attributes:
        root_dir (str): Root directory for model inference.
        model_path (str): Path to the model file.
        train_data_path (str): Path to the training data.
        test_data_path (str): Path to the test data.
        experiment_name (str): Name of the experiment.
        registered_model_name (str): Name of the registered model.
        target_column (str): The name of the target column.
        binning_process (dict): Configuration for the binning process.
        logistic_regression (dict): Configuration for logistic regression.
        scorecard (dict): Configuration for the scorecard.
    """
    root_dir: str
    model_path: str
    train_data_path: str
    test_data_path: str
    experiment_name: str
    registered_model_name: str
    target_column: str
    binning_process: dict
    logistic_regression: dict
    scorecard: dict

1. Find a way to deploy the model from DagsHub: serve it with Docker and query it through an API request.
2. Use `requests` to get model inference results from the deployed endpoint.

In [2]:
# Set up environment variables:
# load_dotenv reads the .env file located by find_dotenv() so that later cells
# can read values such as MLFLOW_TRACKING_URI through os.getenv.
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [3]:
import mlflow
# Point MLflow at the tracking server named by the MLFLOW_TRACKING_URI
# environment variable (os.getenv returns None if the variable is not set).
mlflow.set_tracking_uri(os.getenv('MLFLOW_TRACKING_URI'))

In [4]:
# List every registered model together with its latest versions.
# The filter "name LIKE '%'" matches any model name.
client = mlflow.MlflowClient()
for model in client.search_registered_models(filter_string="name LIKE '%'"):
    # One line per latest version: name, originating run, version number, stage.
    for model_version in model.latest_versions:
        print(f"name={model_version.name}; run_id={model_version.run_id}; version={model_version.version}, stage={model_version.current_stage}")

name=credit-score-model; run_id=5c0e9a3c75e647439c3fd12b407d4c23; version=1, stage=None
name=WeightOfEvidence+LogisticRegression; run_id=e6157b63b09b446399ca05ea50f9c438; version=3, stage=None
name=woe-lr; run_id=d79bc9c897f042b1b81dee446f38fbd2; version=1, stage=None


In [5]:
def load_model(model_name: str, version: int) -> BaseEstimator:
    """
    Load a registered scikit-learn model from the MLflow model registry.

    The tracking URI is taken from the MLFLOW_TRACKING_URI environment
    variable before the model is fetched from ``models:/<name>/<version>``.

    Args:
        model_name (str): Name of the registered model.
        version (int): Version of the registered model to load.

    Returns:
        BaseEstimator: The loaded model.

    Raises:
        Exception: Any error raised while configuring MLflow or loading the
            model is logged with its traceback and then re-raised.
    """
    try:
        mlflow.set_tracking_uri(os.getenv('MLFLOW_TRACKING_URI'))
        return mlflow.sklearn.load_model(f"models:/{model_name}/{version}")
    except Exception:
        # Propagate the failure instead of implicitly returning None, which
        # would only surface later as an unrelated AttributeError on the
        # caller's side.
        logger.exception(
            "Failed to load model '%s' version %s", model_name, version
        )
        raise

In [6]:
# Load model:
# Version 1 of the registered model "credit-score-model".
model = load_model("credit-score-model", 1)

# Test model:
# The CSV path is relative to the current working directory.
test = pd.read_csv("../artifacts/data_preprocessing/test.csv")
# Split features from the 'loan_status' target; y_test is not used in this cell.
X_test, y_test = test.drop(columns=['loan_status']), test['loan_status']
# NOTE(review): score() is called with features only; the recorded output is an
# array with one value per row rather than a single metric — confirm against
# the model's API.
model.score(X_test)

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 9/9 [00:00<00:00, 13.88it/s]
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


array([572.08448915, 583.40533029, 625.106093  , ..., 577.6666715 ,
       556.21258813, 600.6620341 ])

In [19]:
# Show the first test row as a plain dict of feature name -> value.
X_test.iloc[0].to_dict()


{'person_age': 22,
 'person_income': 50000,
 'person_home_ownership': 'RENT',
 'person_emp_length': 6.0,
 'loan_intent': 'PERSONAL',
 'loan_grade': 'B',
 'loan_amnt': 6000,
 'loan_int_rate': 11.89,
 'loan_percent_income': 0.12,
 'cb_person_default_on_file': 'N',
 'cb_person_cred_hist_length': 2}

In [11]:
import requests

ENDPOINT="http://127.0.0.1:8000/credit-score"
# Upper bound (seconds) on how long the request may wait for the local service.
# Without a timeout, requests can block forever if the server never answers.
REQUEST_TIMEOUT_SECONDS = 10

# Example applicant, same values as the first test row shown above.
# Named `payload` instead of `input` so the built-in input() is not shadowed
# for the rest of the kernel session.
payload = {
    'person_age': 22,
    'person_income': 50000,
    'person_home_ownership': 'RENT',
    'person_emp_length': 6.0,
    'loan_intent': 'PERSONAL',
    'loan_grade': 'B',
    'loan_amnt': 6000,
    'loan_int_rate': 11.89,
    'loan_percent_income': 0.12,
    'cb_person_default_on_file': 'N',
    'cb_person_cred_hist_length': 2
}
# POST the applicant as a JSON body to the scoring endpoint.
prediction = requests.post(
    url=ENDPOINT,
    json=payload,
    headers={"Content-Type": "application/json"},
    timeout=REQUEST_TIMEOUT_SECONDS,
)

# Display the raw response object (status code); call .json() for the body
# once the endpoint answers successfully.
prediction #.json()

<Response [500]>

# Configuration

In [26]:
# Load variables from the .env file located by find_dotenv() into the
# process environment (same call as in the earlier setup cell).
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

# src/entities/config_entity.py
@dataclass(frozen=True)
class ModelInferenceConfig:
    """
    Immutable settings object for the model inference stage.

    Instances are frozen: once created, their fields cannot be reassigned.

    Attributes:
        root_dir (Path): Root directory for model inference artifacts.
        model_path (Path): Location of the model file.
    """
    root_dir: Path
    model_path: Path


# src/config/configuration_manager.py
class ConfigurationManager:
    """
    Reads the project's YAML files and builds per-stage configuration objects.

    Attributes:
        config (dict): Content parsed from the configuration file.
        params (dict): Content parsed from the parameters file.
        schema (dict): Content parsed from the schema file.
    """
    def __init__(
        self,
        config_filepath: str = CONFIG_FILE_PATH,
        params_filepath: str = PARAMS_FILE_PATH,
        schema_filepath: str = SCHEMA_FILE_PATH
    ):
        """
        Parse the three YAML files and create the artifacts root directory.

        Args:
            config_filepath (str): Location of the configuration YAML file.
            params_filepath (str): Location of the parameters YAML file.
            schema_filepath (str): Location of the schema YAML file.
        """
        self.config = read_yaml(Path(config_filepath))
        self.params = read_yaml(Path(params_filepath))
        self.schema = read_yaml(Path(schema_filepath))

        # The artifacts root comes from the parsed configuration file.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_inference_config(self):
        """
        Build the model inference configuration.

        Reads the ``model_inference`` section of the configuration file,
        creates its root directory, and wraps the paths in a
        ModelInferenceConfig.

        Returns:
            ModelInferenceConfig: Holds the root directory and the model path
            as ``Path`` objects.
        """
        inference_section = self.config.model_inference

        # Create the stage's root directory before building the config object.
        create_directories([inference_section.root_dir])

        return ModelInferenceConfig(
            root_dir=Path(inference_section.root_dir),
            model_path=Path(inference_section.model_path),
        )

In [23]:
# Smoke test: build the manager (reads the YAML files, creates directories)
# and display the resulting model inference configuration.
configuration_manager = ConfigurationManager()
configuration_manager.get_model_inference_config()

2024-06-06 09:42:17,862 - credit-scorecard-logger - INFO - yaml file: config.yaml loaded successfully
2024-06-06 09:42:17,862 - credit-scorecard-logger - INFO - yaml file: params.yaml loaded successfully
2024-06-06 09:42:17,862 - credit-scorecard-logger - INFO - yaml file: schema.yaml loaded successfully
2024-06-06 09:42:17,862 - credit-scorecard-logger - INFO - Created directory at: artifacts
2024-06-06 09:42:17,862 - credit-scorecard-logger - INFO - Created directory at: models/


ModelInferenceConfig(root_dir=WindowsPath('models'), model_path=WindowsPath('models/model.joblib'))

# Model Inference

In [27]:
import joblib
class ModelInference:
    """
    Perform model inference using a pre-trained model.

    The model is loaded from the path given in the configuration when the
    object is created; `predict` and `predict_proba` then delegate to it.

    Attributes:
        config (ModelInferenceConfig): Configuration for model inference.
        model: The loaded machine learning model.
    """
    def __init__(self, config: ModelInferenceConfig):
        """
        Store the configuration and load the model.

        Args:
            config (ModelInferenceConfig): The configuration containing paths for model inference.
        """
        self.config = config
        self.model = self.get_model()

    def get_model(self):
        """
        Load the model from the file specified in the configuration.

        Returns:
            model: The object deserialized from ``config.model_path``.

        Raises:
            OSError: If the model file cannot be opened.
        """
        logger.info("Load model")
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only load model files from a trusted source.
        with open(self.config.model_path, 'rb') as f:
            return joblib.load(f)

    def predict(self, data: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """
        Make predictions on input data.

        Args:
            data (Union[pd.DataFrame, np.ndarray]): Preprocessed input data
                for which predictions are to be made.

        Returns:
            np.ndarray: The output of the model's ``predict`` method.
        """
        logger.info("Predict")
        return self.model.predict(data)

    def predict_proba(self, data: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """
        Make probability predictions on input data.

        Args:
            data (Union[pd.DataFrame, np.ndarray]): Preprocessed input data
                for which probability predictions are to be made.

        Returns:
            np.ndarray: The last column of the model's ``predict_proba``
            output, one value per input row.
        """
        logger.info("Predict probabilities")
        probabilities = self.model.predict_proba(data)
        # Keep only the last column — presumably the probability of the
        # positive class; confirm against the model's class ordering.
        return probabilities[:, -1]


In [28]:
# End-to-end check: build the config, load the model, and predict
# probabilities for the preprocessed test set.
try:
    configuration_manager = ConfigurationManager()
    model_inference = ModelInference(configuration_manager.get_model_inference_config())
    data = pd.read_csv("artifacts/data_preprocessing/test.csv")
    # Drop the target column so only features are passed to the model.
    X = data.drop(columns=['loan_status'])
    prediction = model_inference.predict_proba(X)
    print(prediction)
except Exception as e:
    # Log the full traceback and re-raise so a failed cell is visibly failed,
    # instead of continuing with a single log line and stale variables.
    logger.exception(e)
    raise

2024-06-06 09:46:53,736 - credit-scorecard-logger - INFO - yaml file: config.yaml loaded successfully
2024-06-06 09:46:53,736 - credit-scorecard-logger - INFO - yaml file: params.yaml loaded successfully
2024-06-06 09:46:53,736 - credit-scorecard-logger - INFO - yaml file: schema.yaml loaded successfully
2024-06-06 09:46:53,736 - credit-scorecard-logger - INFO - Created directory at: artifacts
2024-06-06 09:46:53,736 - credit-scorecard-logger - INFO - Created directory at: models/
2024-06-06 09:46:53,736 - credit-scorecard-logger - INFO - Load model
2024-06-06 09:46:53,779 - credit-scorecard-logger - INFO - Predict probabilities
[0.07598039 0.05261978 0.01292163 ... 0.06346361 0.12475196 0.02963603]


# Testing
Restart the kernel and run the cells below in a fresh session.

In [1]:
import os
# Move the working directory one level up so that the relative paths and the
# `src` imports used below resolve (presumably the repository root — confirm).
# NOTE: not idempotent — every re-run of this cell moves up one more level,
# so run it exactly once per kernel session.
os.chdir("..")

In [2]:
import numpy as np
import pandas as pd
from src.utils.common import logger
from src.config.configuration_manager import ConfigurationManager
from src.models.model_inference import ModelInference
from typing import Union

class ModelInferencePipeline:
    """
    A pipeline class for running model inference.

    On construction it builds the configuration manager, obtains the model
    inference configuration, and creates the ModelInference component;
    `run` then returns predicted probabilities for the given data.

    Attributes:
        configuration_manager (ConfigurationManager): Manages the configuration settings.
        model_inference_config (ModelInferenceConfig): Configuration for model inference.
        model_inference (ModelInference): Instance of ModelInference to make predictions.
    """
    def __init__(self):
        """
        Instantiate the ModelInferencePipeline class.
        """
        self.configuration_manager = ConfigurationManager()
        self.model_inference_config = (
            self.configuration_manager.get_model_inference_config()
        )
        self.model_inference = ModelInference(self.model_inference_config)

    def run(self, data: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """
        Run the model inference pipeline on input data.

        Args:
            data (Union[pd.DataFrame, np.ndarray]): The input data for prediction.

        Returns:
            np.ndarray: The result of ``model_inference.predict_proba(data)``.
        """
        # np.ndarray is the array type; np.array is only the factory function
        # and is not valid as a type annotation.
        return self.model_inference.predict_proba(data)


In [3]:
# Run the packaged pipeline on the preprocessed test set.
data = pd.read_csv("artifacts/data_preprocessing/test.csv")
# Pass features only: drop the 'loan_status' label, consistent with the
# ModelInference check earlier in this notebook.
features = data.drop(columns=["loan_status"])
model_inference_pipeline = ModelInferencePipeline()
model_inference_pipeline.run(features)

2024-06-06 09:57:21,813 - credit-scorecard-logger - INFO - yaml file: config.yaml loaded successfully
2024-06-06 09:57:21,862 - credit-scorecard-logger - INFO - yaml file: params.yaml loaded successfully
2024-06-06 09:57:21,866 - credit-scorecard-logger - INFO - yaml file: schema.yaml loaded successfully
2024-06-06 09:57:21,867 - credit-scorecard-logger - INFO - Created directory at: artifacts
2024-06-06 09:57:21,869 - credit-scorecard-logger - INFO - Created directory at: models/
2024-06-06 09:57:21,870 - credit-scorecard-logger - INFO - Load model
2024-06-06 09:57:24,069 - credit-scorecard-logger - INFO - Predict probabilities


array([0.07598039, 0.05261978, 0.01292163, ..., 0.06346361, 0.12475196,
       0.02963603])