In [23]:
# # run once only
# os.chdir("..")

# Configuration

In [62]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelInferenceConfig:
    """
    Immutable settings that identify the model to load for inference.

    Instances are frozen, so the fields cannot be reassigned after
    construction.

    Attributes:
        registered_model_name (str): Name under which the model is registered.
        version (int): Version number of the registered model to use.
    """

    registered_model_name: str
    version: int

In [49]:
# src/config/configuration_manager.py
from src.utils.common import read_yaml, create_directories
class ConfigurationManager:
    """
    Central access point for the pipeline's YAML-backed settings.

    On construction the configuration, parameters and schema files are
    parsed with ``read_yaml`` and the artifacts root directory is created.
    ``get_model_inference_config`` then turns the model-inference section
    of the parsed configuration into a typed config object.

    Attributes:
        config: Parsed content of the configuration file.
        params: Parsed content of the parameters file.
        schema: Parsed content of the schema file.
    """

    def __init__(
        self,
        config_filepath: str = CONFIG_FILE_PATH,
        params_filepath: str = PARAMS_FILE_PATH,
        schema_filepath: str = SCHEMA_FILE_PATH
    ):
        """
        Parse the three YAML files and create the artifacts root directory.

        Args:
            config_filepath (str): Location of the configuration YAML file.
            params_filepath (str): Location of the parameters YAML file.
            schema_filepath (str): Location of the schema YAML file.
        """
        # Each path is wrapped in Path right before it is read, so the files
        # are loaded in the order config -> params -> schema.
        config_path = Path(config_filepath)
        self.config = read_yaml(config_path)

        params_path = Path(params_filepath)
        self.params = read_yaml(params_path)

        schema_path = Path(schema_filepath)
        self.schema = read_yaml(schema_path)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_inference_config(self) -> ModelInferenceConfig:
        """
        Build the model-inference configuration object.

        Reads the ``model_inference`` section of the parsed configuration,
        creates that section's ``root_dir`` directory, and packs the MLflow
        model name and version into a ``ModelInferenceConfig``.

        Returns:
            ModelInferenceConfig: Registered model name and version to use.
        """
        inference_section = self.config.model_inference

        # The directory is created before the MLflow settings are read.
        create_directories([inference_section.root_dir])

        mlflow_section = inference_section.mlflow
        return ModelInferenceConfig(
            registered_model_name=mlflow_section.registered_model_name,
            version=mlflow_section.version,
        )


In [50]:
# Build the configuration manager with its default file paths; the last
# expression is left bare so the notebook displays the resulting
# ModelInferenceConfig as the cell output.
configuration_manager = ConfigurationManager()
configuration_manager.get_model_inference_config()

2024-06-11 09:07:11,715 - credit-scorecard-logger - INFO - yaml file: config.yaml loaded successfully
2024-06-11 09:07:11,715 - credit-scorecard-logger - INFO - yaml file: params.yaml loaded successfully
2024-06-11 09:07:11,729 - credit-scorecard-logger - INFO - yaml file: schema.yaml loaded successfully
2024-06-11 09:07:11,729 - credit-scorecard-logger - INFO - Created directory at: artifacts
2024-06-11 09:07:11,729 - credit-scorecard-logger - INFO - Created directory at: models/


ModelInferenceConfig(registered_model_name='credit-score-model', version=1)

Model Inference

In [32]:
# Set up environment variables:
# find_dotenv() locates a .env file and load_dotenv() loads its entries into
# the process environment (MLFLOW_TRACKING_URI is read from the environment
# later in this notebook).
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [58]:
import joblib
import numpy as np
import pandas as pd
from typing import Union
from src.utils.common import logger
# from src.entities.config_entity import ModelInferenceConfig
from sklearn.base import BaseEstimator


class ModelInference:
    """
    Perform inference with a model loaded from the MLflow model registry.

    On construction the registered model named in the configuration is
    loaded at the configured version and kept on the instance. The other
    methods forward input data to that model.

    Attributes:
        config (ModelInferenceConfig): Configuration for model inference.
        model: The loaded machine learning model.
    """

    def __init__(self, config: ModelInferenceConfig):
        """
        Initialize the ModelInference with a configuration and load the model.

        Args:
            config (ModelInferenceConfig): Registered model name and version
                to load.

        Raises:
            Exception: Whatever ``get_model`` raises when the model cannot
                be loaded.
        """
        self.config = config
        self.model = self.get_model(self.config.registered_model_name, self.config.version)

    def get_model(self, model_name: str, version: int) -> BaseEstimator:
        """
        Load a registered model version from MLflow.

        The tracking URI is taken from the ``MLFLOW_TRACKING_URI``
        environment variable and the model is fetched from the
        ``models:/<model_name>/<version>`` registry URI.

        Args:
            model_name (str): Name of the registered model.
            version (int): Version of the registered model.

        Returns:
            BaseEstimator: The loaded model.

        Raises:
            Exception: Any error raised while loading is logged with its
                traceback and re-raised, so a failed load is reported at
                the point of failure instead of leaving ``self.model`` as
                ``None``.
        """
        # Imported locally so this method does not depend on names that
        # happen to have been imported elsewhere in the session.
        import os

        import mlflow.sklearn

        try:
            mlflow.set_tracking_uri(os.getenv('MLFLOW_TRACKING_URI'))
            return mlflow.sklearn.load_model(f"models:/{model_name}/{version}")
        except Exception:
            logger.exception(
                "Failed to load model '%s' version %s from MLflow", model_name, version
            )
            raise

    def predict(self, data: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """
        Make predictions on input data.

        Args:
            data (Union[pd.DataFrame, np.ndarray]): Preprocessed input data
                for which predictions are to be made.

        Returns:
            np.ndarray: The predicted values.
        """
        logger.info("Predict")
        return self.model.predict(data)

    def predict_proba(
        self, data: Union[pd.DataFrame, np.ndarray]
    ) -> np.ndarray:
        """
        Make probability predictions on input data.

        Args:
            data (Union[pd.DataFrame, np.ndarray]): Preprocessed input data
                for which probability predictions are to be made.

        Returns:
            np.ndarray: The last column of the model's ``predict_proba``
                output (presumably the positive-class probability for a
                binary classifier; confirm against the model's class order).
        """
        logger.info("Predict probabilities")
        probabilities = self.model.predict_proba(data)
        return probabilities[:, -1]

    def score(
        self, data: Union[pd.DataFrame, np.ndarray]
    ) -> np.ndarray:
        """
        Give credit scores on input data.

        Delegates to the loaded model's own ``score`` method, called with
        the input data only.

        Args:
            data (Union[pd.DataFrame, np.ndarray]): Preprocessed input data
                for which credit scores are to be made.

        Returns:
            np.ndarray: The credit scores.
        """
        logger.info("Get credit score")
        return self.model.score(data)


In [61]:
# End-to-end check: build the inference object from the default
# configuration, read the preprocessed test split and display the model's
# predictions for it.
try:
    configuration_manager = ConfigurationManager()
    model_inference = ModelInference(configuration_manager.get_model_inference_config())
    test = pd.read_csv("artifacts/data_preprocessing/test.csv")
    # 'loan_status' is split off as y_test; all remaining columns are the features.
    X_test, y_test = test.drop(columns=['loan_status']), test['loan_status']
    display(model_inference.predict(X_test))
except Exception as e:
    # Any failure is only logged, so this cell never raises.
    logger.error(e)

2024-06-11 09:10:10,447 - credit-scorecard-logger - INFO - yaml file: config.yaml loaded successfully
2024-06-11 09:10:10,447 - credit-scorecard-logger - INFO - yaml file: params.yaml loaded successfully
2024-06-11 09:10:10,447 - credit-scorecard-logger - INFO - yaml file: schema.yaml loaded successfully
2024-06-11 09:10:10,447 - credit-scorecard-logger - INFO - Created directory at: artifacts
2024-06-11 09:10:10,447 - credit-scorecard-logger - INFO - Created directory at: models/


Downloading artifacts: 100%|██████████| 9/9 [00:00<00:00, 17.91it/s]

2024-06-11 09:10:12,553 - credit-scorecard-logger - INFO - Predict





array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [33]:
import os

import mlflow

# Point MLflow at the tracking server named by the MLFLOW_TRACKING_URI
# environment variable.
mlflow.set_tracking_uri(os.getenv('MLFLOW_TRACKING_URI'))

In [35]:
# Re-create the configuration manager and keep the model-inference settings
# (registered model name and version) for the cells that follow.
configuration_manager = ConfigurationManager()
inference_config = configuration_manager.get_model_inference_config()

2024-06-11 08:59:10,177 - credit-scorecard-logger - INFO - yaml file: config.yaml loaded successfully
2024-06-11 08:59:10,197 - credit-scorecard-logger - INFO - yaml file: params.yaml loaded successfully
2024-06-11 08:59:10,197 - credit-scorecard-logger - INFO - yaml file: schema.yaml loaded successfully
2024-06-11 08:59:10,197 - credit-scorecard-logger - INFO - Created directory at: artifacts
2024-06-11 08:59:10,197 - credit-scorecard-logger - INFO - Created directory at: models/


In [40]:
# Load model:
# fetch the configured registered model version straight from the MLflow
# model registry (no `load_model` helper is defined in this notebook).
model = mlflow.sklearn.load_model(
    f"models:/{inference_config.registered_model_name}/{inference_config.version}"
)

# Test model:
test = pd.read_csv("artifacts/data_preprocessing/test.csv")
# 'loan_status' is split off as y_test; all remaining columns are the features.
X_test, y_test = test.drop(columns=['loan_status']), test['loan_status']
model.score(X_test)

Downloading artifacts: 100%|██████████| 9/9 [00:00<00:00, 15.97it/s]


array([572.08448915, 583.40533029, 625.106093  , ..., 577.6666715 ,
       556.21258813, 600.6620341 ])

In [41]:
# Show the first test row as a plain dict of column name -> value.
X_test.iloc[0].to_dict()


{'person_age': 22,
 'person_income': 50000,
 'person_home_ownership': 'RENT',
 'person_emp_length': 6.0,
 'loan_intent': 'PERSONAL',
 'loan_grade': 'B',
 'loan_amnt': 6000,
 'loan_int_rate': 11.89,
 'loan_percent_income': 0.12,
 'cb_person_default_on_file': 'N',
 'cb_person_cred_hist_length': 2}

# API Test
The API is implemented with FastAPI; the script is available in `app.py`.

In [11]:
import requests

ENDPOINT = "http://127.0.0.1:8000/credit-score"

# Example applicant record sent to the API. It is named `payload` rather
# than `input` so the built-in input() is not shadowed for the rest of the
# session.
payload = {
    'person_age': 22,
    'person_income': 50000,
    'person_home_ownership': 'RENT',
    'person_emp_length': 6.0,
    'loan_intent': 'PERSONAL',
    'loan_grade': 'B',
    'loan_amnt': 6000,
    'loan_int_rate': 11.89,
    'loan_percent_income': 0.12,
    'cb_person_default_on_file': 'N',
    'cb_person_cred_hist_length': 2
}
# The timeout keeps the cell from blocking indefinitely when the local
# server is not running or does not answer.
prediction = requests.post(
    url=ENDPOINT,
    json=payload,
    headers={"Content-Type": "application/json"},
    timeout=10,
)

prediction #.json()

<Response [500]>