In [1]:
import joblib
from pathlib import Path
import pandas as pd
import httpx
from typing import Tuple

JSON = dict[str: float|str]
ColumnTransformer = "sklearn.ColumnTransformer"
NumpyArray = "np.ndarray"

In [2]:
# Sample customer record used to walk through the preprocessing flow.
# Key order is kept as written; it becomes the column order of the DataFrame
# that is later built from this record.
customer = dict(
    seniority=2,
    income=200,
    assets=200,
    time=1,
    amount=1000,
    monthly_payment=1000,
    job="fixed",
    home="rent",
    records="no",
    status=1,
    id=1,
)

def stripping_id(customer: JSON) -> Tuple[int | str, pd.DataFrame]:
    """
    Split a customer record into its 'id' and a one-row DataFrame of the remaining fields.

    The preloaded ColumnTransformer does not expect an 'id' column, so the id has to be
    left out of the frame that gets transformed. It is returned separately because the
    FastAPI base model expects an 'id' in the final payload. The ColumnTransformer expects
    a DataFrame, hence the conversion.

    The input dictionary is NOT modified: the id is read, not popped, so the same record
    can be passed in again (e.g. when the cell is re-run) without raising KeyError.

    Parameters:
        customer (JSON): Dictionary containing customer data, including an 'id' key.

    Returns:
        customer_id, customer_df (Tuple): The id exactly as stored in the dictionary (no
            type conversion) and a single-row DataFrame holding every other field in the
            original key order.

    Raises:
        KeyError: If `customer` has no 'id' key.
    """
    customer_id = customer['id']

    # Build a filtered copy instead of popping, so the caller's dictionary stays intact.
    features = {key: value for key, value in customer.items() if key != 'id'}
    customer_df = pd.DataFrame([features])

    return customer_id, customer_df

# Flow of data, part 1: separate the customer id from the feature columns.
cust_id, cust_df = stripping_id(customer=customer)

In [3]:
# Inspect the one-row frame returned by stripping_id.
cust_df

Unnamed: 0,seniority,income,assets,time,amount,monthly_payment,job,home,records,status
0,2,200,200,1,1000,1000,fixed,rent,no,1


In [4]:
def load_column_transformer(model_path: Path) -> ColumnTransformer:
    """
    Deserialize a saved ColumnTransformer from disk using joblib.

    Parameters:
        model_path (str or Path): Location of the saved ColumnTransformer. Relative
            paths are resolved to an absolute path before loading.

    Returns:
        object: The object joblib reads from the file (expected to be a ColumnTransformer).
    """
    # Path() accepts both str and Path, so either form of input ends up as an absolute Path.
    absolute_path = Path(model_path).resolve()
    return joblib.load(absolute_path)

In [5]:
# Flow of data, part 2: load the saved ColumnTransformer.

PATH = '../model/column_transformer.pkl'

ct = load_column_transformer(model_path=PATH)

In [6]:
def scale_customer_data(column_transformer: ColumnTransformer, customer_df: pd.DataFrame) -> pd.DataFrame:
    """
    Run a customer frame through the pre-loaded ColumnTransformer and label the result.

    Parameters:
        column_transformer (ColumnTransformer): Preloaded ColumnTransformer; only its
            `transform` and `get_feature_names_out` methods are used.
        customer_df (pd.DataFrame): Frame holding the columns the transformer expects.

    Returns:
        pd.DataFrame: The transformed values, with columns named after the transformer's
            output feature names (these still carry the step prefixes).
    """
    # The transformed values are wrapped back into a DataFrame, labelled with the
    # feature names the transformer reports for its output.
    transformed_values = column_transformer.transform(customer_df)
    output_columns = column_transformer.get_feature_names_out()

    return pd.DataFrame(transformed_values, columns=output_columns)
    

In [7]:
# Flow of data, part 3: scale the id-stripped customer frame.
df_scaled = scale_customer_data(ct, cust_df)
df_scaled.head()

Unnamed: 0,scaler__seniority,scaler__income,scaler__assets,scaler__time,scaler__amount,scaler__monthly_payment,passthrough__job,passthrough__home,passthrough__records,passthrough__status
0,-0.732469,0.811583,-0.446586,-3.10081,-0.082021,56.841009,fixed,rent,no,1


In [8]:
def preprocess_scaled_customer_dataframe(scaled_df: pd.DataFrame, customer_id: int) -> JSON:
    """
    Turn the scaled one-row DataFrame into the dictionary sent to the FastAPI endpoint.

    The frame returned by scale_customer_data has ColumnTransformer artifacts in its
    column names ('scaler__' and 'passthrough__'); these are removed. The 'status'
    column, which the FastAPI endpoint does not expect, is dropped, the first row is
    converted to a dictionary, and the customer id is added back as a string.

    The caller's DataFrame is not modified; all renaming and dropping happens on copies.

    Parameters:
        scaled_df (pd.DataFrame): DataFrame scaled using the pre-loaded ColumnTransformer.
            Must contain at least one row and a (possibly prefixed) 'status' column.
        customer_id (int): Id of the customer; stored in the result as `str(customer_id)`.

    Returns:
        json (JSON): Dictionary ready for the FastAPI entrypoint.

    Raises:
        KeyError: If there is no 'status' column after the prefixes are stripped.
        IndexError: If `scaled_df` has no rows.
    """

    # rename() returns a new frame, so the caller's column labels stay as they were.
    # Plain str.replace is a literal replacement (no regex interpretation).
    cleaned_df = scaled_df.rename(
        columns=lambda name: name.replace('scaler__', '').replace('passthrough__', '')
    )

    # Now it is safe to drop the unused 'status' column
    cleaned_df = cleaned_df.drop(columns='status')

    # Convert the single row to a dictionary
    customer_dict = cleaned_df.iloc[0].to_dict()

    # The FastAPI endpoint expects 'id' as a string. Use the argument that was passed
    # in, not a notebook-level global.
    customer_dict['id'] = str(customer_id)

    return customer_dict

In [9]:
# Show the scaled frame once more before the final preprocessing step.
df_scaled

Unnamed: 0,scaler__seniority,scaler__income,scaler__assets,scaler__time,scaler__amount,scaler__monthly_payment,passthrough__job,passthrough__home,passthrough__records,passthrough__status
0,-0.732469,0.811583,-0.446586,-3.10081,-0.082021,56.841009,fixed,rent,no,1


In [10]:
# Final part of the flow: build the dictionary that goes to the API.

dict_final = preprocess_scaled_customer_dataframe(df_scaled, cust_id)

In [11]:
# Show the dictionary returned by preprocess_scaled_customer_dataframe.
dict_final

{'seniority': -0.7324690255666864,
 'income': 0.811583129165432,
 'assets': -0.44658591157252725,
 'time': -3.1008103708931554,
 'amount': -0.08202080613084786,
 'monthly_payment': 56.841009123871764,
 'job': 'fixed',
 'home': 'rent',
 'records': 'no',
 'id': '1'}

In [12]:
def send_request(customer_dict: JSON, url: str = "http://localhost:8000/") -> JSON:
    """
        POST the customer dictionary to the FastAPI endpoint and return the decoded JSON reply.

        Kept deliberately simple and synchronous: a single blocking httpx call rather than
        concurrent code, since it is unclear how well Streamlit handles concurrency
        out of the box.

        Parameters:
            customer_dict (JSON): Dictionary shaped the way the FastAPI endpoint expects its input.
            url (str): URL of the FastAPI endpoint.

        Returns:
            JSON: The response body parsed as JSON.
    """
    reply = httpx.post(url=url, json=customer_dict)
    return reply.json()