In [8]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
input_root = '/kaggle/input'
for folder, _subfolders, names in os.walk(input_root):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/trained-data/synthetic_crosssell_unsupervised_train.csv
/kaggle/input/database-final/database.csv
/kaggle/input/loan-sanction/scikitlearn/default/1/scaler (1).pkl
/kaggle/input/loan-sanction/scikitlearn/default/1/label_encoders (1).pkl
/kaggle/input/sklearn/scikitlearn/default/1/label_encoders.pkl
/kaggle/input/sklearn/scikitlearn/default/1/glm.pkl
/kaggle/input/sklearn/scikitlearn/default/1/lgbm_model.pkl
/kaggle/input/sklearn/scikitlearn/default/1/feature_columns.pkl
/kaggle/input/sklearn/scikitlearn/default/1/scaler.pkl
/kaggle/input/sklearn/scikitlearn/default/1/le.pkl
/kaggle/input/sklearn/scikitlearn/default/1/knn.pkl
/kaggle/input/keras/keras/default/1/config.json
/kaggle/input/keras/keras/default/1/metadata.json
/kaggle/input/keras/keras/default/1/model.weights.h5


In [6]:
import re
import statsmodels.api as sm
import joblib
from tensorflow.keras.models import load_model
import warnings

2026-02-08 18:04:14.275220: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770573854.541319      55 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770573854.611767      55 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770573855.244228      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770573855.244332      55 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770573855.244335      55 computation_placer.cc:177] computation placer alr

In [9]:
# Encoded value for each ORGANIZATION_TYPE category; LoanSanction feeds this
# mapping to Series.map, so a category that is missing here becomes NaN.
# NOTE(review): presumably smoothed target-mean encodings exported from the
# training run, and the list may be truncated — confirm against the training data.
smoothing = {
    organization: np.float64(encoded_value)
    for organization, encoded_value in (
        ('Advertising', 0.08156557675300376),
        ('Agriculture', 0.10462958124779571),
        ('Bank', 0.05196952252465978),
        ('Business Entity Type 1', 0.08138259729639118),
        ('Business Entity Type 2', 0.08527949334417956),
        ('Business Entity Type 3', 0.09299443087254153),
        ('Cleaning', 0.11039736368358759),
        ('Construction', 0.11674450872003694),
        ('Culture', 0.05605986682408393),
        ('Electricity', 0.06646592520267568),
        ('Emergency', 0.07159173367468184),
        ('Government', 0.06979136625644024),
        ('Hotel', 0.06435172970754985),
        ('Housing', 0.07944989494426169),
    )
}

In [None]:
### LoanSanction Class

A class that loads the saved LightGBM model, label encoders, and scaler, and then performs:

- Target encoding of `ORGANIZATION_TYPE`
- Label encoding of the categorical columns
- Scaling of the numerical features
- Prediction of the loan sanction result

In [None]:
class LoanSanction:
    """Loan-sanction scorer built on pre-trained, pickled artifacts.

    Construction loads three joblib files from ``model_path`` (label encoders,
    a scaler and an LGBM model). ``_predict`` turns one raw record into a
    single-row frame, target-encodes ``ORGANIZATION_TYPE``, label-encodes the
    text columns, scales the numeric columns and returns whatever the model's
    ``predict`` produces.
    """

    def __init__(self, model_path: str, training_data_path: str, smoothing):
        """Store the configuration and load the artifacts.

        Args:
            model_path: Directory that holds ``label_encoders (1).pkl``,
                ``scaler (1).pkl`` and ``lgbm_model.pkl``.
            training_data_path: Stored on the instance; not read by this class.
            smoothing: Mapping from an ``ORGANIZATION_TYPE`` value to its
                encoded number. ``None`` is treated as an empty mapping.
        """
        self.model_path = model_path
        self.training_data_path = training_data_path
        self.models = {}
        self.smoothing = smoothing
        self._load_models()

    def _load_models(self):
        """Load the label encoders, scaler and LGBM model into ``self.models``."""
        # SECURITY: joblib.load unpickles the file, which can execute arbitrary
        # code — only point model_path at trusted artifacts.
        # NOTE(review): all three files are read from the same directory;
        # confirm they really are stored together.
        self.models['label_encoder'] = joblib.load(f"{self.model_path}/label_encoders (1).pkl")
        self.models['Scaler'] = joblib.load(f"{self.model_path}/scaler (1).pkl")
        self.models['lgbm'] = joblib.load(f"{self.model_path}/lgbm_model.pkl")

    @staticmethod
    def _is_text_column(dtype) -> bool:
        """Return True for object columns and for pandas string dtypes."""
        return pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype)

    def _Encode(self, inputs: pd.DataFrame) -> pd.DataFrame:
        """Return an encoded copy of ``inputs``; the argument is left untouched.

        Adds ``ORGANIZATION_TYPE_encoded`` by looking ``ORGANIZATION_TYPE`` up in
        ``self.smoothing`` and then replaces every text column with the output
        of its fitted label encoder.

        Raises:
            KeyError: if ``ORGANIZATION_TYPE`` is absent or a text column has
                no entry in the loaded label encoders.
        """
        encoded = inputs.copy()

        # Series.map(None) raises TypeError, so a missing table is treated as
        # "no known categories". Unknown categories come out as NaN either way.
        mapping = self.smoothing if self.smoothing is not None else {}
        encoded['ORGANIZATION_TYPE_encoded'] = (
            encoded['ORGANIZATION_TYPE'].map(mapping).astype('float64')
        )

        # The target encoding above must run first: it needs the raw strings,
        # which the loop below overwrites (ORGANIZATION_TYPE included).
        categorical_cols = [c for c in encoded.columns if self._is_text_column(encoded[c].dtype)]
        for col in categorical_cols:
            encoded[col] = self.models['label_encoder'][col].transform(encoded[col].astype(str))
        return encoded

    def _predict(self, d: dict):
        """Score a single raw record.

        Args:
            d: One record as a ``{column: value}`` dict; it must contain
                ``ORGANIZATION_TYPE``.

        Returns:
            The unmodified result of the loaded model's ``predict`` call on the
            one-row frame. NOTE(review): for scikit-learn style estimators this
            is an array with one entry, not a bare float — confirm for the
            pickled model.
        """
        inputs = self._Encode(pd.DataFrame([d]))

        # Selected after encoding, so label-encoded int64 columns are scaled too.
        # NOTE(review): confirm this matches the columns the scaler was fitted on.
        numerical_cols = [c for c in inputs.columns
                          if inputs[c].dtype in ['int64', 'float64'] and c != 'ORGANIZATION_TYPE_encoded']
        inputs[numerical_cols] = self.models['Scaler'].transform(inputs[numerical_cols])

        # Strip every character outside [A-Za-z0-9_] from the column names
        # before handing the frame to the model.
        inputs = inputs.rename(columns=lambda x: re.sub('[^A-Za-z0-9_]+', '', x))
        return self.models['lgbm'].predict(inputs)


### CustomerInferenceEngine Class

Responsible for:

- Cross-sell product recommendations
- Nearest-neighbor lookup of similar customers
- Monthly spend prediction using the GLM
- Running `LoanSanction` as a pre-check before any recommendation is made

In [None]:
# Installs a blanket "ignore" filter: every warning raised after this line is
# suppressed, including deprecation and data-quality warnings from the libraries used below.
warnings.filterwarnings("ignore")
class CustomerInferenceEngine:
    """Cross-sell inference gated by a loan-sanction check.

    For one customer record ``_predict`` first asks ``LoanSanction`` for a
    decision. When the decision is 1 it looks the customer up in the database
    table, finds the nearest training customers in the encoder's embedding
    space, ranks the products those neighbours own and predicts spend with the
    GLM.
    """

    _DEFAULT_DATA_BASE_PATH = "/kaggle/input/database-final/database.csv"
    # Added to a distance before inverting it so a zero distance cannot divide by zero.
    _DISTANCE_EPS = 1e-6

    def __init__(self, model_path: str, training_data_path: str,
                 smoothing=None, data_base_path: str = None):
        """Load every artifact and both CSV tables.

        Args:
            model_path: Directory that holds the pickled artifacts and the encoder.
            training_data_path: CSV whose row positions match the indices
                returned by the KNN model.
            smoothing: ``ORGANIZATION_TYPE`` encoding table handed to
                ``LoanSanction``. When omitted, a module-level variable named
                ``smoothing`` is used if one exists.
            data_base_path: CSV with one row per known customer; defaults to
                the Kaggle database file.
        """
        self.model_path = model_path
        self.training_data_path = training_data_path
        self.data_base_path = data_base_path or self._DEFAULT_DATA_BASE_PATH
        self.smoothing = smoothing if smoothing is not None else globals().get("smoothing")
        self._loan_sanction = None  # built lazily on the first _predict call
        self.models = {}
        self._load_models()
        self._load_training_data()
        self._load_data_base()

    def _load_models(self):
        """Populate ``self.models`` from files under ``model_path``."""
        # NOTE(review): load_model is given a path without a file extension;
        # confirm the installed Keras version can load it.
        self.models["encoder"] = load_model(f"{self.model_path}/encoder")
        # SECURITY: joblib.load unpickles the files, which can execute arbitrary
        # code — only load trusted artifacts.
        self.models["scaler"] = joblib.load(f"{self.model_path}/scaler.pkl")
        self.models["knn"] = joblib.load(f"{self.model_path}/knn.pkl")
        self.models["label_encoders"] = joblib.load(f"{self.model_path}/label_encoders.pkl")
        self.models["feature_columns"] = joblib.load(f"{self.model_path}/feature_columns.pkl")
        self.models["glm"] = joblib.load(f"{self.model_path}/glm.pkl")
        self.models["loan_encoder"] = joblib.load(f"{self.model_path}/le.pkl")

    @staticmethod
    def _read_table(path: str) -> pd.DataFrame:
        """Read a CSV and drop the ``Unnamed: 0`` column when it is present."""
        return pd.read_csv(path).drop(columns=["Unnamed: 0"], errors="ignore")

    def _load_training_data(self):
        """Load the training table into ``self.training_data``."""
        self.training_data = self._read_table(self.training_data_path)

    def _load_data_base(self):
        """Load the customer table into ``self.data_base``."""
        self.data_base = self._read_table(self.data_base_path)

    def _encode_common_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the shared categorical columns label-encoded.

        Only ``PREFERRED_CHANNEL`` and ``LAST_CAMPAIGN_RESPONSE`` are handled,
        and only when they are present in ``df``.
        """
        encoded = df.copy()
        for col in ("PREFERRED_CHANNEL", "LAST_CAMPAIGN_RESPONSE"):
            if col in encoded.columns:
                encoded[col] = self.models["label_encoders"][col].transform(encoded[col].astype(str))
        return encoded

    def _find_similar_customers(self, customer_data: pd.DataFrame, k: int = 10):
        """Return the ``k`` nearest training customers for the first row of ``customer_data``.

        Each neighbour is a dict with ``rank`` (1-based), ``neighbor_index``
        (row position in the training table), ``distance``,
        ``similarity_score`` (inverse distance) and ``product`` (the
        neighbour's ``CURRENT_LOAN_OWNED``, or ``None`` when that column is absent).
        """
        features = customer_data.drop(columns=["CURRENT_LOAN_OWNED", "Value_12M"], errors="ignore")
        features = self._encode_common_features(features)
        features = features[self.models["feature_columns"]]

        scaled = self.models["scaler"].transform(features)
        embedding = self.models["encoder"].predict(scaled, verbose=0)
        distances, indices = self.models["knn"].kneighbors(embedding, n_neighbors=k)

        neighbors = []
        # Only the first query row is reported.
        for rank, (position, distance) in enumerate(zip(indices[0], distances[0]), start=1):
            position = int(position)
            distance = float(distance)
            neighbors.append({
                "rank": rank,
                "neighbor_index": position,
                "distance": distance,
                "similarity_score": float(1 / (distance + self._DISTANCE_EPS)),
                "product": self.training_data.iloc[position].get("CURRENT_LOAN_OWNED"),
            })
        return neighbors

    def _recommend_products(self, neighbors):
        """Aggregate neighbour products into a ranking.

        Returns:
            A list of ``(product, stats)`` pairs sorted by descending
            ``total_similarity``; ``stats`` holds ``count``,
            ``total_similarity`` and ``avg_similarity``. Neighbours without a
            product are skipped.
        """
        product_scores = {}
        for neighbor in neighbors:
            product = neighbor.get("product")
            if pd.isna(product):
                continue
            stats = product_scores.setdefault(product, {"count": 0, "total_similarity": 0.0})
            stats["count"] += 1
            stats["total_similarity"] += neighbor["similarity_score"]

        for stats in product_scores.values():
            stats["avg_similarity"] = stats["total_similarity"] / stats["count"]

        return sorted(product_scores.items(), key=lambda item: item[1]["total_similarity"], reverse=True)

    def _predict_spend(self, customer_data: pd.DataFrame) -> float:
        """Predict spend for the first row of ``customer_data`` with the GLM.

        ``CURRENT_LOAN_OWNED`` is label-encoded with the dedicated loan encoder,
        a ``const`` column of ones is added and the frame is reduced to exactly
        the columns named in the GLM's ``params`` index.
        """
        # CURRENT_LOAN_OWNED must survive this drop: it is encoded just below.
        features = customer_data.drop(columns=["Value_12M", "MONTHLY_SPEND"], errors="ignore")
        features = self._encode_common_features(features)
        features["CURRENT_LOAN_OWNED"] = self.models["loan_encoder"].transform(
            features["CURRENT_LOAN_OWNED"].astype(str)
        )
        if "const" not in features.columns:
            features.insert(0, "const", 1.0)

        glm_cols = list(self.models["glm"].params.index)
        prediction = self.models["glm"].predict(features[glm_cols])

        # The prediction may be a Series that keeps the input's index, so take
        # the first value by position rather than by the label 0.
        return float(np.asarray(prediction).ravel()[0])

    def _get_loan_sanction(self):
        """Return the shared ``LoanSanction`` instance, creating it on first use."""
        gate = getattr(self, "_loan_sanction", None)
        if gate is None:
            gate = LoanSanction(
                self.model_path,
                self.training_data_path,
                smoothing=getattr(self, "smoothing", None),
            )
            self._loan_sanction = gate
        return gate

    def _predict(self, customer_data: dict) -> dict:
        """Run the full pipeline for one customer record.

        Args:
            customer_data: Raw record; must contain ``SK_ID_CURR`` and the
                fields ``LoanSanction`` needs.

        Returns:
            * ``{"customer_id", "loan_approved": False}`` when the sanction
              decision is not 1;
            * the same with ``loan_approved: True`` plus an ``error`` message
              when the customer id is not in the database;
            * otherwise ``customer_id``, ``loan_approved: True``,
              ``current_product``, ``predicted_monthly_spend`` and up to five
              ``neighbors`` and ``recommendations``.
        """
        customer_id = customer_data["SK_ID_CURR"]

        # The model may return an array; reduce it to its first entry
        # explicitly instead of relying on array truthiness.
        decision = np.asarray(self._get_loan_sanction()._predict(customer_data)).ravel()
        if decision.size == 0 or decision[0] != 1:
            return {"customer_id": customer_id, "loan_approved": False}

        matches = self.data_base[self.data_base["SK_ID_CURR"] == customer_id]
        if matches.empty:
            return {"customer_id": customer_id, "loan_approved": True,
                    "error": "Customer not found in database"}

        profile = matches.copy()
        neighbors = self._find_similar_customers(profile)
        recommendations = self._recommend_products(neighbors)
        predicted_spend = self._predict_spend(profile)

        return {
            "customer_id": customer_id,
            "loan_approved": True,
            "current_product": customer_data.get("CURRENT_LOAN_OWNED"),
            "predicted_monthly_spend": predicted_spend,
            "neighbors": neighbors[:5],
            "recommendations": recommendations[:5],
        }
