In [14]:
import pandas as pd
import joblib
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load your training data to fit encoders
# Location of the training CSV. Written as a raw string so the Windows
# backslashes are taken literally; the previous plain literal only worked
# because "\P", "\L", "\d" and "\l" happen to be unrecognized escapes, which
# newer Python versions warn about.
DATA_PATH = r"E:\Personalized Financial Offer Recommendation System\Learning\decision tree\loan_prediction_datset\loan_data.csv"
df = pd.read_csv(DATA_PATH)  # use your original dataset file (same one used in setup)

# Split into feature matrix and target column
X = df.drop('loan_status', axis=1)
y = df['loan_status']

# Categorical columns: these are one-hot encoded by the preprocessor below
cat_cols = [
    'person_gender', 'person_education', 'person_home_ownership',
    'loan_intent', 'previous_loan_defaults_on_file'
]
# Numeric columns: listed for reference only. They are not passed to the
# ColumnTransformer; they reach the model through remainder='passthrough'.
num_cols = [
    'person_age', 'person_income', 'person_emp_exp', 'loan_amnt',
    'loan_int_rate', 'loan_percent_income',
    'cb_person_cred_hist_length', 'credit_score'
]

# One-hot encode the categorical columns, dropping the first level of each.
# NOTE(review): with drop='first' plus handle_unknown='ignore', a category not
# seen during fit is encoded as all zeros, i.e. the same as the dropped level.
categorical_transformer = OneHotEncoder(drop='first', handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', categorical_transformer, cat_cols)
        # no numeric scaler because PyCaret didn't normalize
    ],
    remainder='passthrough'  # every column not in cat_cols is forwarded unchanged
)

# Load the previously saved Decision Tree estimator.
# joblib.load unpickles arbitrary objects: only load files you created or trust.
dt_model = joblib.load("decision_tree_model.pkl")

# Chain preprocessing and the estimator into a single deployable object
full_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', dt_model)
])

# NOTE: Pipeline.fit fits every step, including the final estimator. This call
# therefore fits the encoder AND re-trains the loaded decision tree on the
# encoded features; the fitted state stored in decision_tree_model.pkl is
# replaced. If that state must be preserved, fit `preprocessor` on its own and
# verify that its output layout matches what the tree was trained on.
full_pipeline.fit(X, y)

# Persist the fitted pipeline for deployment
joblib.dump(full_pipeline, "loan_approval_pipeline.pkl")

print("‚úÖ Final pipeline saved as loan_approval_pipeline.pkl")


‚úÖ Final pipeline saved as loan_approval_pipeline.pkl


In [15]:
import pandas as pd
import joblib

# Restore the serialized pipeline saved as loan_approval_pipeline.pkl
pipeline = joblib.load("loan_approval_pipeline.pkl")

# Raw (un-encoded) sample applicants, written as one tuple per applicant.
# The field order below is the column order of the resulting DataFrame.
_FIELDS = (
    "person_age",
    "person_gender",
    "person_education",
    "person_income",
    "person_emp_exp",
    "person_home_ownership",
    "loan_amnt",
    "loan_intent",
    "loan_int_rate",
    "loan_percent_income",
    "cb_person_cred_hist_length",
    "credit_score",
    "previous_loan_defaults_on_file",
)
_ROWS = (
    (25, "male", "Bachelor", 45000, 3, "RENT", 8000, "PERSONAL", 12.5, 0.18, 4, 670, "No"),
    (42, "female", "Master", 120000, 15, "MORTGAGE", 25000, "VENTURE", 9.8, 0.20, 12, 710, "No"),
    (30, "female", "High School", 28000, 2, "OWN", 10000, "EDUCATION", 14.2, 0.36, 3, 580, "Yes"),
    (35, "male", "Bachelor", 75000, 10, "RENT", 15000, "DEBTCONSOLIDATION", 10.0, 0.22, 8, 690, "No"),
)

# Pair each row with the field names to get one dict per applicant
test_cases = [dict(zip(_FIELDS, row)) for row in _ROWS]

# Tabular form expected by the pipeline
test_df = pd.DataFrame(test_cases)

# Predicted label for every sample applicant
predictions = pipeline.predict(test_df)

# Report each decision; a predicted label of 1 is shown as approved
for applicant_no, label in enumerate(predictions, start=1):
    if label == 1:
        verdict = '‚úÖ Approved'
    else:
        verdict = '‚ùå Rejected'
    print(f"Applicant {applicant_no}: {verdict}")


Applicant 1: ‚úÖ Approved
Applicant 2: ‚ùå Rejected
Applicant 3: ‚ùå Rejected
Applicant 4: ‚ùå Rejected


In [13]:
# Class-probability estimates for the sample applicants: one row per applicant,
# one column per class.
# NOTE(review): judging by the recorded outputs, the second column appears to
# correspond to label 1 (approved) — confirm against pipeline.classes_.
# Relies on `pipeline` and `test_df` already being defined in the kernel.
proba = pipeline.predict_proba(test_df)
print(proba)


[[0.40859773 0.59140227]
 [0.98737601 0.01262399]
 [1.         0.        ]
 [0.62188244 0.37811756]]
