In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier
import pickle

# Train a gradient-boosting classifier on the loan approval data and pickle
# the fitted pipeline (preprocessing + model) to gb_model.pkl.

# Load dataset
df = pd.read_csv("loan_approval_dataset.csv")

# Features and target: "loan_status" is the label; "loan_id" is excluded from
# the features (presumably a row identifier with no predictive meaning).
X = df.drop(columns=["loan_id", "loan_status"])
y = df["loan_status"]

# Identify categorical and numerical columns. Every feature column that is
# not listed as categorical is treated as numerical and will be scaled.
categorical_cols = ['education', 'self_employed']
numerical_cols = [col for col in X.columns if col not in categorical_cols]

# Preprocessing pipeline: scale the numerical columns, one-hot encode the
# categorical ones. handle_unknown="ignore" keeps transform from raising on
# a category that was not present when the encoder was fitted.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown="ignore"), categorical_cols),
    ]
)

# Create full pipeline with Gradient Boosting model. Bundling the
# preprocessor with the classifier means the pickled object accepts raw
# feature rows and applies the same transformations learned during fit.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', GradientBoostingClassifier(random_state=42)),
])

# Train/test split and model fitting. The fixed random_state makes the
# 80/20 split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
pipeline.fit(X_train, y_train)

# Save the model (this is what streamlit needs)
with open("gb_model.pkl", "wb") as f:
    pickle.dump(pipeline, f)

print("✅ Model saved successfully as gb_model.pkl")

# Score the fitted pipeline on the held-out rows so the test split is
# actually used and the quality of the saved model is visible. This runs
# after the save so the artifact is written exactly as before.
test_accuracy = pipeline.score(X_test, y_test)
print(f"Held-out accuracy: {test_accuracy:.4f}")


✅ Model saved successfully as gb_model.pkl
