<a href="https://colab.research.google.com/github/sush0677/predictive-analysis/blob/main/predictive_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Step 1: Importing Libraries***

In [1]:
# Data manipulation and analysis
import pandas as pd

# Machine Learning - Model Selection & Evaluation
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Machine Learning - Preprocessing
from sklearn.preprocessing import StandardScaler

# Machine Learning - Models
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
import xgboost as xgb

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns


# ***Step 2: Load and Preprocess Data***

In [3]:
# Read the loan-default CSV into a DataFrame
df = pd.read_csv('/content/loan_default_prediction_100k.csv')

# Separate the target column ('Default') from the remaining columns,
# all of which are passed on as features
y = df['Default']
X = df.drop(columns='Default')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: fit the scaler on the training rows only, then apply that
# same fitted scaler to both the training and the test features
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# ***Step 3: Define Base Models and Meta-model***

In [4]:
# Base models: (name, estimator) pairs consumed by the stacking ensemble.
# Seeds are fixed where the estimator accepts one so runs are repeatable.
base_models = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    # `use_label_encoder` is deliberately not passed: it is a deprecated
    # XGBoost argument that newer releases no longer recognise and only
    # warn about ("Parameters: { use_label_encoder } are not used").
    ('xgb', xgb.XGBClassifier(n_estimators=100, eval_metric='logloss', random_state=42)),
    # max_iter caps the number of solver iterations for the logistic regression
    ('lr', LogisticRegression(max_iter=1000))
]

# Meta-model: the final estimator that combines the base models' predictions
meta_model = GradientBoostingClassifier(n_estimators=50, learning_rate=0.1, random_state=42)


# ***Step 4: Define and Train Stacking Ensemble***

In [5]:
# Assemble the stacked classifier: the base models feed the meta-model,
# with 5-fold cross-validation configured via `cv`
stacking_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5,
)

# Fit the whole ensemble on the scaled training features
stacking_model.fit(X_train_scaled, y_train)


# ***Step 5: Predictions and Evaluation***

In [6]:
# Predict class labels for the held-out (scaled) test features
y_pred = stacking_model.predict(X_test_scaled)

# Report overall accuracy as a percentage, then the per-class
# precision / recall / F1 breakdown
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy*100:.2f}%')
print(classification_report(y_test, y_pred))

# Confusion matrix of true vs. predicted labels, printed under its heading
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm, sep="\n")


Accuracy: 50.02%
              precision    recall  f1-score   support

           0       0.50      0.21      0.30      9960
           1       0.50      0.78      0.61     10040

    accuracy                           0.50     20000
   macro avg       0.50      0.50      0.46     20000
weighted avg       0.50      0.50      0.46     20000

Confusion Matrix:
[[2126 7834]
 [2162 7878]]
