# Credit Card Approval Prediction using Ensemble Models
This notebook builds an ensemble model to predict whether a credit card application should be approved, based on applicant data and credit history.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation/future warnings
# raised by the libraries used below — consider narrowing it to specific
# categories or modules.
warnings.filterwarnings('ignore')

## Load Data

In [None]:
# Read the two input tables. Both paths are relative, so the CSV files are
# resolved against the current working directory.
app, credit = (
    pd.read_csv(csv_path)
    for csv_path in ('application_record.csv', 'credit_record.csv')
)

## Preprocessing & Merging

In [None]:
# STATUS codes that flag a credit-history row as "bad".
# NOTE(review): presumably these are the overdue buckets of the dataset —
# confirm against the data dictionary.
bad_status = ['1', '2', '3', '4', '5']

# Row-level flag: 1 when the row's STATUS is one of the bad codes, else 0.
is_bad_row = credit['STATUS'].isin(bad_status)
credit['label'] = is_bad_row.astype(int)

# Collapse to one row per ID: the max of the flags is 1 as soon as any row
# of that ID was flagged.
credit_status = credit.groupby('ID', as_index=False)['label'].max()

# Inner join (the merge default): only IDs present in both tables survive.
data = pd.merge(app, credit_status, on='ID')

## Feature Engineering

In [None]:
# Derive model features from the merged table, then one-hot encode the
# categorical columns.

# Age in whole years: DAYS_BIRTH is negated before the floor division.
data['AGE'] = (-data['DAYS_BIRTH']) // 365

# Whole years of employment. Negative DAYS_EMPLOYED values are converted to
# years; non-negative values are treated as "not employed" and mapped to 0.
#
# The previous version substituted a -999 sentinel and then floor-divided it
# by -365, which yields 2, so the follow-up replace(-999, NaN) never matched
# and those rows were silently recorded as 2 years employed.
# 0 is used instead of NaN because GradientBoostingClassifier (used in the
# model-training cell) rejects NaN input.
# NOTE(review): 0 is also the value for people employed for under a year; add
# a separate indicator column if that distinction matters.
days_employed = data['DAYS_EMPLOYED']
data['EMPLOYED_YEARS'] = (-days_employed).where(days_employed < 0, 0) // 365

# The raw day counters and the row identifier are not used as features.
data = data.drop(columns=['DAYS_BIRTH', 'DAYS_EMPLOYED', 'ID'])

# Assign the result back instead of calling fillna(inplace=True) on the
# selected column: the chained in-place form is deprecated and does not
# update the frame under pandas Copy-on-Write.
data['OCCUPATION_TYPE'] = data['OCCUPATION_TYPE'].fillna('Unknown')

# One-hot encode every object-dtype column, dropping the first level of each.
categorical_cols = data.select_dtypes(include='object').columns
data = pd.get_dummies(data, columns=categorical_cols, drop_first=True)

## Train-Test Split

In [None]:
# Separate the target column from the predictors.
y = data['label']
X = data.drop(columns='label')

# Hold out 20% of the rows for evaluation. The split is stratified on the
# target so both partitions keep the same class proportions, and seeded so it
# is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

## Model Training

In [None]:
# Three tree-based classifiers combined with soft voting, i.e. the ensemble
# averages the members' predicted class probabilities.
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# `use_label_encoder` is no longer passed: it is deprecated in XGBoost and
# ignored by current releases. An explicit seed is set to match the other two
# ensemble members.
xgb = XGBClassifier(eval_metric='logloss', random_state=42)

gb = GradientBoostingClassifier(n_estimators=100, random_state=42)

voting = VotingClassifier(
    estimators=[('rf', rf), ('xgb', xgb), ('gb', gb)],
    voting='soft',
)
voting.fit(X_train, y_train)

## Evaluation

In [None]:
# Score the fitted ensemble on the held-out test partition.
y_pred = voting.predict(X_test)

print('Accuracy Score:', accuracy_score(y_test, y_pred))
# The labels end in an escaped newline so the matrix and the report start on
# their own line. A literal line break inside a single-quoted string is a
# syntax error.
print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))