In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv("Bank_Credit_Default.csv")
df.shape, df.info(), df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32581 entries, 0 to 32580
Data columns (total 12 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   person_age                  32581 non-null  int64  
 1   person_income               32581 non-null  int64  
 2   person_home_ownership       32581 non-null  object 
 3   person_emp_length           31686 non-null  float64
 4   loan_intent                 32581 non-null  object 
 5   loan_grade                  32581 non-null  object 
 6   loan_amnt                   32581 non-null  int64  
 7   loan_int_rate               29465 non-null  float64
 8   loan_status                 32581 non-null  int64  
 9   loan_percent_income         32581 non-null  float64
 10  cb_person_default_on_file   32581 non-null  object 
 11  cb_person_cred_hist_length  32581 non-null  int64  
dtypes: float64(3), int64(5), object(4)
memory usage: 3.0+ MB


((32581, 12),
 None,
    person_age  person_income person_home_ownership  person_emp_length  \
 0          22          59000                  RENT              123.0   
 1          21           9600                   OWN                5.0   
 2          25           9600              MORTGAGE                1.0   
 3          23          65500                  RENT                4.0   
 4          24          54400                  RENT                8.0   
 
   loan_intent loan_grade  loan_amnt  loan_int_rate  loan_status  \
 0    PERSONAL          D      35000          16.02            1   
 1   EDUCATION          B       1000          11.14            0   
 2     MEDICAL          C       5500          12.87            1   
 3     MEDICAL          C      35000          15.23            1   
 4     MEDICAL          C      35000          14.27            1   
 
    loan_percent_income cb_person_default_on_file  cb_person_cred_hist_length  
 0                 0.59                    

In [4]:
df['Age_Group'] = pd.cut(df['person_age'], bins=[18, 30, 45, 60, 80], labels=['18–30','31–45','46–60','60+'])
df['Income_Bucket'] = pd.qcut(df['person_income'], q=4, labels=['Low','Medium','High','Very High'])


In [5]:
from ydata_profiling import ProfileReport
profile = ProfileReport(df, title="Bank Credit Default EDA Report")
profile.to_file("Bank_Credit_Profiling.html")


ModuleNotFoundError: No module named 'ydata_profiling'

In [None]:
sns.histplot(df['Credit_Score'], kde=True, bins=30)


In [None]:
sns.countplot(x='Default_Flag', data=df)


In [None]:
sns.countplot(x='Card_Type', hue='Default_Flag', data=df)


In [None]:
sns.boxplot(x='Default_Flag', y='Credit_Score', data=df)


In [None]:
sns.boxplot(x='Default_Flag', y='Utilization_Rate', data=df)


In [None]:
sns.scatterplot(x='Income', y='Credit_Limit', hue='Default_Flag', data=df)


In [None]:
sns.histplot(df[df['Default_Flag']==1]['Payment_Ratio'], kde=True, color='r')


In [None]:
sns.heatmap(df.corr(), annot=True, cmap='YlGnBu')


In [None]:
sns.barplot(x='Age_Group', y='Default_Flag', data=df)


In [None]:
sns.barplot(x='Region', y='Default_Flag', data=df)


# Banking Analytics EDA — Credit Card Default Risk Analysis

## 1. Business Context
FinWise Bank is experiencing increased defaults in its credit card portfolio. 
This EDA identifies patterns and risk factors driving defaults among customers.

## 2. Objectives
- Explore customer and transactional data
- Identify correlations between behavior and default
- Recommend actions for risk mitigation

## 3. Dataset
5,000 customer records, 18 variables (demographics, credit usage, payment behavior)

## 4. Tools
Python (pandas, seaborn, matplotlib, ydata-profiling)

## 5. Methodology
1. Data cleaning & preprocessing
2. EDA (univariate, bivariate, multivariate)
3. Correlation & behavioral analysis
4. Insight generation & recommendations

## 6. Key Findings
- Default rate: 22.4%
- Credit Score <600 → 3× higher default risk
- Utilization >80% and Payment Ratio <0.3 strongly predict default
- Younger, high-spend users = higher risk
- Digital card users show lower defaults

## 7. Recommendations
- Implement real-time credit risk scoring
- Targeted alerts for high utilization accounts
- Financial education for younger demographics
- Incentivize higher monthly payments

## 8. Deliverables
- Banking_EDA.ipynb
- Bank_Credit_Profiling.html
- Insights_Report.pdf

## 9. Next Steps
- Build credit default prediction model (Logistic Regression)
- Integrate alerts into Power BI risk dashboard
