In [None]:
"""
Banking Analytics Dashboard - Data Analysis Notebook
Author: Your Name
Date: YYYY-MM-DD

Objective:
- Perform data cleaning & preprocessing
- Create new features for analysis
- Conduct exploratory data analysis (EDA)
- Prepare clean dataset for Power BI dashboard
"""

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
# Raw string literal: in a normal string "\b" (as in "\banking") is parsed as
# a backspace character and "\d" is an invalid escape sequence, so the
# original path was corrupted before pandas ever received it.
# NOTE(review): the path has no file name/extension and looks like a
# directory; pd.read_excel needs the workbook file itself - confirm the name.
df = pd.read_excel(r"D:\data analytics\banking-analytics-dashboard\data")
df.head()


In [None]:
# Handle missing values
# Fill only numeric columns with 0. A blanket df.fillna(0) also writes the
# integer 0 into date/text columns; a 0 in 'Joined Bank' does not match the
# '%d-%m-%Y' format used below and makes pd.to_datetime raise.
numeric_cols = df.select_dtypes(include='number').columns
df = df.fillna({col: 0 for col in numeric_cols})

# Convert date column (missing dates are left missing and parse to NaT)
df['Joined Bank'] = pd.to_datetime(df['Joined Bank'], format='%d-%m-%Y')

# Create Income Band
# Bin edges are right-inclusive: Low = [0, 100000], Mid = (100000, 300000],
# High = (300000, inf). include_lowest=True keeps an income of exactly 0 in
# 'Low'; values below 0 fall outside every bin and get NaN.
bins = [0, 100000, 300000, float('inf')]
labels = ['Low', 'Mid', 'High']
df['Income Band'] = pd.cut(df['Estimated Income'], bins=bins, labels=labels, include_lowest=True)

# Create Age Band
def age_band(age):
    """Map an age to its age-band label.

    Args:
        age: Age in years (int or float). May be missing (None / NaN).

    Returns:
        str: "18-30", "31-45", "46-60" or "60+" for ages of 18 and above,
        "Under 18" for ages below 18, and "Unknown" for a missing age.
    """
    # A missing age compares False against every bound and used to fall
    # through to "60+"; label it explicitly instead.
    if pd.isna(age):
        return "Unknown"
    # Ages below 18 used to fail the first test and get caught by the
    # `age <= 45` branch, i.e. they were reported as "31-45".
    if age < 18:
        return "Under 18"
    if age <= 30:
        return "18-30"
    if age <= 45:
        return "31-45"
    if age <= 60:
        return "46-60"
    return "60+"

# Label every customer with an age band, element by element, then preview.
age_labels = df['Age'].map(age_band)
df['Age Band'] = age_labels

df.head()


In [None]:
# Distribution of Age: 20-bin histogram with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(df['Age'], bins=20, kde=True, ax=ax)
ax.set_title('Age Distribution')
plt.show()

# Loan by Occupation: bar height is the sum of 'Bank Loans' per occupation.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=df, x='Occupation', y='Bank Loans', estimator=sum, ax=ax)
# Tilt the category labels so neighbouring occupation names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.set_title('Total Loan by Occupation')
plt.show()

# Correlation Matrix: pairwise correlations of the numeric measures,
# drawn as an annotated heatmap.
numerical_cols = [
    'Age',
    'Estimated Income',
    'Credit Card Balance',
    'Bank Loans',
    'Bank Deposits',
]
corr = df[numerical_cols].corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()


In [None]:
# Write the cleaned frame to CSV (row index omitted) for the Power BI dashboard.
clean_csv_path = "../data/banking_clean.csv"
df.to_csv(clean_csv_path, index=False)
print("Clean dataset exported successfully.")


In [None]:
"""
Key Findings:
- High-income clients hold most bank deposits and loans.
- The 31–45 age group has the highest average deposits.
- Loyalty classification strongly correlates with deposits.
- Foreign currency accounts are most popular among high-income clients.

Next Step:
- Visualize these insights interactively in Power BI.
"""
