# 01 Data Exploration - Bondora Credit Risk Analysis
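This notebook covers the initial exploration of the Bondora peer-to-peer (P2P) loan dataset: it loads the raw Excel export, summarises the table's structure and descriptive statistics, and inspects the distribution of the `is_default` target.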


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Set plotting style
sns.set(style="whitegrid")

# Location of the raw Excel export, relative to the current working directory
raw_data_path = os.path.join("..", "data", "raw", "loan_dataset_investor.xlsx")

# Row cap for quick iteration: None reads the entire sheet; set an integer
# (e.g. 10_000) to read only the first rows when the workbook is too large
# to explore comfortably.
N_ROWS = None

df = pd.read_excel(raw_data_path, sheet_name="Loan Dataset", nrows=N_ROWS)
print(f"Dataset Shape: {df.shape}")
df.head()


: 

In [None]:
# Structural overview of the frame (printed report from DataFrame.info)
df.info()

# Descriptive statistics; the table is the cell's displayed output
describe_table = df.describe()
describe_table


In [None]:
# Target variable analysis: class balance plot and overall default rate
if 'is_default' in df.columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    # Recent seaborn releases deprecate `palette` without `hue`, so the x
    # variable is also assigned to hue; dodge=False keeps a single
    # full-width bar per class.
    sns.countplot(x='is_default', hue='is_default', data=df,
                  palette='viridis', dodge=False, ax=ax)
    # hue duplicates the x axis, so drop the legend if one was drawn
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title('Distribution of Loan Defaults')
    plt.show()

    # Mean of the flag column scaled to a percentage.
    # NOTE(review): assumes is_default is numeric/boolean (0/1) - confirm.
    default_rate = df['is_default'].mean() * 100
    print(f"Overall Default Rate: {default_rate:.2f}%")
else:
    print("Column 'is_default' not found.")
