# Credit Risk & Decision Engine

## Loading Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split           # For train/test split, and cross-validation strategies
from sklearn.ensemble import RandomForestClassifier            # For Random Forest
from sklearn.metrics import confusion_matrix, accuracy_score    # For model evaluation

ModuleNotFoundError: No module named 'pandas'

## Data Loading
 - Data source: the German Credit dataset from Kaggle; a raw CSV copy is hosted on [GitHub](https://raw.githubusercontent.com/selva86/datasets/master/GermanCredit.csv)

In [None]:
# Load the German credit dataset from the local data folder.
df = pd.read_csv(filepath_or_buffer='../data/german_credit_data.csv')
# Preview the first three rows (shown as the cell output in a notebook).
df.head(n=3)

# Origin

In [None]:
# ==========================================
# 🏦 CREDIT RISK & PROFITABILITY ENGINE
# ==========================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

# --- 1. DATA LOADING ---
# Read the German Credit data (the Kaggle dataset, per the author) directly
# from a hosted raw CSV, so no local download is needed.
url = "https://raw.githubusercontent.com/selva86/datasets/master/GermanCredit.csv"
df = pd.read_csv(filepath_or_buffer=url)

# --- 2. PREPROCESSING ---
# Keep only the business-relevant feature columns plus the raw label.
# NOTE(review): assumes the CSV header contains exactly these column names and
# that 'Response' is coded 1 = Good, 2 = Bad -- confirm against the file.
cols = [
    'Duration',
    'Amount',
    'InstallmentRatePercentage',
    'Age',
    'NumberExistingCredits',
    'Response',
]
df = df.loc[:, cols].copy()

# Recode the label to the usual ML convention: 1 where Response == 2 (Bad),
# 0 for every other value, then discard the raw column.
df['Target'] = (df['Response'] == 2).astype('int64')
df = df.drop(columns='Response')

# --- 3. TRAIN/TEST SPLIT (The "Final Exam" Setup) ---
# The label is the 'Target' column; the features are every other column.
y = df['Target']
X = df.drop(columns='Target')

# Reserve 20% of the rows as an unseen test set. The fixed seed makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
)

# --- 4. MODELING (The "Brain") ---
# Random Forest with 100 trees and a fixed seed, chosen by the author for how
# it handles features such as "Amount" and "Age" together. fit() returns the
# fitted estimator, so construction and training are chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# --- 5. BUSINESS LOGIC & DECISIONING ---
# Build the scoring table: a copy of the test features plus the true label and
# the predicted PROBABILITY of default rather than a hard 0/1 prediction.
# Column 1 of predict_proba is the second entry of model.classes_, which is
# Target == 1 as long as both classes were present in the training data.
test_preds = X_test.assign(
    Actual_Risk=y_test,
    Risk_Prob=model.predict_proba(X_test)[:, 1],
)

# Apply Tiered Decisions
def make_decision(prob: float, *, approve_below: float = 0.25,
                  review_below: float = 0.55) -> str:
    """Map a predicted default probability to a tiered credit decision.

    Args:
        prob: Predicted probability of default for one applicant.
        approve_below: Probabilities strictly below this value are
            auto-approved.
        review_below: Probabilities at or above ``approve_below`` and
            strictly below this value are sent to manual review.

    Returns:
        ``'Auto-Approve'``, ``'Manual Review'`` or ``'Decline'``.

    Raises:
        ValueError: If ``approve_below`` is greater than ``review_below``,
            which would make the manual-review tier unreachable.
    """
    if approve_below > review_below:
        raise ValueError(
            "approve_below must not be greater than review_below"
        )
    if prob < approve_below:
        return 'Auto-Approve'
    if prob < review_below:
        return 'Manual Review'
    # Reached when prob >= review_below, and also for NaN, which compares
    # False against both thresholds and therefore takes the safest tier.
    return 'Decline'

# Translate each predicted default probability into its decision tier.
test_preds['Decision'] = test_preds['Risk_Prob'].map(make_decision)

# --- 6. FINANCIAL IMPACT CALCULATION ---
# Per-loan outcome: a good loan (Actual_Risk == 0) earns 10% of its amount as
# interest; any other loan loses the full principal.
# NOTE(review): this is computed for every row regardless of 'Decision', so
# declined loans also carry a profit/loss figure -- confirm that is intended.
loan_amount = test_preds['Amount']
is_good_loan = test_preds['Actual_Risk'] == 0
interest_gain = loan_amount * 0.10
default_loss = -loan_amount
test_preds['Net_Profit'] = np.where(is_good_loan, interest_gain, default_loss)

# --- 7. EXPORT FOR TABLEAU ---
# Write the scoring table to the 'outputs' folder as CSV, without the index
# column.
from pathlib import Path

output_path = Path('../outputs/credit_results_final.csv')
# to_csv does not create missing parent directories and would raise OSError,
# so make sure the outputs folder exists first.
output_path.parent.mkdir(parents=True, exist_ok=True)
test_preds.to_csv(output_path, index=False)

# The check mark was stored as mis-decoded bytes ("‚úÖ"); restored to the
# intended character so the status line prints cleanly.
print("✅ Pipeline Complete. File saved for Tableau.")
# Accuracy of the hard 0/1 predictions on the held-out test set.
print(f"Model Accuracy: {accuracy_score(y_test, model.predict(X_test)):.2%}")