# Testing and Validation

- Validate Monte Carlo simulation
- Check portfolio data integrity
- Test SQL insertions and queries


In [1]:
import sqlite3
import pandas as pd
import numpy as np
from pathlib import Path

# Root of the project checkout; the database path below is derived from it.
project_path = Path("/home/skumar/Desktop/credit-risk-analytics")
db_path = project_path / "sql/credit_portfolio.db"

# sqlite3.connect() silently creates an empty database when the file is
# missing, which would only surface later as a confusing "no such table"
# error (and leave a stray file behind). Fail fast with the offending path.
if not db_path.is_file():
    raise FileNotFoundError(f"SQLite database not found: {db_path}")

# Shared connection and cursor used by the validation cells.
conn = sqlite3.connect(db_path)
cursor = conn.cursor()


In [2]:
# Row-count parity: the portfolio table must hold exactly as many rows as the CSV.
portfolio_csv = project_path / "data/input_raw/credit_portfolio.csv"
portfolio_df = pd.read_csv(portfolio_csv)
csv_row_count = len(portfolio_df)

# COUNT(*) yields a single-column row, so unpack it directly.
(rows_in_db,) = cursor.execute("SELECT COUNT(*) FROM portfolio").fetchone()

assert rows_in_db == csv_row_count, f"Row mismatch! CSV: {csv_row_count}, DB: {rows_in_db}"
print("✅ Portfolio table row count matches CSV")


✅ Portfolio table row count matches CSV


In [3]:
# Check simulation results: report the row count, then summary loss statistics.
cursor.execute("SELECT COUNT(*) FROM simulation_results")
sim_rows = cursor.fetchone()[0]
print(f"Simulation table has {sim_rows} rows")

# Optional: quick stats check
cursor.execute("SELECT AVG(portfolio_loss), MAX(portfolio_loss) FROM simulation_results")
avg_loss, max_loss = cursor.fetchone()

# AVG/MAX evaluate to NULL (None in Python) when the table is empty or every
# portfolio_loss is NULL; formatting None with ":.2f" would raise TypeError,
# so report the absence of data explicitly instead.
if avg_loss is None or max_loss is None:
    print("No portfolio loss values available to summarise")
else:
    print(f"Average portfolio loss: {avg_loss:.2f}, Max loss: {max_loss:.2f}")


Simulation table has 10000 rows
Average portfolio loss: 5675571.79, Max loss: 9111867.62


In [4]:
# Close the database connection now that the validation checks are done.
conn.close()
print("✅ Database connection closed")


✅ Database connection closed
