In [1]:
from sqlalchemy import create_engine, text, MetaData, Table
from urllib.parse import quote_plus
import pyarrow.parquet as pq

# Config
DB_USER, DB_PASS = 'admin', 'Admin@1234Strong!'
PASS_ENC = quote_plus(DB_PASS)
# Use 127.0.0.1 for local notebook to Docker mapping
FULL_URL = f"mysql+pymysql://{DB_USER}:{PASS_ENC}@127.0.0.1:3306/lending_club"
engine = create_engine(FULL_URL)

print("Ad-Hoc Bridge Connected.")

Ad-Hoc Bridge Connected.


In [2]:
def inspect_table(table_name):
    print(f"\n Inspecting Table: {table_name}")
    with engine.connect() as conn:
        count = conn.execute(text(f"SELECT COUNT(*) FROM {table_name}")).scalar()
        print(f" Total Rows: {count:,}")
        
        result = conn.execute(text(f"DESCRIBE {table_name}"))
        print("Schema:")
        for col in result:
            print(f"      - {col[0]}: {col[1]}")

inspect_table("loans_clean")


 Inspecting Table: loans_clean
 Total Rows: 316,413
Schema:
      - target: int(11)
      - loan_amnt: float
      - int_rate: float
      - term: float
      - grade: varchar(5)
      - sub_grade: varchar(5)
      - purpose: varchar(100)
      - application_type: varchar(50)
      - annual_inc: float
      - dti: float
      - emp_length_num: float
      - home_ownership: varchar(50)
      - verification_status: int(11)
      - fico_score: float
      - revol_util: float
      - open_acc: float
      - pub_rec: float
      - addr_state: varchar(5)
      - issue_date: datetime
      - issue_year: int(11)


In [3]:
query = """
SELECT fico_score, dti, (int_rate * dti) / fico_score AS risk_index FROM loans_clean LIMIT 5;
"""

with engine.connect() as conn:
    result = conn.execute(text(query))
    print("\n Experiment Result:")
    for row in result:
        print(dict(row._mapping))


 Experiment Result:
{'fico_score': 600.0, 'dti': 30.46, 'risk_index': 1.1346349852625524}
{'fico_score': 600.0, 'dti': 50.53, 'risk_index': 1.359256915761313}
{'fico_score': 600.0, 'dti': 18.92, 'risk_index': 0.2383919991569519}
{'fico_score': 600.0, 'dti': 4.64, 'risk_index': 0.08746400072828919}
{'fico_score': 600.0, 'dti': 12.37, 'risk_index': 0.5622165042362213}


In [7]:
eda_audit_query = """
SELECT 
    COUNT(*) as total_rows,
    SUM(CASE WHEN fico_score = 0 OR fico_score IS NULL THEN 1 ELSE 0 END) as bad_fico,
    SUM(CASE WHEN annual_inc = 0 OR annual_inc IS NULL THEN 1 ELSE 0 END) as bad_income,
    SUM(CASE WHEN dti = 0 OR dti IS NULL THEN 1 ELSE 0 END) as bad_dti,
    SUM(CASE WHEN tot_cur_bal = 0 OR tot_cur_bal IS NULL THEN 1 ELSE 0 END) as bad_balance,
    SUM(CASE WHEN revol_util = 0 OR revol_util IS NULL THEN 1 ELSE 0 END) as bad_util,
    SUM(CASE WHEN pct_tl_nvr_dlq IS NULL THEN 1 ELSE 0 END) as null_pct_dlq
FROM loans_clean
"""

with engine.connect() as conn:
    res = conn.execute(text(eda_audit_query)).fetchone()
    print("ðŸ“Š DATA QUALITY AUDIT:")
    print(dict(res._mapping))

ðŸ“Š DATA QUALITY AUDIT:
{'total_rows': 1303607, 'bad_fico': Decimal('1303607'), 'bad_income': Decimal('302'), 'bad_dti': Decimal('1127'), 'bad_balance': Decimal('67875'), 'bad_util': Decimal('7501'), 'null_pct_dlq': Decimal('0')}
