<a href="https://colab.research.google.com/github/norman-AI-2025/hackathon-2025/blob/main/Loan_Risk_App_V4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import subprocess
import time
import os
import sys
import re
import urllib.request

# ==========================================
# PART 1: CLEANUP & SETUP
# ==========================================

print("üßπ Cleaning up old processes...")
subprocess.run(["pkill", "cloudflared"])
subprocess.run(["pkill", "streamlit"])
time.sleep(2)

print("üì¶ Installing dependencies...")
!pip install -q streamlit pandas transformers torch

# ==========================================
# PART 2: CREATE APP FILE (With Advanced AI)
# ==========================================

print("üìù Writing application file...")

loan_risk_app_code = '''
import pandas as pd
import streamlit as st
import numpy as np
import time
from datetime import date

# --- CONSTANTS ---
# Page number -> page title (not referenced elsewhere in the visible app code).
PAGES = {1: "Applicant Info", 2: "Guarantor Info", 3: "Loan Details", 4: "Results"}
# Choice lists for the select boxes. The pages pre-select the stored value via
# OPTIONS.index(stored_value), so every stored default must appear in its list.
GENDER_OPTIONS = ["Male", "Female", "Other"]
MARITAL_OPTIONS = ["Single", "Married", "Divorced", "Widowed"]
EDUCATION_OPTIONS = ["High School", "Graduate", "Post-Graduate", "PhD", "Other"]
# Shared by the applicant (page 1) and guarantor (page 2) employment select boxes.
EMPLOYMENT_OPTIONS = ["Salaried", "Self-Employed", "Business Owner", "Unemployed", "Retired"]
RELATIONSHIP_OPTIONS = ["Spouse", "Parent", "Sibling", "Friend", "Company", "Other"]
PURPOSE_OPTIONS = ["Car Loan", "Education", "Personal", "Renovation", "Business Expansion", "Other"]
COLLATERAL_OPTIONS = ["Real Estate", "Vehicle", "Fixed Deposit", "None"]

# --- ADVANCED AI MODEL SETUP ---
@st.cache_resource
def load_nlp_model():
    """Create the zero-shot classification pipeline used to score loan essays.

    ``transformers`` is imported inside the function so that a missing or
    broken installation results in "no model" instead of an import-time crash.
    The function is wrapped in ``st.cache_resource`` so the loaded pipeline
    can be reused rather than rebuilt.

    Returns:
        The pipeline object, or ``None`` if it could not be created; callers
        treat ``None`` as "text analysis unavailable".
    """
    try:
        from transformers import pipeline
        # Using Zero-Shot Classification instead of simple Sentiment
        return pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-1")
    except Exception:
        # Deliberately best-effort, but no longer a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not swallowed during model load.
        return None

# Module-level handle read by compute_text_score(); None means "no model".
classifier = load_nlp_model()

def compute_text_score(text):
    """
    Analyzes the intent of the text and assigns a risk score based on the topic.

    The essay is classified against a fixed set of topics, each with an
    inherent risk value (0-100). The result is the probability-weighted
    average of those risk values.

    Args:
        text: The applicant's free-text reason for the loan.

    Returns:
        A float risk score. The neutral value 50.0 is returned when the text
        is empty, has fewer than 5 non-whitespace-trimmed characters, when no
        model is available, or when classification fails.
    """
    # Cheap text guards come first so empty/short input never touches the model.
    if not text:
        return 50.0
    essay = str(text)
    # Measure the stripped text: a blank essay made of spaces carries no intent
    # and must not be sent to the classifier.
    if len(essay.strip()) < 5 or classifier is None:
        return 50.0

    # Define topics and their inherent risk (0-100)
    risk_categories = {
        "investment or education": 20,      # Low Risk (Growth)
        "home improvement": 30,             # Low Risk (Asset)
        "debt consolidation": 50,           # Medium Risk (Management)
        "medical emergency": 65,            # Medium-High Risk (Shock)
        "luxury purchase or vacation": 85,  # High Risk (Discretionary)
        "financial distress": 95            # Very High Risk (Instability)
    }

    try:
        result = classifier(essay, list(risk_categories), multi_label=False)

        # Weighted score based on the probability of each topic,
        # e.g., if 80% sure it's "luxury", the score moves closer to 85.
        # NOTE(review): assumes the returned scores sum to 1 when
        # multi_label=False, which keeps the result within 20-95 -- confirm.
        return float(sum(
            prob * risk_categories[label]
            for label, prob in zip(result['labels'], result['scores'])
        ))
    except Exception:
        # Best-effort: any classification failure falls back to neutral,
        # without swallowing KeyboardInterrupt / SystemExit.
        return 50.0

def compute_numeric_risk_scores(data):
    """Score the numeric side of an application and store the result on ``data``.

    Components (points added to the risk score):
      * DTI Risk         -- commitments / income: >0.50 -> 30, >0.40 -> 20, >0.30 -> 10
      * Credit Util Risk -- card outstanding / limit: >0.75 -> 20, >0.50 -> 10, >0.30 -> 5
      * Stability Risk   -- years in job: <1 -> 10, <3 -> 5
      * Security Risk    -- unsecured -> 20; secured with LTV >1.0 -> 15, >0.8 -> 5
      * Legal Penalty 30 / Crime Penalty 50 when the respective flag is set
      * Guarantor Bonus -15 when guarantor income exceeds half the applicant's income

    Args:
        data: The form dictionary; it is modified in place.

    Returns:
        The same ``data`` object with ``numeric_score`` (total clipped to
        0-100) and ``risk_breakdown`` (component name -> points) set.
    """
    eps = 1e-6  # keeps every ratio denominator non-zero

    def tiered(value, tiers):
        # Points of the first tier whose threshold is strictly exceeded, else 0.
        return next((points for limit, points in tiers if value > limit), 0)

    income = data['app_monthly_income'] + data['app_other_monthly_income']
    obligations = (
        data['app_total_monthly_loan_repayment']
        + data['app_other_fixed_monthly_commitments']
    )
    debt_to_income = obligations / (income + eps)
    utilization = data['app_credit_card_outstanding'] / (data['app_total_credit_limit'] + eps)

    secured = data['loan_is_secured']
    # Collateral value is only read for secured loans; unsecured loans get a
    # placeholder ratio and take the flat security penalty instead.
    loan_to_value = (
        data['loan_amount_requested'] / (data['loan_collateral_value'] + eps)
        if secured else 1.5
    )

    tenure = data['app_years_in_job']
    breakdown = {
        'DTI Risk': tiered(debt_to_income, ((0.50, 30), (0.40, 20), (0.30, 10))),
        'Credit Util Risk': tiered(utilization, ((0.75, 20), (0.50, 10), (0.30, 5))),
        'Stability Risk': 10 if tenure < 1 else 5 if tenure < 3 else 0,
        'Security Risk': tiered(loan_to_value, ((1.0, 15), (0.8, 5))) if secured else 20,
    }

    # Flat penalties are only listed in the breakdown when they apply.
    if data['app_in_legal_proceedings']:
        breakdown['Legal Penalty'] = 30
    if data['app_convicted_financial_crime']:
        breakdown['Crime Penalty'] = 50

    # A guarantor earning more than half the applicant's income mitigates risk.
    if data['guar_exists'] and data['guar_monthly_income'] / (income + eps) > 0.5:
        breakdown['Guarantor Bonus'] = -15

    data['numeric_score'] = np.clip(sum(breakdown.values()), 0, 100)
    data['risk_breakdown'] = breakdown
    return data

def compute_fusion_risk(data, text_multiplier=0.2):
    """Blend the essay-based score into the numeric score and pick a category.

    The text score (0-100, 50 = neutral) is turned into a multiplier of
    ``1 + ((text_score - 50) / 50) * text_multiplier``: topics scoring above 50
    raise the numeric score, topics below 50 lower it.

    Args:
        data: Form dictionary holding ``loan_essay_text`` and ``numeric_score``;
            modified in place.
        text_multiplier: Maximum relative swing the text score may apply.

    Returns:
        The same ``data`` object with ``text_score``, ``risk_score`` (clipped
        to 0-100), ``score_multiplier`` and ``risk_category`` set.
    """
    essay_risk = compute_text_score(data['loan_essay_text'])
    data['text_score'] = essay_risk

    # Distance from the neutral value 50, scaled by the sensitivity.
    scale = 1.0 + (((essay_risk - 50.0) / 50.0) * text_multiplier)

    data['risk_score'] = np.clip(data['numeric_score'] * scale, 0, 100)
    data['score_multiplier'] = scale

    blended = data['risk_score']
    if blended >= 75:
        category = "High Risk"
    elif blended >= 35:
        category = "Medium Risk"
    else:
        category = "Low Risk"
    data['risk_category'] = category
    return data

# --- STATE INIT ---
def init_state():
    """Seed the session with default form values and the starting page.

    Both entries are only created when absent, so values already stored in
    ``st.session_state`` are left untouched.
    """
    state = st.session_state

    if 'form_data' not in state:
        applicant_defaults = {
            'app_age': 30,
            'app_gender': 'Male',
            'app_marital_status': 'Single',
            'app_dependents': 0,
            'app_education': 'Graduate',
            'app_employment_status': 'Salaried',
            'app_years_in_job': 2,
            'app_total_work_experience': 5,
            'app_monthly_income': 5000,
            'app_other_monthly_income': 0,
            'app_has_mortgage': False,
            'app_has_car_loan': False,
            'app_has_personal_loan': False,
            'app_has_credit_card': True,
            'app_num_credit_cards': 1,
            'app_total_credit_limit': 10000,
            'app_credit_card_outstanding': 1000,
            'app_total_monthly_loan_repayment': 500,
            'app_other_fixed_monthly_commitments': 200,
            'app_in_legal_proceedings': False,
            'app_convicted_financial_crime': False,
        }
        guarantor_defaults = {
            'guar_exists': False,
            'guar_relationship': 'Spouse',
            'guar_age': 30,
            'guar_employment_status': 'Salaried',
            'guar_monthly_income': 0,
            'guar_other_monthly_income': 0,
            'guar_total_monthly_loan_repayment': 0,
            'guar_has_credit_card': False,
            'guar_num_credit_cards': 0,
            'guar_total_credit_limit': 0,
            'guar_credit_card_outstanding': 0,
        }
        loan_defaults = {
            'loan_amount_requested': 20000,
            'loan_tenure_months': 24,
            'loan_purpose': 'Personal',
            'loan_is_secured': False,
            'loan_collateral_type': 'None',
            'loan_collateral_value': 0,
            'loan_application_date': date.today(),
            'loan_essay_text': "I plan to use this loan to expand my small business operations...",
        }
        result_defaults = {
            'numeric_score': 0,
            'text_score': 0,
            'risk_score': 0,
            'score_multiplier': 1.0,
        }
        # Merged in section order: applicant, guarantor, loan, results.
        state.form_data = {
            **applicant_defaults,
            **guarantor_defaults,
            **loan_defaults,
            **result_defaults,
        }

    if 'current_page' not in state:
        state.current_page = 1

# --- PAGES ---
def page_1():
    """Render the applicant page: demographics, financials, commitments, legal.

    Each widget is seeded with the value stored in
    ``st.session_state.form_data`` and its returned value is written back
    under the same key.
    """
    st.header("Page 1: Applicant Info")
    form = st.session_state.form_data

    def choose(column, label, options, key):
        # Select box pre-selected on the stored value; the choice is stored back.
        form[key] = column.selectbox(label, options, index=options.index(form[key]))

    st.subheader("Demographics")
    age_col, gender_col, marital_col = st.columns(3)
    form['app_age'] = age_col.number_input("Age", 18, 100, form['app_age'])
    choose(gender_col, "Gender", GENDER_OPTIONS, 'app_gender')
    choose(marital_col, "Marital Status", MARITAL_OPTIONS, 'app_marital_status')

    dependents_col, education_col, employment_col = st.columns(3)
    form['app_dependents'] = dependents_col.number_input("Dependents", 0, 20, form['app_dependents'])
    choose(education_col, "Education", EDUCATION_OPTIONS, 'app_education')
    choose(employment_col, "Employment Status", EMPLOYMENT_OPTIONS, 'app_employment_status')

    job_col, experience_col = st.columns(2)
    form['app_years_in_job'] = job_col.number_input(
        "Years in Current Job", 0, 60, form['app_years_in_job'])
    form['app_total_work_experience'] = experience_col.number_input(
        "Total Work Experience", 0, 60, form['app_total_work_experience'])

    st.markdown("---")
    st.subheader("Financials")
    income_col, other_income_col = st.columns(2)
    form['app_monthly_income'] = income_col.number_input(
        "Monthly Income ($)", 0, value=form['app_monthly_income'], step=100)
    form['app_other_monthly_income'] = other_income_col.number_input(
        "Other Monthly Income ($)", 0, value=form['app_other_monthly_income'], step=100)

    st.subheader("Commitments")
    commitment_flags = (
        ("Mortgage?", 'app_has_mortgage'),
        ("Car Loan?", 'app_has_car_loan'),
        ("Personal Loan?", 'app_has_personal_loan'),
        ("Credit Card?", 'app_has_credit_card'),
    )
    for column, (label, key) in zip(st.columns(4), commitment_flags):
        form[key] = column.checkbox(label, form[key])

    cards_col, limit_col, outstanding_col = st.columns(3)
    form['app_num_credit_cards'] = cards_col.number_input(
        "Num Credit Cards", 0, value=form['app_num_credit_cards'])
    form['app_total_credit_limit'] = limit_col.number_input(
        "Total Credit Limit", 0, value=form['app_total_credit_limit'], step=500)
    form['app_credit_card_outstanding'] = outstanding_col.number_input(
        "Credit Card Outstanding", 0, value=form['app_credit_card_outstanding'], step=100)

    repayment_col, fixed_col = st.columns(2)
    form['app_total_monthly_loan_repayment'] = repayment_col.number_input(
        "Total Monthly Loan Repayments", 0, value=form['app_total_monthly_loan_repayment'], step=100)
    form['app_other_fixed_monthly_commitments'] = fixed_col.number_input(
        "Other Fixed Commitments", 0, value=form['app_other_fixed_monthly_commitments'], step=100)

    st.markdown("---")
    st.subheader("Legal")
    form['app_in_legal_proceedings'] = st.checkbox(
        "Active Legal Proceedings?", form['app_in_legal_proceedings'])
    form['app_convicted_financial_crime'] = st.checkbox(
        "Convicted of Financial Crime?", form['app_convicted_financial_crime'])

def page_2():
    """Render the guarantor page.

    When the user reports no guarantor, the guarantor income and age are
    zeroed and nothing else is shown. Otherwise the guarantor's details and
    credit figures are collected into ``st.session_state.form_data``.
    """
    st.header("Page 2: Guarantor Info")
    form = st.session_state.form_data

    form['guar_exists'] = st.radio(
        "Do you have a guarantor?", [True, False], index=int(not form['guar_exists']))

    if not form['guar_exists']:
        # Zero out logic
        form['guar_monthly_income'] = 0
        form['guar_age'] = 0
        return

    # Repair stored values the widgets below would reject: an unknown
    # relationship, or an age below the input's minimum of 18 (the
    # no-guarantor branch above stores 0).
    if form['guar_relationship'] not in RELATIONSHIP_OPTIONS:
        form['guar_relationship'] = RELATIONSHIP_OPTIONS[0]
    if form['guar_age'] < 18:
        form['guar_age'] = 18

    relation_col, age_col, employment_col = st.columns(3)
    form['guar_relationship'] = relation_col.selectbox(
        "Relationship", RELATIONSHIP_OPTIONS,
        index=RELATIONSHIP_OPTIONS.index(form['guar_relationship']))
    form['guar_age'] = age_col.number_input("Guarantor Age", 18, 100, form['guar_age'])
    form['guar_employment_status'] = employment_col.selectbox(
        "Guarantor Employment", EMPLOYMENT_OPTIONS,
        index=EMPLOYMENT_OPTIONS.index(form['guar_employment_status']))

    income_col, other_income_col, repayment_col = st.columns(3)
    form['guar_monthly_income'] = income_col.number_input(
        "Guarantor Income", 0, value=form['guar_monthly_income'], step=100)
    form['guar_other_monthly_income'] = other_income_col.number_input(
        "Guarantor Other Income", 0, value=form['guar_other_monthly_income'], step=100)
    form['guar_total_monthly_loan_repayment'] = repayment_col.number_input(
        "Guarantor Repayments", 0, value=form['guar_total_monthly_loan_repayment'], step=100)

    st.markdown("---")
    st.subheader("Guarantor Credit")
    form['guar_has_credit_card'] = st.checkbox(
        "Guarantor Has Credit Card?", form['guar_has_credit_card'])

    cards_col, limit_col, outstanding_col = st.columns(3)
    form['guar_num_credit_cards'] = cards_col.number_input(
        "Guarantor Num Cards", 0, value=form['guar_num_credit_cards'])
    form['guar_total_credit_limit'] = limit_col.number_input(
        "Guarantor Credit Limit", 0, value=form['guar_total_credit_limit'], step=500)
    form['guar_credit_card_outstanding'] = outstanding_col.number_input(
        "Guarantor Outstanding", 0, value=form['guar_credit_card_outstanding'], step=100)

def page_3():
    """Render the loan page: amount, tenure, purpose, date, collateral, essay.

    Collateral inputs are shown only for secured loans; for unsecured loans
    the collateral type and value are reset to 'None' and 0. All values are
    stored in ``st.session_state.form_data``.
    """
    st.header("Page 3: Loan Details")
    form = st.session_state.form_data

    amount_col, tenure_col = st.columns(2)
    form['loan_amount_requested'] = amount_col.number_input(
        "Amount Requested ($)", 1000, value=form['loan_amount_requested'], step=1000)
    form['loan_tenure_months'] = tenure_col.number_input(
        "Tenure (Months)", 6, 360, form['loan_tenure_months'])

    purpose_col, date_col = st.columns(2)
    form['loan_purpose'] = purpose_col.selectbox(
        "Purpose", PURPOSE_OPTIONS, index=PURPOSE_OPTIONS.index(form['loan_purpose']))
    form['loan_application_date'] = date_col.date_input(
        "Application Date", value=form['loan_application_date'])

    st.markdown("---")
    form['loan_is_secured'] = st.radio(
        "Is this a Secured Loan?", [True, False], index=int(not form['loan_is_secured']))

    if not form['loan_is_secured']:
        form['loan_collateral_type'] = 'None'
        form['loan_collateral_value'] = 0
    else:
        # Fall back to the first option if the stored type is not a valid choice.
        if form['loan_collateral_type'] not in COLLATERAL_OPTIONS:
            form['loan_collateral_type'] = COLLATERAL_OPTIONS[0]

        type_col, value_col = st.columns(2)
        form['loan_collateral_type'] = type_col.selectbox(
            "Collateral Type", COLLATERAL_OPTIONS,
            index=COLLATERAL_OPTIONS.index(form['loan_collateral_type']))
        form['loan_collateral_value'] = value_col.number_input(
            "Collateral Value ($)", 0, value=form['loan_collateral_value'], step=1000)

    st.subheader("Essay")
    st.caption("Please describe why you need this loan. The AI will analyze your intent.")
    form['loan_essay_text'] = st.text_area(
        "Reason for loan:", value=form['loan_essay_text'], height=150)

def page_4(mult):
    """Render the results page and run the assessment on demand.

    Nothing beyond the header and button is shown until "Calculate Risk" is
    pressed. The numeric and text-fused scores are then computed from the
    stored form data and displayed with the per-component breakdown.

    Args:
        mult: Text sensitivity forwarded to ``compute_fusion_risk``.
    """
    st.header("Page 4: Results")
    if not st.button("Calculate Risk", type="primary"):
        return

    with st.spinner("Analyzing Risk Profile... (Loading AI Models)"):
        scored = compute_numeric_risk_scores(st.session_state.form_data)
        outcome = compute_fusion_risk(scored, mult)
        st.session_state.form_data.update(outcome)
        time.sleep(1)
    st.success("Assessment Complete")

    category_col, final_col, numeric_col, impact_col = st.columns(4)
    category_col.metric("Risk Category", outcome['risk_category'])
    final_col.metric("Final Score", f"{outcome['risk_score']:.1f}")
    numeric_col.metric("Numeric Base", f"{outcome['numeric_score']:.1f}")
    impact_col.metric("Intent Impact", f"x{outcome['score_multiplier']:.2f}")

    st.markdown("### Score Breakdown")
    # One table row per scoring component.
    breakdown_rows = []
    for component, points in outcome['risk_breakdown'].items():
        breakdown_rows.append({"Component": component, "Points": points})
    st.dataframe(pd.DataFrame(breakdown_rows), use_container_width=True)

    st.info(f"Text Analysis Score: {outcome['text_score']:.1f}/100 (Higher = Riskier Topic)")

def main():
    """Entry point: initialise state, draw the sidebar, and render the current page.

    Navigation uses ``on_click`` callbacks rather than testing the buttons'
    return values. A button only returns True during the rerun triggered by
    the click, by which point the visibility checks above it have already run
    against the old page number. That left the sidebar showing the wrong set
    of Previous/Next buttons until the next interaction. Callbacks run before
    the rerun, so the sidebar always matches the page being displayed.
    """
    init_state()
    st.set_page_config(page_title="Loan Risk App", layout="wide")

    def _shift_page(delta):
        # Clamp so the page index can never leave the 1..4 range.
        st.session_state.current_page = min(4, max(1, st.session_state.current_page + delta))

    with st.sidebar:
        st.title("Navigation")
        if st.session_state.current_page > 1:
            st.button("Previous", on_click=_shift_page, args=(-1,))
        if st.session_state.current_page < 4:
            st.button("Next", on_click=_shift_page, args=(1,))

        st.markdown("---")
        mult = st.slider("AI Sensitivity", 0.0, 0.5, 0.2, 0.05)
        st.caption("Controls how much the text analysis impacts the final score.")
        st.info(f"Page {st.session_state.current_page} of 4")

    if st.session_state.current_page == 1: page_1()
    elif st.session_state.current_page == 2: page_2()
    elif st.session_state.current_page == 3: page_3()
    elif st.session_state.current_page == 4: page_4(mult)

if __name__ == "__main__": main()
'''

# Persist the embedded Streamlit source so `streamlit run app.py` can load it.
with open('app.py', 'w') as app_file:
    app_file.write(loan_risk_app_code)
print("‚úÖ Application file saved.")

# ==========================================
# PART 3: LAUNCH SYSTEM
# ==========================================

PORT = 8501

# 1. Launch Streamlit in Background (With Logging)
print(f"\nüöÄ Launching Streamlit on port {PORT}...")
# Output is redirected to streamlit.log so crashes can be debugged.
# The child receives its own copy of the file descriptor, so the parent's
# handle is closed as soon as the process has been spawned (no leaked handle).
with open("streamlit.log", "w") as st_log:
    app_process = subprocess.Popen(
        # Argument list instead of a shell string: no shell parsing involved.
        [
            "streamlit", "run", "app.py",
            "--server.port", str(PORT),
            "--server.address", "0.0.0.0",
            "--server.headless", "true",
        ],
        # Same effect as preexec_fn=os.setsid (own session / process group),
        # but safe to use from a multi-threaded parent such as a notebook kernel.
        start_new_session=True,
        stdout=st_log,
        stderr=st_log,
    )

# 2. Wait for App to be Healthy
print("‚è≥ Waiting for app to initialize (this prevents 'Site Cant Be Reached')...")
ready = False
for i in range(60): # Up to 60 polls, leaving time for model loading
    if app_process.poll() is not None:
        # The launched process has already exited; polling can no longer
        # succeed, so fall through to the log dump immediately.
        break
    try:
        with urllib.request.urlopen(f"http://localhost:{PORT}/_stcore/health", timeout=1) as response:
            if response.status == 200:
                print("\n‚úÖ Streamlit is ready!")
                ready = True
                break
    except Exception:
        # Expected while the server is still starting (connection refused,
        # timeout, ...). Not a bare ``except:`` so that interrupting the cell
        # (KeyboardInterrupt) actually stops the wait.
        pass
    # Pause between polls, including after a non-200 answer.
    time.sleep(1)
    if i % 5 == 0: print(".", end="", flush=True)

if not ready:
    print("\n‚ùå Error: App failed to start. Checking logs...")
    if os.path.exists("streamlit.log"):
        with open("streamlit.log", "r") as f:
            print(f.read()[-2000:]) # Print last 2000 characters of error log
    else:
        print("No log file found.")
else:
    # 3. Install Cloudflare
    if not os.path.exists("cloudflared-linux-amd64.deb"):
        print("‚¨áÔ∏è Downloading Cloudflare Tunnel...")
        !wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb
        !dpkg -i cloudflared-linux-amd64.deb > /dev/null 2>&1

    # 4. Launch Tunnel
    print("üöá Starting Tunnel...")
    log = open("cf.log", "w")
    subprocess.Popen(["cloudflared", "tunnel", "--url", f"http://localhost:{PORT}"], stdout=log, stderr=log, preexec_fn=os.setsid)

    # 5. Extract URL
    time.sleep(5) # increased buffer
    found = False
    for i in range(15):
        if os.path.exists("cf.log"):
            with open("cf.log", "r") as f:
                txt = f.read()
                match = re.search(r'(https:\/\/[a-zA-Z0-9-]+\.trycloudflare\.com)', txt)
                if match:
                    print(f"\nüéØ \033[1;32mYOUR APP IS LIVE:\033[0m {match.group(1)}")
                    found = True
                    break
        time.sleep(1)

    if not found: print("‚ö†Ô∏è Link not found yet. Check logs: !cat cf.log")

🧹 Cleaning up old processes...
📦 Installing dependencies...
📝 Writing application file...
✅ Application file saved.

🚀 Launching Streamlit on port 8501...
⏳ Waiting for app to initialize (this prevents 'Site Cant Be Reached')...
.
✅ Streamlit is ready!
🚇 Starting Tunnel...

🎯 YOUR APP IS LIVE: https://boss-significance-jane-relatively.trycloudflare.com
