<a href="https://colab.research.google.com/github/siddhi-patil-06/Wallet_Risk_Scoring_Engine/blob/main/Wallet_Risk_Scoring_Engine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install requests pandas numpy




In [51]:
import logging
import os
import time
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import requests
from tqdm import tqdm

# Configuration
# NOTE(review): both subgraph URLs point at The Graph's hosted service;
# confirm these endpoints still respond before relying on the protocol score.
COMPOUND_V2_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/graphprotocol/compound-v2"
COMPOUND_V3_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/compound-finance/compound-v3"
ETHERSCAN_API = "https://api.etherscan.io/api"
# SECURITY: an API key committed to a shared notebook is effectively public.
# Prefer supplying it through the ETHERSCAN_API_KEY environment variable; the
# literal below is kept only as a fallback so existing runs keep working and
# should be rotated and removed.
ETHERSCAN_API_KEY = (
    os.environ.get("ETHERSCAN_API_KEY") or "P26IXMUPZGVMFS2PU6BM3YFCXM5ZBARPDM"
)
SCORE_MAX = 1000      # upper clamp applied to the final combined score
BATCH_SIZE = 3        # wallets scored between two pauses
DELAY_SECONDS = 2     # pause between batches, in seconds

def fetch_compound_data(wallet_address):
    """Fetch a wallet's Compound account record from the subgraph endpoints.

    The V3 endpoint is queried first and the V2 endpoint second; the first
    non-empty ``account`` object found is returned.

    Args:
        wallet_address: Wallet address string. It is lower-cased before being
            embedded in the query as the account id.

    Returns:
        The ``account`` dict of the first endpoint that has one, or ``None``
        when no endpoint yields an account (network errors, non-200 replies
        and undecodable or unexpected payloads all count as "no account").
    """
    query = """{
      account(id: "%s") {
        hasBorrowed
        countLiquidated
        borrows(first: 10) { amount }
        repayments(first: 10) { amount }
        liquidations(first: 5) { amountRepaid }
      }
    }""" % wallet_address.lower()

    log = logging.getLogger(__name__)
    for endpoint in (COMPOUND_V3_SUBGRAPH, COMPOUND_V2_SUBGRAPH):
        # Failures are handled per endpoint: an error on the first endpoint
        # must not prevent the fallback endpoint from being tried.
        try:
            response = requests.post(endpoint, json={'query': query}, timeout=10)
            if response.status_code != 200:
                continue
            payload = response.json()
        except (requests.RequestException, ValueError) as exc:
            log.debug("Compound query to %s failed: %s", endpoint, exc)
            continue

        # The reply may carry "data": null (e.g. alongside an "errors" entry),
        # so every level is type-checked instead of chained with .get().
        data = payload.get('data') if isinstance(payload, dict) else None
        account = data.get('account') if isinstance(data, dict) else None
        if account:
            return account
    return None

def fetch_etherscan_data(wallet_address):
    """Fetch the wallet's transaction list from the Etherscan ``txlist`` action.

    Args:
        wallet_address: Address passed as the ``address`` query parameter.

    Returns:
        The list found under the ``result`` key of the JSON reply, newest
        first (``sort=desc``). An empty list is returned on network errors,
        non-200 replies, undecodable bodies, or when ``result`` is not a list.
    """
    params = {
        'module': 'account',
        'action': 'txlist',
        'address': wallet_address,
        'startblock': 0,
        'endblock': 99999999,
        'sort': 'desc',
        'apikey': ETHERSCAN_API_KEY
    }

    try:
        response = requests.get(ETHERSCAN_API, params=params, timeout=15)
        if response.status_code != 200:
            return []
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        logging.getLogger(__name__).debug(
            "Etherscan request for %s failed: %s", wallet_address, exc
        )
        return []

    result = payload.get('result') if isinstance(payload, dict) else None
    # Error replies (bad key, rate limit, ...) can put a message string in
    # "result"; handing that to callers as if it were a transaction list makes
    # them iterate over characters, so anything that is not a list becomes [].
    return result if isinstance(result, list) else []

def _protocol_score(compound_data):
    """Return the protocol-risk component (0-600) for a Compound account dict."""
    if not compound_data:
        return 0

    # `or []` / `or 0` also cover keys that are present but null.
    borrowed = sum(float(tx['amount']) for tx in compound_data.get('borrows') or [])
    repaid = sum(float(tx['amount']) for tx in compound_data.get('repayments') or [])
    liquidated = int(compound_data.get('countLiquidated') or 0)

    # Dividing by max(borrowed, 1) avoids a zero division; the ratio is capped at 1.
    # NOTE(review): with no borrows the ratio is 0, so the full 300-point
    # repayment penalty applies -- confirm this is the intended model.
    repay_ratio = min(repaid / max(borrowed, 1), 1)

    return min(
        (borrowed ** 0.6) * 15 +          # Progressive borrow amount scaling
        liquidated * 300 +                # Heavy liquidation impact
        (1 - repay_ratio) * 300,          # Significant repayment penalty
        600
    )


def _activity_score(eth_txs, now):
    """Return the activity component (0-400) for a list of transaction dicts."""
    # One cutoff for the whole list instead of a fresh clock read per record.
    cutoff = now - timedelta(days=7)

    tx_count = len(eth_txs)
    # Records whose 'to' field is the empty string.
    contract_txs = sum(1 for tx in eth_txs if tx['to'] == '')
    recent_txs = sum(
        1 for tx in eth_txs
        if datetime.fromtimestamp(int(tx['timeStamp'])) > cutoff
    )

    return min(
        (tx_count ** 0.7) * 5 +              # Progressive tx count scaling
        (contract_txs ** 0.8) * 10 +         # Weighted contract interactions
        recent_txs * 20,                     # Recent activity boost
        400
    )


def calculate_risk_score(wallet_address):
    """Compute the wallet's combined risk score, clamped to 0..SCORE_MAX.

    The score is the sum of a protocol component (0-600, from the Compound
    account data) and an activity component (0-400, from the transaction
    list), stretched by ``raw * (1 + raw / 800)`` and truncated to an int.

    Args:
        wallet_address: Address forwarded to both data fetchers.

    Returns:
        An int between 0 and ``SCORE_MAX``. If anything fails while fetching
        or scoring, the failure is logged and 0 is returned, so a 0 can mean
        either "no risk signals" or "could not be scored".
    """
    try:
        compound_data = fetch_compound_data(wallet_address)
        eth_txs = fetch_etherscan_data(wallet_address)
        # A non-list payload (e.g. an error message string) carries no
        # transactions; treat it as empty rather than discarding the
        # protocol component along with it.
        if not isinstance(eth_txs, list):
            eth_txs = []

        raw_score = _protocol_score(compound_data) + _activity_score(eth_txs, datetime.now())

        # Non-linear stretch of the combined score, then clamp to the maximum.
        return min(int(raw_score * (1 + raw_score / 800)), SCORE_MAX)

    except Exception:
        # Best-effort contract kept (callers always get an int), but the
        # failure is no longer invisible.
        logging.getLogger(__name__).warning(
            "Scoring failed for %s; returning 0", wallet_address, exc_info=True
        )
        return 0

def generate_risk_scores(wallet_addresses):
    """Score every wallet and return the results as a DataFrame.

    Wallets are processed in groups of ``BATCH_SIZE``; after each group except
    the last, execution pauses for ``DELAY_SECONDS`` seconds. Progress is
    reported through a tqdm bar.

    Args:
        wallet_addresses: Sequence of wallet address strings (must support
            ``len`` and slicing).

    Returns:
        A DataFrame with columns ``wallet_id`` and ``score``, one row per
        input address in input order. The columns are present even when the
        input is empty.
    """
    results = []
    total = len(wallet_addresses)

    with tqdm(total=total, desc="Generating scores") as pbar:
        for i in range(0, total, BATCH_SIZE):
            for address in wallet_addresses[i:i + BATCH_SIZE]:
                score = calculate_risk_score(address)
                results.append({'wallet_id': address, 'score': score})
                pbar.update(1)

            # No pause after the final batch.
            if i + BATCH_SIZE < total:
                time.sleep(DELAY_SECONDS)

    # Explicit columns keep the schema stable for an empty result list, so
    # callers indexing result['score'] do not hit a KeyError.
    return pd.DataFrame(results, columns=['wallet_id', 'score'])

if __name__ == "__main__":
    # Read the wallet identifiers to score from the 'wallet_id' column
    wallet_column = pd.read_csv('wallets.csv')['wallet_id']
    wallets = wallet_column.tolist()

    # Score all wallets and write the table to disk without the index column
    scores_df = generate_risk_scores(wallets)
    scores_df.to_csv('wallet_risk_scores_final.csv', index=False)

    # Summary statistics of the score column
    print("\nFinal Score Distribution:")
    score_summary = scores_df['score'].describe()
    print(score_summary)

    # Highest scores first; head() keeps the first five rows
    print("\nTop 5 Highest Risk Wallets:")
    ranked_by_score = scores_df.sort_values('score', ascending=False)
    print(ranked_by_score.head())

Generating scores: 100%|██████████| 103/103 [02:05<00:00,  1.21s/it]


Final Score Distribution:
count    103.000000
mean      67.893204
std      130.295510
min        0.000000
25%       10.000000
50%       15.000000
75%       42.500000
max      600.000000
Name: score, dtype: float64

Top 5 Highest Risk Wallets:
                                     wallet_id  score
0   0x0039f22efb07a647557c7c5d17854cfd6d489ef3    600
68  0x96479b087cb8f236a5e2dcbfc50ce63b2f421da6    600
48  0x70d8e4ab175dfe0eab4e9a7f33e0a2d19f44001e    599
75  0xa7f3c74f0255796fd5d3ddcf88db769f7a6bf46a    512
96  0xf340b9f2098f80b86fbc5ede586c319473aa11f3    498



