<a href="https://colab.research.google.com/github/kraven681/aave-wallet-credibility/blob/main/aave_wallet_credibility.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load the raw wallet transaction export from a JSON file into `df`.
# NOTE(review): the absolute "/content/..." path presumably targets a Colab
# runtime — confirm/adjust before running elsewhere.
df = pd.read_json("/content/user-wallet-transactions.json")

In [None]:
# Preview the first rows; a bare last expression uses the notebook's rich
# DataFrame display instead of the truncated plain-text repr from print().
df.head()

                                    _id  \
0  {'$oid': '681d38fed63812d4655f571a'}   
1  {'$oid': '681aa70dd6df53021cc6f3c0'}   
2  {'$oid': '681d04c2d63812d4654c733e'}   
3  {'$oid': '681d133bd63812d46551b6ef'}   
4  {'$oid': '681899e4ba49fc91cf2f4454'}   

                                   userWallet  network protocol  \
0  0x00000000001accfa9cef68cf5371a23025b6d4b6  polygon  aave_v2   
1  0x000000000051d07a4fb3bd10121a343d85818da6  polygon  aave_v2   
2  0x000000000096026fb41fc39f9875d164bd82e2dc  polygon  aave_v2   
3  0x000000000096026fb41fc39f9875d164bd82e2dc  polygon  aave_v2   
4  0x0000000000e189dd664b9ab08a33c4839953852c  polygon  aave_v2   

                                              txHash  \
0  0x695c69acf608fbf5d38e48ca5535e118cc213a89e3d6...   
1  0xe6fc162c86b2928b0ba9b82bda672763665152b9de9d...   
2  0xe2d7eb815c89331a734ed6f204a06c385a1b39040baa...   
3  0x0d63a2eacd82b82f868db825ea7385e6bd8d046ee729...   
4  0x590eabb812c5006a6f4766f44e6e9d3ad0b5b563de69...   

 

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 13 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   _id          100000 non-null  object        
 1   userWallet   100000 non-null  object        
 2   network      100000 non-null  object        
 3   protocol     100000 non-null  object        
 4   txHash       100000 non-null  object        
 5   logId        100000 non-null  object        
 6   timestamp    100000 non-null  datetime64[ns]
 7   blockNumber  100000 non-null  int64         
 8   action       100000 non-null  object        
 9   actionData   100000 non-null  object        
 10  __v          100000 non-null  int64         
 11  createdAt    100000 non-null  object        
 12  updatedAt    100000 non-null  object        
dtypes: datetime64[ns](1), int64(2), object(10)
memory usage: 9.9+ MB
None


In [None]:
# Summary statistics for the numeric/datetime columns; a bare last expression
# renders as a rich table rather than plain text.
df.describe()

                           timestamp   blockNumber       __v
count                         100000  1.000000e+05  100000.0
mean   2021-06-17 00:55:30.034620160  1.623891e+09       0.0
min              2021-03-31 17:00:04  1.617210e+09       0.0
25%       2021-05-21 08:21:36.500000  1.621585e+09       0.0
50%              2021-06-11 12:43:53  1.623415e+09       0.0
75%              2021-07-12 02:29:14  1.626057e+09       0.0
max              2021-09-02 17:54:35  1.630605e+09       0.0
std                              NaN  3.016214e+06       0.0


In [None]:
# Drop every row that has a missing value in any column and rebind `df` to the
# result (the original, unfiltered frame is no longer reachable afterwards).
df = df.dropna()

In [None]:
# Normalise the columns used for feature engineering:
#   action    -> lower-cased so comparisons such as == 'deposit' are case-insensitive
#   timestamp -> pandas datetime
#   amount    -> float pulled out of the nested actionData dict (0 when the key is absent)
df['action'] = df['action'].str.lower()
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['amount'] = df['actionData'].apply(
    lambda payload: float(payload.get('amount', 0))
)


In [None]:
# Per-wallet feature table.
#
# Each action type contributes a transaction count and (where applicable) a
# summed amount. The masks are computed once on the whole frame and then
# grouped, instead of re-slicing `df` inside a lambda for every wallet.
grouped = df.groupby('userWallet')
wallets = df['userWallet']

# (action value, count column, amount column or None). The order defines the
# output column order, which the scaler/model cells further down depend on.
action_columns = [
    ('deposit', 'num_deposits', 'total_deposit_amount'),
    ('borrow', 'num_borrows', 'total_borrow_amount'),
    ('repay', 'num_repayments', 'total_repaid'),
    ('liquidationcall', 'num_liquidations', None),
    ('redeemunderlying', 'num_redeems', 'total_redeemed'),
]

per_wallet = {}
for action_name, count_col, amount_col in action_columns:
    is_action = df['action'] == action_name
    per_wallet[count_col] = is_action.groupby(wallets).sum()
    if amount_col is not None:
        # Rows of other actions contribute 0.0 to this action's total.
        per_wallet[amount_col] = df['amount'].where(is_action, 0.0).groupby(wallets).sum()

per_wallet['num_actions'] = grouped['action'].count()
# Number of distinct calendar days on which the wallet transacted.
per_wallet['active_days'] = df['timestamp'].dt.normalize().groupby(wallets).nunique()

features = pd.DataFrame(per_wallet)

# Add derived features
# Repaid / borrowed; wallets that never borrowed get a neutral 1.0.
features['borrow_repay_ratio'] = (
    features['total_repaid'] / features['total_borrow_amount']
).where(features['total_borrow_amount'] > 0, 1.0)

# Redeemed / deposited; wallets that never deposited get 0.0.
features['redeem_ratio'] = (
    features['total_redeemed'] / features['total_deposit_amount']
).where(features['total_deposit_amount'] > 0, 0.0)

# Whole days between first and last transaction, inclusive of the first day.
activity_span = grouped['timestamp'].max() - grouped['timestamp'].min()
features['activity_span_days'] = activity_span.dt.days + 1

features = features.rename_axis('userWallet').reset_index()



In [None]:
from sklearn.preprocessing import StandardScaler

# Model input matrix: every per-wallet feature column except the identifier.
X = features.drop(columns=['userWallet'])  # input features
# Fit a StandardScaler on the full feature matrix and transform it in one step.
# NOTE(review): `X` and `scaler` are both reassigned by later cells, so
# X_scaled here is not what the model below is ultimately trained on.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Unfitted random-forest regressor (100 trees); random_state pins the forest's
# randomness so repeated fits on the same data give the same model.
model = RandomForestRegressor(n_estimators=100, random_state=42)


In [None]:
from sklearn.model_selection import train_test_split

# NOTE(review): `df` only gains a 'credit_score' column in a cell further down
# the notebook, so on a fresh top-to-bottom run this line raises KeyError.
# NOTE(review): `df` is the per-transaction frame, not the per-wallet
# `features` table built above — confirm which one is meant to be modelled.
X = df.drop(columns=['credit_score'])  # Features
y = df['credit_score']                 # Target

# Hold out 20% of the rows for evaluation; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Option 1: Select only numeric columns automatically
# (non-numeric columns such as ids, hashes and nested dicts are discarded).
X_train_numeric = X_train.select_dtypes(include='number')
X_test_numeric = X_test.select_dtypes(include='number')

# Then scale
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only and reuse the fitted statistics for
# the test split. This rebinds `scaler`, replacing the one fitted earlier.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_numeric)
X_test_scaled = scaler.transform(X_test_numeric)


In [None]:
# Train the regressor on the scaled numeric training split.
model.fit(X_train_scaled, y_train)


In [None]:
# Predict the target for the held-out (scaled) test split.
y_pred = model.predict(X_test_scaled)


In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Evaluate on the hold-out split: mean squared error and R².
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Both metrics are printed rounded to two decimals.
print(f"MSE: {mse:.2f}, R²: {r2:.2f}")


MSE: 39927.04, R²: -0.33


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:
import joblib

# Persist the fitted model and its matching scaler to the working directory so
# the serving code can reload the pair together.
joblib.dump(model, 'credit_score_model.pkl')
joblib.dump(scaler, 'scaler.pkl')


['scaler.pkl']

In [None]:
from flask import Flask, request, jsonify
import joblib
import numpy as np

app = Flask(__name__)
model = joblib.load('credit_score_model.pkl')
scaler = joblib.load('scaler.pkl')

@app.route('/predict', methods=['POST'])
def predict():
    """Score one feature vector posted as JSON.

    Expected request body: ``{"features": [<number>, ...]}``. The values are
    reshaped to a single row, scaled with the loaded ``scaler`` and passed to
    the loaded ``model``.

    Returns:
        ``{"credit_score": <float>}`` on success, or ``{"error": <message>}``
        with HTTP 400 when the body is missing, is not a JSON object, lacks
        the ``features`` key, holds non-numeric values, or has the wrong
        number of features.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad-input case is answered with one uniform 400.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or 'features' not in payload:
        return jsonify({'error': "JSON body must contain a 'features' list"}), 400

    try:
        input_array = np.asarray(payload['features'], dtype=float).reshape(1, -1)
    except (TypeError, ValueError):
        return jsonify({'error': "'features' must be a list of numbers"}), 400

    # Reject vectors of the wrong width up front when the fitted scaler
    # exposes how many features it was trained on.
    expected = getattr(scaler, 'n_features_in_', None)
    if expected is not None and input_array.shape[1] != expected:
        return jsonify(
            {'error': f'expected {expected} features, got {input_array.shape[1]}'}
        ), 400

    input_scaled = scaler.transform(input_array)
    prediction = model.predict(input_scaled)
    # Cast the numpy scalar to a plain float so JSON encoding never depends on
    # the numpy dtype returned by the model.
    return jsonify({'credit_score': float(prediction[0])})

if __name__ == '__main__':
    # Debug mode stays off: it exposes the interactive Werkzeug debugger
    # (arbitrary code execution for anyone who can reach the port) and starts
    # the auto-reloader, which re-executes the process and does not work from
    # inside a notebook kernel.
    app.run(debug=False)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat


In [None]:
import pandas as pd

def extract_features(df):
    """Aggregate raw transactions into one feature row per wallet.

    Args:
        df: Transaction frame with columns ``userWallet``, ``timestamp``,
            ``action`` and ``actionData`` (a dict that may carry ``amount``).
            The frame is NOT modified.

    Returns:
        DataFrame with one row per wallet and the columns ``userWallet``,
        ``total_deposit``, ``total_borrow``, ``total_repay``,
        ``total_withdraw``, ``num_liquidations``, ``num_txns``,
        ``active_days`` (whole days between first and last transaction, plus
        one), ``repay_ratio`` and ``deposit_ratio``.
    """
    # Work on a narrow private frame so the caller's data is never mutated.
    # Amounts are parsed as float: raw token amounts can exceed the int64
    # range, which would otherwise degrade the totals to object dtype.
    tx = pd.DataFrame({
        'userWallet': df['userWallet'],
        'timestamp': pd.to_datetime(df['timestamp']),
        'action': df['action'].str.lower(),
        'amount': df['actionData'].apply(
            lambda payload: float(payload.get('amount', 0)) if isinstance(payload, dict) else 0.0
        ).astype(float),
    })

    wallets = tx['userWallet']
    grouped = tx.groupby('userWallet')

    # Output column -> action whose amounts are summed into it.
    amount_columns = {
        'total_deposit': 'deposit',
        'total_borrow': 'borrow',
        'total_repay': 'repay',
        'total_withdraw': 'redeemunderlying',
    }
    per_wallet = {
        column: tx['amount'].where(tx['action'] == action, 0.0).groupby(wallets).sum()
        for column, action in amount_columns.items()
    }
    per_wallet['num_liquidations'] = (tx['action'] == 'liquidationcall').groupby(wallets).sum()
    per_wallet['num_txns'] = grouped['action'].count()

    features = pd.DataFrame(per_wallet)
    span = grouped['timestamp'].max() - grouped['timestamp'].min()
    features['active_days'] = span.dt.days + 1
    features = features.fillna(0)

    # Add derived features; the +1 keeps the denominators non-zero.
    features['repay_ratio'] = features['total_repay'] / (features['total_borrow'] + 1)
    features['deposit_ratio'] = features['total_deposit'] / (features['total_withdraw'] + 1)

    return features.rename_axis('userWallet').reset_index()


In [None]:
def score_wallets(df_features):
    """Attach a rule-based ``credit_score`` column to the feature frame.

    The score is ``0.001 * total_deposit + 300 * repay_ratio
    - 100 * num_liquidations``, clipped to the range [0, 1000]. The column is
    written onto ``df_features`` itself, and that same frame is returned.
    """
    deposit_term = df_features['total_deposit'] * 0.001
    repay_term = df_features['repay_ratio'] * 300
    liquidation_penalty = df_features['num_liquidations'] * 100

    raw_score = deposit_term + repay_term - liquidation_penalty
    df_features['credit_score'] = raw_score.clip(0, 1000)
    return df_features


In [None]:
# End-to-end run: load raw transactions, build per-wallet features, score them.
raw_df = pd.read_json('/content/user-wallet-transactions.json')  # or from POST /score
# NOTE(review): several cells define `extract_features` / `score_wallets`;
# which definitions run here depends on cell execution order.
features_df = extract_features(raw_df)
scored_df = score_wallets(features_df)

# Show the first few wallet/score pairs.
print(scored_df[['userWallet', 'credit_score']].head())


                                   userWallet credit_score
0  0x00000000001accfa9cef68cf5371a23025b6d4b6         1000
1  0x000000000051d07a4fb3bd10121a343d85818da6         1000
2  0x000000000096026fb41fc39f9875d164bd82e2dc         1000
3  0x0000000000e189dd664b9ab08a33c4839953852c          0.0
4  0x0000000002032370b971dabd36d72f3e5a7bf1ee         1000


In [None]:
import numpy as np

# Placeholder target: one random integer score per row, NOT derived from wallet
# behaviour — a model fitted on it cannot learn anything meaningful.
# A seeded Generator keeps the values reproducible across runs, and
# endpoint=True makes the documented 300–900 range inclusive (randint excluded 900).
df['credit_score'] = np.random.default_rng(42).integers(300, 900, size=len(df), endpoint=True)  # Range 300–900


In [None]:
# Split the transaction frame into inputs (everything except the score) and
# the target column. This rebinds the notebook-level `X` and `y`.
X = df.drop(columns=['credit_score'])
y = df['credit_score']


In [None]:
# Example: Label wallets that were ever liquidated as 'risky'
features['target'] = features['num_liquidations'].gt(0).astype(int)


In [None]:
# Write the per-wallet feature table to CSV in the working directory; the
# DataFrame index is omitted from the file.
features.to_csv("wallet_features.csv", index=False)


In [None]:
# Add this at the top of app.py
def extract_features(df):
    """Aggregate raw transactions into one feature row per wallet.

    Args:
        df: Transaction frame with columns ``userWallet``, ``timestamp``,
            ``action`` and ``actionData`` (a dict that may carry ``amount``).
            The frame is NOT modified.

    Returns:
        DataFrame with one row per wallet and, in this order, the columns
        ``userWallet``, ``num_deposits``, ``total_deposit_amount``,
        ``num_borrows``, ``total_borrow_amount``, ``num_repayments``,
        ``total_repaid``, ``num_liquidations``, ``num_redeems``,
        ``total_redeemed``, ``num_actions``, ``active_days``,
        ``borrow_repay_ratio``, ``redeem_ratio`` and ``activity_span_days``.
    """
    # Narrow private frame: the caller's DataFrame is left untouched.
    tx = pd.DataFrame({
        'userWallet': df['userWallet'],
        'timestamp': pd.to_datetime(df['timestamp']),
        'action': df['action'].str.lower(),
        'amount': df['actionData'].apply(
            lambda payload: float(payload.get('amount', 0))
        ).astype(float),
    })

    wallets = tx['userWallet']
    grouped = tx.groupby('userWallet')

    # (action value, count column, amount column or None); the order fixes the
    # output column order that downstream scalers/models rely on.
    action_columns = [
        ('deposit', 'num_deposits', 'total_deposit_amount'),
        ('borrow', 'num_borrows', 'total_borrow_amount'),
        ('repay', 'num_repayments', 'total_repaid'),
        ('liquidationcall', 'num_liquidations', None),
        ('redeemunderlying', 'num_redeems', 'total_redeemed'),
    ]

    per_wallet = {}
    for action_name, count_col, amount_col in action_columns:
        # One vectorised mask per action instead of re-slicing the frame
        # inside a lambda for every wallet.
        is_action = tx['action'] == action_name
        per_wallet[count_col] = is_action.groupby(wallets).sum()
        if amount_col is not None:
            per_wallet[amount_col] = tx['amount'].where(is_action, 0.0).groupby(wallets).sum()

    per_wallet['num_actions'] = grouped['action'].count()
    # Distinct calendar days with at least one transaction.
    per_wallet['active_days'] = tx['timestamp'].dt.normalize().groupby(wallets).nunique()

    features = pd.DataFrame(per_wallet)

    # Repaid / borrowed; wallets that never borrowed get a neutral 1.0.
    features['borrow_repay_ratio'] = (
        features['total_repaid'] / features['total_borrow_amount']
    ).where(features['total_borrow_amount'] > 0, 1.0)

    # Redeemed / deposited; wallets that never deposited get 0.0.
    features['redeem_ratio'] = (
        features['total_redeemed'] / features['total_deposit_amount']
    ).where(features['total_deposit_amount'] > 0, 0.0)

    # Whole days between first and last transaction, plus one.
    span = grouped['timestamp'].max() - grouped['timestamp'].min()
    features['activity_span_days'] = span.dt.days + 1

    return features.rename_axis('userWallet').reset_index()


In [None]:
# file: score_wallets.py

import pandas as pd
import joblib

def score_wallets(features_df):
    """Predict a credit score for every wallet with the persisted model.

    Args:
        features_df: Per-wallet feature frame containing ``userWallet`` plus
            the feature columns the model was trained on. The frame is NOT
            modified, so the function can be called repeatedly on the same
            input.

    Returns:
        DataFrame with the columns ``userWallet`` and ``credit_score``,
        indexed like ``features_df``.
    """
    # NOTE: joblib.load unpickles the file — only load model files you trust.
    model = joblib.load('wallet_credit_model.pkl')

    # Exclude the identifier and any score left over from an earlier scoring
    # pass; neither is a model feature.
    X = features_df.drop(columns=['userWallet', 'credit_score'], errors='ignore')
    scores = model.predict(X)

    return pd.DataFrame(
        {'userWallet': features_df['userWallet'], 'credit_score': scores},
        index=features_df.index,
    )


In [None]:
# file: feature_engineering.py

import pandas as pd

def extract_features(df):
    """Basic per-wallet feature engineering.

    Args:
        df: Transaction frame with columns ``userWallet``, ``timestamp``,
            ``action`` and ``actionData``. ``actionData`` entries may be dicts
            or the string form of a Python dict literal. The frame is NOT
            modified.

    Returns:
        DataFrame with one row per wallet and the columns ``userWallet``,
        ``num_transactions``, ``num_deposits``, ``num_borrows``,
        ``num_repays``, ``num_liquidations``, ``total_amount`` and
        ``active_days`` (whole days between first and last transaction, plus
        one).

    Raises:
        ValueError / SyntaxError: if a string ``actionData`` entry is not a
            valid Python literal.
    """
    import ast

    def _amount(raw):
        # SECURITY: this replaces a bare eval() on actionData. literal_eval
        # only accepts Python literals, so hostile input cannot execute code.
        # Values that are already dicts are used as-is (eval() rejected them).
        payload = ast.literal_eval(raw) if isinstance(raw, str) else raw
        if not isinstance(payload, dict):
            return 0.0
        return float(payload.get('amount', 0))

    # Narrow private frame: the caller's DataFrame is left untouched.
    tx = pd.DataFrame({
        'userWallet': df['userWallet'],
        'timestamp': pd.to_datetime(df['timestamp']),
        'action': df['action'],
        'amount': df['actionData'].apply(_amount).astype(float),
    })

    features = (
        tx.groupby('userWallet')
        .agg(
            num_transactions=('action', 'count'),
            num_deposits=('action', lambda x: (x == 'deposit').sum()),
            num_borrows=('action', lambda x: (x == 'borrow').sum()),
            num_repays=('action', lambda x: (x == 'repay').sum()),
            num_liquidations=('action', lambda x: (x == 'liquidationcall').sum()),
            total_amount=('amount', 'sum'),
            first_tx=('timestamp', 'min'),
            last_tx=('timestamp', 'max'),
        )
        .reset_index()
    )

    # Additional features
    features['active_days'] = (features['last_tx'] - features['first_tx']).dt.days + 1
    features = features.drop(columns=['first_tx', 'last_tx'])

    return features


In [None]:
# file: train_model.py

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import joblib

def train_model(df):
    """Train a random-forest credit-score regressor and save it to disk.

    Args:
        df: Per-wallet feature frame. ``userWallet`` (identifier) and
            ``credit_score`` (target) are excluded from the model inputs when
            present. Without a ``credit_score`` column, a heuristic target is
            derived from ``num_deposits``, ``total_deposit_amount``,
            ``num_liquidations``, ``borrow_repay_ratio``,
            ``total_borrow_amount``, ``redeem_ratio`` and ``active_days``,
            then min-max scaled to [0, 1000].

    Side effects:
        Writes ``wallet_credit_model.pkl`` to the working directory and prints
        a confirmation line.
    """
    # Drop the identifier and the target in one place. The previous version
    # dropped 'userWallet' a second time before the split (always a KeyError)
    # and left 'credit_score' among the features (target leakage).
    X = df.drop(columns=['userWallet', 'credit_score'], errors='ignore')
    y = df['credit_score'] if 'credit_score' in df else None  # Use heuristics if no labels

    # Optional: Create label if needed
    if y is None:
        # Computed as a standalone Series so it never becomes a model feature.
        score_raw = (
            + 2 * X['num_deposits']
            + 3 * X['total_deposit_amount']
            - 5 * X['num_liquidations']
            - 2 * (1 - X['borrow_repay_ratio']) * X['total_borrow_amount']
            + 3 * X['redeem_ratio']
            + X['active_days']
        )
        from sklearn.preprocessing import MinMaxScaler
        scaler = MinMaxScaler((0, 1000))
        y = scaler.fit_transform(score_raw.to_frame('score_raw')).flatten()

    # The hold-out split is created but not evaluated here; only the training
    # part is used for fitting.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    joblib.dump(model, 'wallet_credit_model.pkl')
    print("✅ Model saved as 'wallet_credit_model.pkl'")
