This notebook downloads the latest user snapshot data from the crvUSD subgraph and processes it to estimate the losses from soft liquidation for different band ranges.

The first portion of the code downloads the data and saves it as a CSV file called `user_snapshots.csv`.

In [1]:
import pluck
import pandas as pd

# crvusd subgraph url (the GraphQL endpoint that get_user_data queries through pluck.execute)
# NOTE(review): this looks like a hosted-service endpoint -- confirm it is still being served before a full download
subgraph_url = 'https://api.thegraph.com/subgraphs/name/convex-community/crvusd'

# fetch a single page of snapshots (with their user states) from the subgraph
def get_user_data(skip_snapshots=0, skip_user_states=0):
  """Query one page of user-state snapshots and return it as a DataFrame.

  Args:
    skip_snapshots: number of snapshots to skip (pagination offset of the outer list).
    skip_user_states: number of user states to skip inside every snapshot
      (pagination offset of the nested list).

  Returns:
    The single frame produced by ``pluck.execute`` for this query.  The
    one-element unpacking below raises ``ValueError`` if pluck returns any
    other number of frames.
  """
  # at most 1000 snapshots, and at most 1000 user states per snapshot, are requested per call
  snapshot_query = f"""
  {{
    snapshots(first: 1000, skip: {skip_snapshots}, where: {{userStateSnapshot: true}}) {{
      basePrice
      oraclePrice
      activeBand
      userStates (first: 1000, skip: {skip_user_states}) {{
        collateral
        stablecoin
        n
        n1
        n2
        debt
        depositedCollateral
        health
        loss
        lossPct
        timestamp
        user {{
          id
        }}
      }}
      market {{
        id
        collateralName
      }}
    }}
  }}
  """
  [page] = pluck.execute(snapshot_query, column_names="short", url=subgraph_url)
  return page

# page through the subgraph until no new user data comes back
def fetch_all_user_data():
    """Download every page of user snapshot data and return it as one DataFrame.

    The outer loop advances the snapshot offset and the inner loop advances the
    user-state offset, both in steps of 1000.  A page is kept only when it has
    exactly 17 columns and is not flagged as a repeat of data already collected.
    The first rejected page at a given snapshot offset ends the inner loop; if
    that happens on the very first user-state page (offset 0) the download is
    complete and the accumulated frame is returned.

    Returns:
        A DataFrame with all accepted pages concatenated under a fresh 0..n-1 index.
    """
    collected = pd.DataFrame()
    snapshot_offset = 0

    while True:
        state_offset = 0

        while True:
            print(f"skip snapshots: {snapshot_offset}, user states: {state_offset}, snapshot_df length: {collected.shape[0]}")
            page = get_user_data(snapshot_offset, state_offset)

            # 17 columns -- presumably one per leaf field requested by the query; confirm if the query changes
            has_all_columns = page.shape[1] == 17

            # DataFrame.isin(DataFrame) matches on index AND column labels, so this compares the page with
            # the rows at the same positions at the start of the accumulated frame
            if has_all_columns and not page.isin(collected).all().all():
                collected = pd.concat([collected, page], ignore_index=True)
                state_offset += 1000
                continue

            # nothing usable even on the first user-state page: there is no more data at all
            if state_offset == 0:
                return collected

            # user states are exhausted for this batch of snapshots
            break

        snapshot_offset += 1000

In [None]:
# download every page of user snapshots from the subgraph
data = fetch_all_user_data()

# columns holding whole numbers vs. columns holding decimal values
columns_to_int = ['activeBand', 'n', 'n1', 'n2', 'timestamp']
columns_to_float = [
    'basePrice', 'oraclePrice', 'collateral', 'stablecoin', 'debt',
    'depositedCollateral', 'health', 'loss', 'lossPct'
]

# drop incomplete rows, cast the numeric columns and give the two id columns clearer names
data = (
    data
    .dropna()
    .astype({
        **dict.fromkeys(columns_to_int, int),
        **dict.fromkeys(columns_to_float, float),
    })
    .rename(columns={'id': 'marketId', 'user.id': 'user'})
)

# derived columns: softLiq flags rows whose active band has reached the position's n1 band,
# collateralUsd is the collateral amount valued at the oracle price
data['softLiq'] = data['n1'] <= data['activeBand']
data['collateralUsd'] = data['oraclePrice'] * data['collateral']

# persist the cleaned snapshots so the analysis below can be run without downloading again
data.to_csv("user_snapshots.csv", index=False)

This portion calculates the losses from the user snapshots. If you have already downloaded the snapshots (i.e. `user_snapshots.csv` exists), you can run the notebook from here.

In [1]:
import pandas as pd

# load the saved snapshots (to pull the latest ones first, run the get_data.ipynb notebook)
data = pd.read_csv('user_snapshots.csv')

# total USD value of the position: collateral token value plus the crvUSD (stablecoin) part
position_usd = data['collateralUsd'] + data['stablecoin']

# share of the position held in the collateral token and in crvUSD, as percentages
data['collateralPct'] = data['collateralUsd'] / position_usd * 100
data['stablecoinPct'] = 100 - data['collateralPct']

# the softLiq column stays True even once the position is below soft liquidation (no collateral token left),
# so split it: under_softLiq marks those rows and real_softLiq keeps only the genuine soft-liquidation rows
data['under_softLiq'] = data['collateralPct'] == 0
data['real_softLiq'] = data['softLiq'] & ~data['under_softLiq']

# loan to value ratio, as a percentage
data['ltv'] = data['debt'] / position_usd * 100

# order the rows by user, marketId and timestamp and renumber them from 0
data = data.sort_values(by=['user', 'marketId', 'timestamp'])
data = data.reset_index(drop=True)

In [None]:
# The soft-liquidation loss statistics are misleading in their raw form.  E.g. if a user loses 20% of their collateral
# and then pays back most of the debt and withdraws 80% of the collateral, the statistics will say the user lost 100%
# of their collateral.
#
# Instead, measure the loss per day between consecutive snapshots of the same loan, and skip the intervals in which
# the user acted on the loan (deposited or withdrew collateral, paid back debt or borrowed more).
#
# The computation is vectorised: every row is compared with the row directly above it.  `data` is sorted by user,
# marketId and timestamp, so the row above is the previous snapshot whenever it belongs to the same loan.

# value of the position expressed in units of the collateral token (e.g. WETH)
collat_value = (data['collateralUsd'] + data['stablecoin']) / data['oraclePrice']
prev_collat_value = collat_value.shift(1)
prev_debt = data['debt'].shift(1)

# a row continues the previous row's loan when user, marketId and depositedCollateral are all unchanged;
# the first row has no predecessor (its shifted values are missing), so it never matches
same_loan = (
    data['user'].eq(data['user'].shift(1))
    & data['marketId'].eq(data['marketId'].shift(1))
    & data['depositedCollateral'].eq(data['depositedCollateral'].shift(1))
)

# how much collateral-token value was lost since the previous snapshot, and how many days passed
lost_value = prev_collat_value - collat_value
time_days_diff = data['timestamp'].diff() / 86400

# a debt change of more than 2% between snapshots counts as a user action
debt_changed = (prev_debt > data['debt'] * 1.02) | (prev_debt < data['debt'] * 0.98)

# a loss is recorded only for an untouched loan that lost value over a non-empty time span
# (the elapsed-time check keeps a zero-length interval from producing an infinite rate)
is_loss = same_loan & ~debt_changed & (lost_value > 0) & (time_days_diff > 0)

# fraction of the previous position value lost between snapshots, standardized to a day (0.01 == 1% per day);
# built as a float column from the start, so no float is ever written into an integer column
data['lossPctPerDay'] = (lost_value / prev_collat_value / time_days_diff).where(is_loss, 0.0)

# 1 on rows where the user changed their debt since the previous snapshot of the same loan, else 0
data['debtActions'] = (same_loan & debt_changed).astype(int)

# days covered by each recorded loss interval (0 on all other rows)
data['softLiqDays'] = time_days_diff.where(is_loss, 0.0)

In [None]:
# keep only the snapshots taken while the user was really in soft liquidation
softLiqData = data.loc[data['real_softLiq']].copy()

# create bins for the number of bands a user chose.
# The bins are right-inclusive -- (3, 9], (9, 19], (19, 35], (35, 50] -- so each label matches its contents:
# n = 9 lands in '4-9', n = 19 in '10-19', n = 35 in '20-35' and n = 50 in '36-50'.
# (With right=False the boundary values slid into the next bin and n = 50 fell outside every bin.)
bins = [3, 9, 19, 35, 50]
labels = ['4-9', '10-19', '20-35', '36-50']
softLiqData['n_range'] = pd.cut(softLiqData['n'], bins=bins, labels=labels, right=True)

# group the data by the number of bands a user chose.
# observed=False keeps a row for every bin, even one without any entries; named aggregation sets the
# output column names directly instead of relying on a positional rename.
sl_n_stats = (
    softLiqData
    .groupby('n_range', observed=False)
    .agg(
        entries=('timestamp', 'count'),
        lossPctDay_min=('lossPctPerDay', 'min'),
        lossPctDay_median=('lossPctPerDay', 'median'),
        lossPctDay_mean=('lossPctPerDay', 'mean'),
        lossPctDay_std=('lossPctPerDay', 'std'),
        lossPctDay_max=('lossPctPerDay', 'max'),
        softLiqDays=('softLiqDays', 'sum'),
    )
    .reset_index(drop=False)
)

# save to csv and show the table
sl_n_stats.to_csv('grouped_soft_liq_stats.csv', index=False)
print(sl_n_stats)