This notebook reads user snapshot data from a csv and processes it to find the losses from soft liquidation in different band ranges

In [1]:
import pandas as pd

# if you want the data, run the get_data.ipynb notebook first, this will pull all the latest snapshots.
data = pd.read_csv('user_snapshots.csv')
print(data.columns)

# find the portion of collateral in crvUSD and the collateral token as a percentage
data['collateralPct'] = data['collateralUsd']/(data['collateralUsd']+data['stablecoin'])*100
data['stablecoinPct'] = 100-data['collateralPct']

# currently softLiq column is True even when under softliq, let's create new columns to show the real soft liquidation
data['under_softLiq'] = data['collateralPct'].eq(0)
data['real_softLiq'] = (~data['under_softLiq']) & (data['softLiq'])

# find the loan to value ratio
data['ltv'] = data['debt']/(data['collateralUsd']+data['stablecoin'])*100

# sort the data by user, marketId and timestamp
data = data.sort_values(by=['user', 'marketId', 'timestamp']).reset_index(drop=True)

Index(['basePrice', 'oraclePrice', 'activeBand', 'collateral', 'stablecoin',
       'n', 'n1', 'n2', 'debt', 'depositedCollateral', 'health', 'loss',
       'lossPct', 'timestamp', 'user', 'marketId', 'collateralName', 'softLiq',
       'collateralUsd'],
      dtype='object')


In [None]:
# The soft-liquidation loss statistics are misleading in the current form.  E.g. if a user loses 20% of their collateral
# and then pays back most debt and withdraws 80% collateral the statistics will say the user lost 100% of their collateral

# Let's calculate the loss per day while the user is in soft liquidation.  We will remove time periods where the user
# did an action e.g. deposited or withdrew collateral, paid back debt or borrowed more.

# create a lossPctPerDay column which counts the % a user lost between snapshots standardized to a day
data['lossPctPerDay'] = 0

# count the times a user changes their collateral and debt
data['debtActions'] = 0

# count the days a user is in soft liquidation
data['softLiqDays'] = 0


# need to iterate through data to get the above data.  This is slow but works.
i = 0
length = len(data)

while i < length:

    # get the current row data
    row = data.iloc[i]
    loan_id = row['user'] + row['marketId'] + str(row['depositedCollateral'])
    collat_value = (row['collateralUsd'] + row['stablecoin']) / row['oraclePrice']
    debt = row['debt']
    timestamp = row['timestamp']
    
    # it the current loan is the same as the previous loan, ie. same user, marketId and depositedCollateral
    # then we can calculate the lost value and log it if they didn't change their debt
    if i > 0 and prev_loan_id == loan_id:

        # lost value is how much the user lost between snapshots in their collateral e.g., WETH
        lost_value = prev_collat_value - collat_value
        time_days_diff = (timestamp - prev_timestamp) / 86400

        # if the debt changed by more than 2% then we log it as an action
        if prev_debt > debt * 1.02 or prev_debt < debt * 0.98:
            data.at[i, 'debtActions'] += 1
        
        # else we log the lost value and the time between snapshots
        elif lost_value > 0:
            lossPctPerDay = lost_value / prev_collat_value / time_days_diff
            data.at[i, 'softLiqDays'] = time_days_diff
            data.at[i, 'lossPctPerDay'] = lossPctPerDay

    # set the previous values to the current values
    prev_collat_value = collat_value
    prev_loan_id = loan_id
    prev_timestamp = timestamp
    prev_debt = debt

    # print progress every 10,000 rows
    if i % 10000 == 0:
        print(f"{i / length * 100}%")

    i += 1

In [3]:
# get real soft liquidation subset of data
softLiqData = data.loc[data['real_softLiq']].copy()

# create bins for the number of bands a user chose
bins = [3, 9, 19, 35, 50]
labels = ['4-9', '10-19', '20-35', '36-50']
softLiqData.loc[:, 'n_range'] = pd.cut(softLiqData['n'], bins=bins, labels=labels, right=False)

# group the data by the number of bands a user chose
sl_n_stats = softLiqData.groupby(['n_range']).agg({
    'timestamp': 'count',
    'lossPctPerDay': ['min', 'median', 'mean', 'std', 'max'],
    'softLiqDays': 'sum'
}).reset_index(drop=False)

# rename the columns and save to csv
sl_n_stats.columns = ['n_range', 'entries', 'lossPctDay_min', 'lossPctDay_median', 'lossPctDay_mean', 'lossPctDay_std', 'lossPctDay_max', 'softLiqDays']
sl_n_stats.to_csv('grouped_soft_liq_stats.csv', index=False)
print(sl_n_stats)

  n_range  entries  lossPctDay_min  lossPctDay_median  lossPctDay_mean  \
0     4-9    39340             0.0           0.000936         0.010990   
1   10-19    17417             0.0           0.000341         0.007385   
2   20-35     1023             0.0           0.000145         0.002491   
3   36-50     1942             0.0           0.000077         0.005477   

   lossPctDay_std  lossPctDay_max  softLiqDays  
0        0.026158        0.389285  5411.026528  
1        0.021998        0.430594  2389.507639  
2        0.006218        0.064132   142.907917  
3        0.013566        0.138341   257.078333  


This portion of the code was created to take band_snapshots and evaluate the soft-liquidation losses within bands through the snapshot periods.  It was found to not be a good estimate of the losses.  The snapshots were taken too wide apart.

In [1]:
import pandas as pd

data = pd.read_csv('band_snapshots.csv')
print(data.columns)

# find the portion of collateral in crvUSD and the collateral token as a percentage
data['collateralPct'] = data['collateralUsd']/(data['collateralUsd']+data['stableCoin'])*100
data['stablecoinPct'] = 100-data['collateralPct']

# sort the data by user, marketId and timestamp
data = data.sort_values(by=['marketId', 'timestamp', 'index']).reset_index(drop=True)

Index(['Unnamed: 0', 'timestamp', 'marketName', 'marketId', 'activeBand',
       'basePrice', 'index', 'collateral', 'collateralUsd', 'stableCoin',
       'oraclePrice'],
      dtype='object')


In [None]:
data['lossPctPerDay'] = 0

# count the days a user is in soft liquidation
data['softLiqDays'] = 0

# need to iterate through data to get the above data.  This is slow but works.
i = 0
dataActiveBands = data[(data['activeBand'] == data['index'])].copy()
length = len(dataActiveBands)
new_data = pd.DataFrame(columns=data.columns)

while i < length:

    # get the current row data
    row = dataActiveBands.iloc[i].copy()
    marketId = row['marketId']
    activeBand = int(row['activeBand'])
    timestamp = row['timestamp']
    
    collat_amount = (row['collateralUsd'] + row['stableCoin']) / row['oraclePrice']
    collat_value = row['collateralUsd'] + row['stableCoin']

    if i > 0 and marketId == prev_marketId:

        if activeBand >= prev_activeBand:
            ## price has dropped, calc loss in above bands
            bandRange = len(range(prev_activeBand, activeBand+1))
            for j in range(prev_activeBand, activeBand+1):
                print(j)
                prev_row = data[(data['marketId'] == marketId) & (data['timestamp'] == prev_timestamp) & (data['index'] == j)].copy()
                cur_row = data[(data['marketId'] == marketId) & (data['timestamp'] == timestamp) & (data['index'] == j)].copy()
                try:
                    prev_basePrice = prev_row['basePrice'].values[0]
                    basePrice = cur_row['basePrice'].values[0]
                except:
                    print("happened")
                    continue

                if prev_row['activeBand'].values[0] == j:
                    prev_price = prev_row['oraclePrice'].values[0]
                elif prev_row['activeBand'].values[0] < j:
                    prev_price = prev_basePrice * (99/100)**(j)
                else:
                    prev_price = prev_basePrice * (99/100)**(j+1)
                prev_collat_amount = (prev_row['collateralUsd'].values[0] + prev_row['stableCoin'].values[0]) / prev_price
                
                if cur_row['activeBand'].values[0] == j:
                    price = cur_row['oraclePrice'].values[0]
                elif cur_row['activeBand'].values[0] < j:
                    price = basePrice * (99/100)**(j)
                else:
                    price = basePrice * (99/100)**(j+1)

                collat_amount = (cur_row['collateralUsd'].values[0] + cur_row['stableCoin'].values[0]) / price
                prev_collat_amount = (prev_row['collateralUsd'].values[0] + prev_row['stableCoin'].values[0]) / prev_price
                lost_amount = prev_collat_amount - collat_amount

                time_days_diff = (timestamp - prev_timestamp) / bandRange / 86400
                
                # else we log the lost value and the time between snapshots
                lossPctPerDay = lost_amount / prev_collat_amount / time_days_diff
                if lossPctPerDay > 0 and lossPctPerDay < 0.10:
                    cur_row.loc[cur_row.index[0], 'softLiqDays'] = time_days_diff
                    cur_row.loc[cur_row.index[0], 'lossPctPerDay'] = lossPctPerDay
                    if not cur_row['marketId'].isnull().any():
                        print(f"market: {cur_row['marketId'].values[0]}")
                        new_data = pd.concat([new_data, cur_row], ignore_index=True)
                    else:
                        pass

    # set the previous values to the current values
    prev_collat_amount = collat_amount
    prev_marketId = marketId
    prev_timestamp = timestamp
    prev_activeBand = activeBand

    i += 1
    print(i)

In [3]:
# get real soft liquidation subset of data
softLiqData = new_data.copy()
softLiqData['liquidity'] = softLiqData['collateralUsd'] + softLiqData['stableCoin']

# create bins for the number of bands a user chose
bins = [0, 1000, 5000, 20000, 100000, 500000, 2000000, 10000000]
labels = ['<1000', '1000-5000', '5000-20000', '20000-100000', '100000-500000', '500000-2000000', '2000000-10000000']
softLiqData.loc[:, 'liq_range'] = pd.cut(softLiqData['liquidity'], bins=bins, labels=labels, right=False)

# group the data by the number of bands a user chose
sl_liq_stats = softLiqData.groupby(['liq_range']).agg({
    'timestamp': 'count',
    'lossPctPerDay': ['min', 'median', 'mean', 'std', 'max'],
    'softLiqDays': 'sum'
}).reset_index(drop=False)

# rename the columns and save to csv
sl_liq_stats.columns = ['liq_range', 'entries', 'lossPctDay_min', 'lossPctDay_median', 'lossPctDay_mean', 'lossPctDay_std', 'lossPctDay_max', 'softLiqDays']
sl_liq_stats.to_csv('liquidity_soft_liq_stats.csv', index=False)
print(sl_liq_stats)

          liq_range  entries  lossPctDay_min  lossPctDay_median  \
0             <1000      103    4.922813e-10       6.524876e-07   
1         1000-5000       73    8.078126e-08       9.001594e-03   
2        5000-20000      207    2.239918e-16       1.708548e-02   
3      20000-100000      131    2.704587e-06       1.515267e-02   
4     100000-500000      180    3.043710e-05       1.183717e-02   
5    500000-2000000       95    1.769966e-04       2.179184e-02   
6  2000000-10000000        0             NaN                NaN   

   lossPctDay_mean  lossPctDay_std  lossPctDay_max  softLiqDays  
0         0.004779        0.014503        0.083157   112.882824  
1         0.019389        0.025856        0.094127    57.423658  
2         0.026873        0.027027        0.096814   146.835847  
3         0.023264        0.023442        0.099534    93.228032  
4         0.022190        0.024037        0.097771   146.910199  
5         0.029060        0.025399        0.097941    58.295457  
6