In [1]:
import pandas as pd

data = pd.read_csv('loans.csv')
print(data.columns)

# find the portion of collateral in crvUSD and the collateral token as a percentage
data['collateralPct'] = data['collateralUsd']/(data['collateralUsd']+data['stablecoin'])*100
data['stablecoinPct'] = 100-data['collateralPct']

# currently softLiq column is True even when under softliq, let's create new columns to show the real soft liquidation
data['under_softLiq'] = data['collateralPct'].eq(0)
data['real_softLiq'] = (~data['under_softLiq']) & (data['softLiq'])

# find the loan to value ratio
data['ltv'] = data['debt']/(data['collateralUsd']+data['stablecoin'])*100

# sort the data by user, marketId and timestamp
data = data.sort_values(by=['user', 'marketId', 'timestamp']).reset_index(drop=True)

Index(['id', 'user', 'marketId', 'collateral', 'collateralUsd', 'collateralUp',
       'depositedCollateral', 'debt', 'n', 'n1', 'n2', 'health', 'loss',
       'lossPct', 'softLiq', 'timestamp', 'activeBand', 'oraclePrice',
       'stablecoin'],
      dtype='object')


In [2]:
# The soft-liquidation loss statistics are misleading in the current form.  E.g. if a user loses 20% of their collateral
# and then pays back most debt and withdraws 80% collateral the statistics will say the user lost 100% of their collateral

# Let's calculate the loss per day while the user is in soft liquidation.  We will remove time periods where the user
# did an action e.g. deposited or withdrew collateral, paid back debt or borrowed more.

# create a lossPctPerDay column which counts the % a user lost between snapshots standardized to a day
data['lossPctPerDay'] = 0

# count the times a user changes their collateral and debt
data['debtActions'] = 0

# count the days a user is in soft liquidation
data['softLiqDays'] = 0


# need to iterate through data to get the above data.  This is slow but works.
i = 0
length = len(data)

while i < length:

    # get the current row data
    row = data.iloc[i]
    loan_id = row['user'] + row['marketId'] + str(row['depositedCollateral'])
    collat_value = (row['collateralUsd'] + row['stablecoin']) / row['oraclePrice']
    debt = row['debt']
    timestamp = row['timestamp']
    
    # it the current loan is the same as the previous loan, ie. same user, marketId and depositedCollateral
    # then we can calculate the lost value and log it if they didn't change their debt
    if i > 0 and prev_loan_id == loan_id:

        # lost value is how much the user lost between snapshots in their collateral e.g., WETH
        lost_value = prev_collat_value - collat_value
        time_days_diff = (timestamp - prev_timestamp) / 86400

        # if the debt changed by more than 2% then we log it as an action
        if prev_debt > debt * 1.02 or prev_debt < debt * 0.98:
            data.at[i, 'debtActions'] += 1
        
        # else we log the lost value and the time between snapshots
        elif lost_value > 0:
            lossPctPerDay = lost_value / prev_collat_value / time_days_diff
            data.at[i, 'softLiqDays'] = time_days_diff
            data.at[i, 'lossPctPerDay'] = lossPctPerDay

    # set the previous values to the current values
    prev_collat_value = collat_value
    prev_loan_id = loan_id
    prev_timestamp = timestamp
    prev_debt = debt

    # print progress every 10,000 rows
    if i % 10000 == 0:
        print(f"{i / length * 100}%")

    i += 1

0.0%
1.1248922915630828%
2.2497845831261656%
3.374676874689248%
4.499569166252331%
5.624461457815414%
6.749353749378496%
7.874246040941579%
8.999138332504662%
10.124030624067744%
11.248922915630828%
12.37381520719391%
13.498707498756993%
14.623599790320077%
15.748492081883159%
16.873384373446243%
17.998276665009325%
19.12316895657241%
20.24806124813549%
21.372953539698575%
22.497845831261657%
23.622738122824742%
24.74763041438782%
25.872522705950907%
26.997414997513985%
28.12230728907707%
29.247199580640153%
30.37209187220324%
31.496984163766317%
32.6218764553294%
33.746768746892485%
34.87166103845557%
35.99655333001865%
37.121445621581735%
38.24633791314482%
39.3712302047079%
40.49612249627098%
41.62101478783406%
42.74590707939715%
43.87079937096023%
44.99569166252331%
46.1205839540864%
47.245476245649485%
48.37036853721256%
49.49526082877564%
50.62015312033873%
51.74504541190181%
52.8699377034649%
53.99482999502797%
55.119722286591056%
56.24461457815414%
57.36950686971723%
58.4943991

In [7]:
# get real soft liquidation subset of data
softLiqData = data.loc[data['real_softLiq']].copy()

# create bins for the number of bands a user chose
bins = [3, 9, 19, 35, 50]
labels = ['4-9', '10-19', '20-35', '36-50']
softLiqData.loc[:, 'n_range'] = pd.cut(softLiqData['n'], bins=bins, labels=labels, right=False)

# group the data by the number of bands a user chose
sl_n_stats = softLiqData.groupby(['n_range']).agg({
    'timestamp': 'count',
    'lossPctPerDay': ['min', 'median', 'mean', 'std', 'max'],
    'softLiqDays': 'sum'
}).reset_index(drop=False)

# rename the columns and save to csv
sl_n_stats.columns = ['n_range', 'entries', 'lossPctDay_min', 'lossPctDay_median', 'lossPctDay_mean', 'lossPctDay_std', 'lossPctDay_max', 'softLiqDays']
sl_n_stats.to_csv('grouped_soft_liq_stats.csv', index=False)
print(sl_n_stats)

  n_range  entries  lossPctDay_min  lossPctDay_median  lossPctDay_mean  \
0     4-9    32961             0.0           0.000927         0.009268   
1   10-19    16190             0.0           0.000331         0.006250   
2   20-35      875             0.0           0.000127         0.001989   
3   36-50      848             0.0           0.000004         0.001081   

   lossPctDay_std  lossPctDay_max  softLiqDays  
0        0.021779        0.389285  4601.170694  
1        0.019800        0.430594  2248.192500  
2        0.005356        0.064132   124.924306  
3        0.003015        0.039769   114.991667  
