In [None]:
import pandas as pd
from dotenv import load_dotenv
import os
import requests
from mezo.currency_utils import format_musd_currency_columns, get_token_price
from mezo.datetime_utils import format_datetimes
from mezo.data_utils import add_rolling_values, add_pct_change_columns, add_cumulative_columns
from mezo.clients import SupabaseClient, BigQueryClient, SubgraphClient, Web3Client
from scripts.get_raw_data import get_all_loans, get_liquidation_data, get_trove_liquidated_data
load_dotenv(dotenv_path='../.env', override=True)
COINGECKO_KEY = os.getenv('COINGECKO_KEY')

In [2]:
# import raw data
raw_loans = get_all_loans()
raw_liquidations = get_liquidation_data()
raw_troves_liquidated = get_trove_liquidated_data()

🔍 Trying troveUpdates query...
Fetching transactions with skip=0...
Fetching transactions with skip=1000...
No more records found.
✅ Found 366 loan records
🔍 Trying liquidations query...
Fetching transactions with skip=0...
Fetching transactions with skip=1000...
No more records found.
✅ Found 2 liquidation records
🔍 Trying troveLiquidateds query...
Fetching transactions with skip=0...
Fetching transactions with skip=1000...
No more records found.
✅ Found 2 trove liquidation records


In [3]:
# helpers
def clean_loan_data(raw, sort_col, date_cols, currency_cols):
    df = raw.copy().sort_values(by=sort_col, ascending=False)
    df = format_datetimes(df, date_cols)
    df = format_musd_currency_columns(df, currency_cols)
    df['count'] = 1
    df['id'] = range(1, len(df) + 1)

    return df

def find_coll_ratio(df, token_id):
    """Computes the collateralization ratio"""
    usd = get_token_price(token_id)
    df['coll_usd'] = df['coll'] * usd
    df['coll_ratio'] = (df['coll_usd']/df['principal'] ).fillna(0)

    return df

def get_loans_subset(df, operation: int, equals):
    """Create a df with only new, adjusted, or closed loans
    0 = opened, 1 = closed, 2 = adjusted
    note: operation = 2 also includes liquidated loans, so we have to remove those manually
    """
    df['operation'] = df['operation'].astype(int)
    if equals is True:
        adjusted = df.loc[df['operation'] == operation]
    elif equals is False:
        adjusted = df.loc[df['operation'] != operation]

    return adjusted

def process_liquidation_data(liquidations, troves_liquidated):
    # Merge raw liquidation data from two queries
    liquidation_df_merged = pd.merge(
        liquidations, 
        troves_liquidated, 
        how='left', 
        on='transactionHash_'
    )

    liquidation_df_merged = liquidation_df_merged[
        ['timestamp__x', 
        'liquidatedPrincipal', 
        'liquidatedInterest', 
        'liquidatedColl', 
        'borrower',
        'transactionHash_',
        'count_x'
        ]
    ]

    liquidations_df_final = liquidation_df_merged.rename(
        columns = {
            'timestamp__x': 'timestamp_', 
            'liquidatedPrincipal': 'principal', 
            'liquidatedInterest': 'interest',
            'liquidatedColl': 'coll',
            'count_x': 'count'
        }
    )

    liquidations_final = liquidations_df_final.copy()
    liquidations_final['coll'] = liquidations_final['coll'].astype(float)

    return liquidations_final

In [4]:
# clean raw data
loans = clean_loan_data(
    raw_loans, 
    sort_col='timestamp_', 
    date_cols=['timestamp_'], 
    currency_cols=['principal', 'coll', 'stake', 'interest']
)

loans = find_coll_ratio(loans, 'bitcoin')

liquidations = clean_loan_data(
    raw_liquidations,
    sort_col='timestamp_',
    date_cols=['timestamp_'],
    currency_cols=['liquidatedPrincipal', 'liquidatedInterest', 'liquidatedColl']
)

troves_liquidated = clean_loan_data(
    raw_troves_liquidated,
    sort_col='timestamp_',
    date_cols=['timestamp_'],
    currency_cols=['debt', 'coll']
)

# Create df for liquidated loans
liquidations_final = process_liquidation_data(liquidations, troves_liquidated)

# Create df's for new loans, closed loans, and adjusted loans and upload to BigQuery
new_loans = get_loans_subset(loans, 0, True)
closed_loans = get_loans_subset(loans, 1, True)
adjusted_loans = get_loans_subset(loans, 2, True) # Only adjusted loans (incl multiple adjustments from a single user)

## Remove liquidations from adjusted loans
liquidated_borrowers = liquidations_final['borrower'].unique()
adjusted_loans = adjusted_loans[~adjusted_loans['borrower'].isin(liquidated_borrowers)]

##################################

# Get latest loans
latest_loans = loans.drop_duplicates(subset='borrower', keep='first')

# Create df with only open loans
latest_open_loans = get_loans_subset(latest_loans, 1, False)

# Remove liquidated loans from list of latest loans w/o closed loans
latest_open_loans = latest_open_loans[~latest_open_loans['borrower'].isin(liquidated_borrowers)]

##################################

# Break down adjusted loan types for analysis
adjusted_loans = adjusted_loans.sort_values(by=['borrower', 'timestamp_'])
first_tx = adjusted_loans.groupby('borrower').first().reset_index()

adjusted_loans_merged = adjusted_loans.merge(
    first_tx[['borrower', 'principal', 'coll']], 
    on='borrower', 
    suffixes=('', '_initial')
)

## Loan increases
increased_loans = adjusted_loans_merged[adjusted_loans_merged['principal'] 
                                        > adjusted_loans_merged['principal_initial']].copy()
increased_loans['type'] = 1

## Collateral changes
coll_increased = adjusted_loans_merged[adjusted_loans_merged['coll'] 
                                       > adjusted_loans_merged['coll_initial']].copy()
coll_increased['type'] = 2

coll_decreased = adjusted_loans_merged[adjusted_loans_merged['coll'] 
                                       < adjusted_loans_merged['coll_initial']].copy()
coll_decreased['type'] = 3

## MUSD Repayments
principal_decreased = adjusted_loans_merged[adjusted_loans_merged['principal'] 
                                            < adjusted_loans_merged['principal_initial']].copy()
principal_decreased['type'] = 4

## Create final_adjusted_loans dataframe with type column
final_adjusted_loans = pd.concat([
    increased_loans,
    coll_increased, 
    coll_decreased,
    principal_decreased
], ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['operation'] = df['operation'].astype(int)


In [5]:
# Create daily dataframe
daily_new_loans = new_loans.groupby(['timestamp_']).agg(
    loans_opened = ('count', 'sum'),
    borrowers = ('borrower', lambda x: x.nunique()),
    principal = ('principal', 'sum'),
    collateral = ('coll', 'sum'),
    interest = ('interest', 'sum')
).reset_index()

daily_closed_loans = closed_loans.groupby(['timestamp_']).agg(
    loans_closed = ('count', 'sum'),
    borrowers_who_closed = ('borrower', lambda x: x.nunique())
).reset_index()

daily_new_and_closed_loans = pd.merge(daily_new_loans, daily_closed_loans, how = 'outer', on = 'timestamp_').fillna(0)
daily_new_and_closed_loans[['loans_opened', 'borrowers', 'loans_closed', 'borrowers_who_closed']] = daily_new_and_closed_loans[['loans_opened', 'borrowers', 'loans_closed', 'borrowers_who_closed']].astype('int')      
daily_adjusted_loans = adjusted_loans.groupby(['timestamp_']).agg(
    loans_adjusted = ('count', 'sum'),
    borrowers_who_adjusted = ('borrower', lambda x: x.nunique())
).reset_index()

daily_loan_data = pd.merge(daily_new_and_closed_loans, daily_adjusted_loans, how='outer', on='timestamp_').fillna(0)
daily_loan_data[['loans_adjusted', 'borrowers_who_adjusted']] = daily_loan_data[['loans_adjusted', 'borrowers_who_adjusted']].astype(int)

daily_balances = latest_loans.groupby(['timestamp_']).agg(
    musd = ('principal', 'sum'),
    interest = ('interest', 'sum'),
    collateral = ('coll', 'sum')
).reset_index()

daily_balances = daily_balances.rename(
    columns={'musd': 'net_musd', 
             'interest': 'net_interest',
             'collateral': 'net_coll'}
)

daily_loans_merged = pd.merge(daily_loan_data, daily_balances, how='outer', on='timestamp_')

cols = {
    'timestamp_': 'date', 
    'principal': 'gross_musd', 
    'collateral': 'gross_coll', 
    'interest': 'gross_interest',
    'borrowers_who_closed': 'closers', 
    'borrowers_who_adjusted': 'adjusters'
}

daily_loans_merged = daily_loans_merged.rename(columns = cols)

daily_musd_final = add_rolling_values(daily_loans_merged, 30, ['net_musd', 'net_interest', 'net_coll']).fillna(0)
daily_musd_final_2 = add_cumulative_columns(daily_musd_final, ['net_musd', 'net_interest', 'net_coll'])
daily_musd_final_3 = add_pct_change_columns(daily_musd_final_2, ['net_musd', 'net_interest', 'net_coll'], 'daily').fillna(0)
final_daily_musd = daily_musd_final_3.replace([float('inf'), -float('inf')], 0)
final_daily_musd['date'] = pd.to_datetime(final_daily_musd['date']).dt.strftime('%Y-%m-%d')

In [6]:
GET_BORROW_FEES = """
query getBorrowFees ($skip: Int!) {
  borrowingFeePaids (
    orderBy: timestamp_
    orderDirection: desc
    first: 1000
    skip: $skip
  ){
    timestamp_
    fee
    borrower
    transactionHash_
  }
}"""

In [None]:

musd = SubgraphClient(
    url=SubgraphClient.BORROWER_OPS_SUBGRAPH, 
    headers= SubgraphClient.SUBGRAPH_HEADERS
)

In [8]:
fees =  musd.fetch_subgraph_data(GET_BORROW_FEES, 'borrowingFeePaids')

Fetching transactions with skip=0...
Fetching transactions with skip=1000...
No more records found.


In [9]:
if fees:
    fees = pd.DataFrame(fees)
    print(f"✅ Found {len(fees)} fee records")
else:
    print("⚠️ Query returned no data")

✅ Found 293 fee records


In [11]:
loan_fees = fees.copy()
loan_fees.shape

(293, 4)

In [17]:
loans.shape

(290, 13)

In [1]:
pd.merge(loans, loan_fees, how='left', on='transactionHash_')

NameError: name 'pd' is not defined