### Notebook to pull the latest user snapshot data for crvUSD from the subgraph

Saves the cleaned data to a CSV file named **user_snapshots.csv** in the current working directory.

In [16]:
import pluck
import pandas as pd

# crvusd subgraph url: the GraphQL endpoint that get_user_data sends its query to
# NOTE(review): this points at a thegraph.com hosted endpoint -- confirm it is still being served
subgraph_url = 'https://api.thegraph.com/subgraphs/name/convex-community/crvusd'

# function to get a portion of user data
def get_user_data(skip_snapshots=0, skip_user_states=0):
  """Fetch one page of user snapshot data from the crvUSD subgraph.

  The query asks for up to 1000 snapshots flagged ``userStateSnapshot: true``
  and, nested inside each of them, up to 1000 user states. Both levels are
  paged independently through their own ``skip`` offset.

  Args:
    skip_snapshots: number of snapshots to skip before the returned page.
    skip_user_states: number of user states to skip inside each snapshot.

  Returns:
    The single frame produced by ``pluck.execute`` for this query
    (presumably a pandas DataFrame -- the caller treats it as one).

  Raises:
    ValueError: if ``pluck.execute`` does not yield exactly one frame.
  """
  # doubled braces are literal braces; single braces interpolate the offsets
  query = f"""
  {{
    snapshots(first: 1000, skip: {skip_snapshots}, where: {{userStateSnapshot: true}}) {{
      basePrice
      oraclePrice
      activeBand
      userStates (first: 1000, skip: {skip_user_states}) {{
        collateral
        stablecoin
        n
        n1
        n2
        debt
        depositedCollateral
        health
        loss
        lossPct
        timestamp
        user {{
          id
        }}
      }}
      market {{
        id
        collateralName
      }}
    }}
  }}
  """
  frames = pluck.execute(query, column_names="short", url=subgraph_url)
  # exactly one frame is expected; the unpacking fails loudly otherwise
  (only_frame,) = frames
  return only_frame

# function to get all user data
def fetch_all_user_data():
    """Page through the subgraph and return every user snapshot row collected.

    The outer loop advances the snapshot offset and the inner loop advances
    the user-state offset, both in steps of 1000. A page is appended only
    when it has 17 columns and is not recognised as already collected.
    The first rejected page ends the inner loop; if that happens on the very
    first user-state page of a snapshot window, the whole fetch is finished.

    Returns:
        A pandas DataFrame with all accepted pages concatenated and a fresh
        0..n-1 index (empty if the first page is rejected).
    """
    page_size = 1000       # offset step used for both paging levels
    expected_columns = 17  # column count of a page that carries user states
    collected = pd.DataFrame()
    snapshot_offset = 0

    while True:
        state_offset = 0

        while True:
            print(f"skip snapshots: {snapshot_offset}, user states: {state_offset}, snapshot_df length: {collected.shape[0]}")
            page = get_user_data(snapshot_offset, state_offset)

            # the column check runs first so isin is never evaluated on a
            # page with an unexpected set of columns
            # NOTE(review): isin with a DataFrame argument compares
            # label-aligned cells, so this checks the page against the first
            # rows of what was collected -- confirm that is the intended
            # duplicate test
            is_new_page = (
                page.shape[1] == expected_columns
                and not page.isin(collected).all().all()
            )

            if not is_new_page:
                if state_offset == 0:
                    # nothing usable at the start of this snapshot window: done
                    return collected
                # user states exhausted for this window; move to the next one
                break

            collected = pd.concat([collected, page], ignore_index=True)
            state_offset += page_size

        snapshot_offset += page_size

In [18]:
# pull every page of user snapshot data from the subgraph
data = fetch_all_user_data()

# discard rows with any missing value so the casts below cannot hit NaN
data = data.dropna()

# target dtype per column: whole-number fields vs. decimal fields
columns_to_int = ['activeBand', 'n', 'n1', 'n2', 'timestamp']
columns_to_float = [
    'basePrice',
    'oraclePrice',
    'collateral',
    'stablecoin',
    'debt',
    'depositedCollateral',
    'health',
    'loss',
    'lossPct',
]
dtype_by_column = {
    **dict.fromkeys(columns_to_int, int),
    **dict.fromkeys(columns_to_float, float),
}

# cast in one pass, then give the two id columns descriptive names
data = data.astype(dtype_by_column)
data = data.rename(columns={'id': 'marketId', 'user.id': 'user'})

# derived columns:
#   softLiq       - True where the active band is at or above the position's n1
#   collateralUsd - collateral amount multiplied by the oracle price
data = data.assign(
    softLiq=lambda df: df['activeBand'] >= df['n1'],
    collateralUsd=lambda df: df['collateral'] * df['oraclePrice'],
)

# write the result next to the notebook, without the index column
data.to_csv("user_snapshots.csv", index=False)