From 0e7cb2f22b75ef7d1e77e5ce399ad5b5583657af Mon Sep 17 00:00:00 2001 From: thelazyliz Date: Wed, 29 May 2024 15:39:36 +0100 Subject: [PATCH] Fix trading volume and update prompts (#12) * add proxy implementation to Bybit API * update tvl graph precision, and prompts * add extra info for trading volumes * comment out to_csv * update bybit to use cryptocompare * remove cryptocompare, use alternate bybit api * cleanup --- graphing/graph.py | 2 +- llm/prompts.py | 8 ++++--- utils/cex_data_loader.py | 50 +++++++++++++++++++++++++++------------ utils/data_transformer.py | 14 +++++++---- 4 files changed, 50 insertions(+), 24 deletions(-) diff --git a/graphing/graph.py b/graphing/graph.py index 31940f5..a37cc6c 100644 --- a/graphing/graph.py +++ b/graphing/graph.py @@ -128,7 +128,7 @@ def graph_tvl(self, df: pd.DataFrame): # Format the values in the table for col in df.columns: if "change" in col.lower(): - df[col] = pd.to_numeric(df[col], errors='coerce').apply(lambda x: "" if pd.isnull(x) else "{:.0f}%".format(x * 100)) # Rounded to no decimals + df[col] = pd.to_numeric(df[col], errors='coerce').apply(lambda x: "" if pd.isnull(x) else "{:.2f}%".format(x * 100)) # Rounded to 2 decimals elif col == "Tokens deposited": df[col] = pd.to_numeric(df[col], errors='coerce').apply(lambda x: "" if pd.isnull(x) else "{:,.0f}".format(x)) # Express in whole numbers with comma separators, rounded to no decimals elif col == "TVL": diff --git a/llm/prompts.py b/llm/prompts.py index ca2f5a9..68b1df5 100644 --- a/llm/prompts.py +++ b/llm/prompts.py @@ -20,7 +20,7 @@ tvl_prompt = """ You are a data analytics professional at Lido DAO. Your boss has assigned you to the team that writes weekly twitter threads about Lido statistics. -Today, you are responsible for writing the section of the thread about Total Value Locked (TVL). +Today, you are responsible for writing the section of the thread about TVL. Here are some examples: --- @@ -188,8 +188,8 @@ (w)stETH 7d trading volume is $2.47b, +66.0% higher than last week. --- -Use your knowledge of Lido, the data provided by your boss, and the examples above to write a section of the thread about the amount of wstETH bridged to Cosmos. -Follow the examples closely. +Use your knowledge of Lido, the data provided by your boss, and the examples above to write a section of the thread about the trading volume of (w)stETH. +Follow the examples closely. You do not need to include the breakdown stats for each chain. """ thread_prompt = """ @@ -266,6 +266,8 @@ Final instructions: Be sure to create a succint TL;DR section that summarizes the most important information from the thread. You must be sure to include every provided block in the thread, and follow the format of the examples closely. Do not omit any data in any block. +Please pay attention to the "Lido on L2" section - you must include the individual network breakdown according to the format given. +You do not need to explain the abbreviations such as TVL, APR, and L2. It is understood by the audience what they mean. You can use a more varied vocabulary than the examples provided. For example, instead of always saying "increase" or "decrease", you can use words like "dropped", "soared", "plummeted", "rose", "shrank", "jumped up", etc. Be sure to use the correct word for the situation. diff --git a/utils/cex_data_loader.py b/utils/cex_data_loader.py index de8d71e..30bf56f 100644 --- a/utils/cex_data_loader.py +++ b/utils/cex_data_loader.py @@ -4,6 +4,7 @@ import json from datetime import datetime, timedelta import logging +import os logging.basicConfig( format='%(asctime)s %(levelname)s %(message)s', @@ -32,7 +33,17 @@ def __init__(self, start_date: datetime, end_date: datetime): 'cointr': self.fetch_cointr_daily_data, 'bitget': self.fetch_bitget_daily_data, } - + # default pairs for all exchanges + self.all_steth_pairs = [ + "STETH/USDT", "STETH/USDC", "STETH/LUSD", "STETH/USD", "STETH/DAI", + "STETH/BUSD", "STETH/USDP", "STETH/TUSD", "STETH/WBTC", "STETH/BTC", + "STETH/LDO", "STETH/BTC","STETH/EUR", "STETH/WETH", "STETH/ETH" + ] + # specific pairs for exchanges (to override the default list above) + self.exchange_pairs = { + 'bybit': ["STETH/USDT"] + } + def get_data_formated(self, data: pd.DataFrame, pair: str) -> pd.DataFrame: data['symbol'] = pair data = data.set_index('date') @@ -142,17 +153,23 @@ def fetch_okx_daily_data(self, pair: str) -> pd.DataFrame: logging.info(f"Did not receieve OK response from OKX API for {pair}") return pd.DataFrame() - # https://bybit-exchange.github.io/docs/v5/market/kline + # https://www.bybit.com/en/trade/spot/STETH/USDT def fetch_bybit_daily_data(self, pair: str) -> pd.DataFrame: timestamp_from = int(datetime.timestamp(self.start_date)) * 1000 # as ms timestamp_to = int(datetime.timestamp(self.end_date)+86400) * 1000 # as ms symbol = pair.replace('/', '') - url = f'https://api.bybit.com/v5/market/kline?category=spot&symbol={symbol}&interval=D&end={timestamp_to}&start={timestamp_from}' - response = requests.get(url) + params = { + "symbol": symbol, + "interval": "1d", + "limit": (datetime.now() - self.start_date).days + 1, + "r": round(datetime.now().timestamp() * 1000) # current timestamp in ms + } + url = 'https://api2.bybit.com/spot/api/quote/v2/klines' + response = requests.get(url, params=params) if response.status_code == 200 and len(json.loads(response.text)['result']) > 0: data = pd.DataFrame( - json.loads(response.text)['result']['list'], - columns=['t', 'o', 'h', 'l', 'c', 'v', 'volume_quote'] + json.loads(response.text)['result'], + columns=['t', 'o', 'h', 'l', 'c', 'v'] ) if data.empty: logging.info(f"Did not return any data from Bybit for {pair}") @@ -314,11 +331,15 @@ def get_klines_by_exchange_pair(self, exchange: str, pair: str) -> pd.DataFrame: else: logging.info(f"No data for {exchange}") - def get_klines(self, pairs: list[str]) -> dict[tuple[str, str], pd.DataFrame]: + def get_klines(self) -> dict[tuple[str, str], pd.DataFrame]: klines_by_exchange = {} for exchange in self.exchange_functions.keys(): - for pair in pairs: - klines_by_exchange.update({(exchange, pair): self.get_klines_by_exchange_pair(exchange, pair)}) + if exchange in self.exchange_pairs: + for pair in self.exchange_pairs[exchange]: + klines_by_exchange.update({(exchange, pair): self.get_klines_by_exchange_pair(exchange, pair)}) + else: + for pair in self.all_steth_pairs: + klines_by_exchange.update({(exchange, pair): self.get_klines_by_exchange_pair(exchange, pair)}) return klines_by_exchange def get_trading_volume(self, symbol: str) -> pd.DataFrame: @@ -340,17 +361,12 @@ def get_trading_volume(self, symbol: str) -> pd.DataFrame: return data[['total_volume', 'price']] def get_offchain_df(self) -> pd.DataFrame: - all_steth_pairs = [ - "STETH/USDT", "STETH/USDC", "STETH/LUSD", "STETH/USD", "STETH/DAI", - "STETH/BUSD", "STETH/USDP", "STETH/TUSD", "STETH/WBTC", "STETH/BTC", - "STETH/LDO", "STETH/BTC","STETH/EUR", "STETH/WETH", "STETH/ETH" - ] # get coingecko price steth_trading_volume = self.get_trading_volume('staked-ether') # get volume on exchanges - stethtot_klines = self.get_klines(all_steth_pairs) + stethtot_klines = self.get_klines() stethtot_offchain_all = [] for key in stethtot_klines.keys(): if stethtot_klines[key].empty == False: @@ -378,4 +394,8 @@ def get_offchain_df(self) -> pd.DataFrame: df_stethtot_offchain = df_stethtot_offchain[['total_volume']] df_stethtot_offchain = df_stethtot_offchain.rename(columns = {'total_volume': 'volume'}) + + # df_stethtot_offchain.to_csv('df_stethtot_offchain.csv') + # df_stethtot_offchain = pd.read_csv('df_stethtot_offchain.csv', index_col='date') + # df_stethtot_offchain.index = pd.to_datetime(df_stethtot_offchain.index) return df_stethtot_offchain diff --git a/utils/data_transformer.py b/utils/data_transformer.py index 85918f7..1627075 100644 --- a/utils/data_transformer.py +++ b/utils/data_transformer.py @@ -45,14 +45,14 @@ def enrich_stethVolumes(df: pd.DataFrame, start_date: datetime, end_date: dateti tv_by_chain = {} for chain in chainlist: tv_by_chain.update({(chain): df.query('chain==@chain')[['date','volume']].set_index('date')}) - + stethtot_klines_chain = [] for key in tv_by_chain.keys(): if tv_by_chain[key].empty == False: k = tv_by_chain[key].copy() k.columns = [key] stethtot_klines_chain.append(k) - + # off-chain section (exchange APIs) # first we need to extend the start date to include 1 more period before @@ -284,14 +284,18 @@ def process_stethVolumes(df: pd.DataFrame) -> str: period_length = (max_date - min_date + timedelta(days = 1)) / 2 # this is start_date of current period start_date = min_date + period_length - previous_sum = df[pd.to_datetime(df.index) < start_date].sum().sum() - current_sum = df[pd.to_datetime(df.index) >= start_date].sum().sum() + previous_vol_by_chain = df[pd.to_datetime(df.index) < start_date].sum() + current_vol_by_chain = df[pd.to_datetime(df.index) >= start_date].sum() + previous_sum = previous_vol_by_chain.sum() + current_sum = current_vol_by_chain.sum() pct_change = (current_sum/previous_sum - 1) * 100 result_string = ( f"{period_length.days}d trading volume: ${current_sum}\n" f"Previous trading volume: ${previous_sum}\n" - f"Percentage change: {pct_change}" + f"Percentage change: {pct_change}\n" + f"{period_length.days}d trading volume breakdown: {current_vol_by_chain.to_json()}\n" + f"Previous trading volume breakdown: {previous_vol_by_chain.to_json()}\n" ) return result_string