In [1]:
from datastreams.datastream import Streamer

import matplotlib.pyplot as plt
import os
import pandas as pd

In [2]:
# Show every column, keep wide frames on one line, and never truncate cell text.
# Full option names are used throughout: pandas warns that abbreviated option
# names can stop resolving if a future version adds a similarly named option.
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)

In [3]:
cd ..

/home/evan/Documents/github/Feedlot


### Load transformed cow swap dataframe

In [4]:
# Read the transformed CoW swaps table. low_memory=False makes pandas infer each
# column's dtype from the whole file rather than chunk by chunk.
cow_swaps_path = 'data/transformed/cow_swaps.csv'
cow_swaps_df = pd.read_csv(cow_swaps_path, low_memory=False)

In [5]:
# Report the in-memory size of cow_swaps_df in megabytes (bytes / 1024**2).
# memory_usage() runs with its default deep=False here, so object (string) columns
# are not introspected and the printed figure is a lower bound.
cow_swaps_bytes = cow_swaps_df.memory_usage().sum()
print('Memory size of cow_swaps_df: ', cow_swaps_bytes / 1024**2, 'MB')

Memory size of cow_swaps_df:  44.165401458740234 MB


In [6]:
# preview the first five rows (head() defaults to n=5)
cow_swaps_df.head()

Unnamed: 0,timestamp,gasPrice,feeAmount,txHash,settlement,buyAmount,sellAmount,order,buyAmountEth,sellAmountEth,buyAmountUsd,sellAmountUsd,solvers,environment,name,active,sellTokenSymbol,sellTokenDecimals,buyTokenSymbol,buyTokenDecimals
0,1663907363,6062637482,1344622,0x0454b81f9e497fe91fc2df0e1e7420b9a5e20439b065132daab91c09de8512cf,0x0454b81f9e497fe91fc2df0e1e7420b9a5e20439b065132daab91c09de8512cf,917011543008284856417,4093394367,0x000050299d2697ae6d93d9ccec7b55dbe79c920596927402db5784e8e7ef1c9d961eed3b7d4b7c261ee5b023e5f857af5af1e700632d3cf0,3.04748,3.049075,4087.560206,4093.394367,0xc9ec550bea1c64d779124b23a26292cc223327b6,prod,Otex,True,USDC,6,FXS,18
1,1668043871,30744415450,37223401,0xbe9648a3caed2adc9254ea595dac1cda93af8b243b7a8c507ea33d3e4e8efbe4,0xbe9648a3caed2adc9254ea595dac1cda93af8b243b7a8c507ea33d3e4e8efbe4,8621106771373760291720,45331499213,0x0004d9673bd844cc4d44a33b1f5c1df7ef6cf5eff63ccaaf99d4442089a2deb0d1f2739ad714045be6146915275d0a2b822ec1cc636c5b20,39.399436,39.811826,44861.934154,45331.499213,0xc9ec550bea1c64d779124b23a26292cc223327b6,prod,Otex,True,USDC,6,FXS,18
2,1669126187,24494377559,13386244,0x66e01c20e7640f86113a8fef5b80d7393eebb01e153d33310dd94409d4adfe32,0x66e01c20e7640f86113a8fef5b80d7393eebb01e153d33310dd94409d4adfe32,1583112031078343925188,6719000000,0x00668e787a470a68eee83463fb4618ce9bedec7c50ac7ea9d2bf0d073792fe4d95861c41e7767dc737bca90b175ad51e5f7b9ada637cde92,5.996258,5.917571,6821.422376,6719.0,0xb20b86c4e6deeb432a22d773a221898bbbd03036,prod,Gnosis_1inch,True,USDC,6,FXS,18
3,1662029515,12869344497,4808912,0xb126e3c1fdffde8ece3b6b6554ff5d6cb313ae67a692ea92f01ecfe17fb9d982,0xb126e3c1fdffde8ece3b6b6554ff5d6cb313ae67a692ea92f01ecfe17fb9d982,369820837360497071942,2138328191,0x00a3de84225174293e86edf848100dfb1123ea727209aa63bd9d72edc9838de8254725a2d9989ab4202e1f6a6560b970dc4a67da6310958d,1.38013,1.372932,2126.884281,2138.328191,0xc9ec550bea1c64d779124b23a26292cc223327b6,prod,Otex,True,USDC,6,FXS,18
4,1653185840,14600197606,20683162,0xc5c3c4cb97c972f83630f7a415ce064c9c2e17c316936d382ed3bfb54266c8e8,0xc5c3c4cb97c972f83630f7a415ce064c9c2e17c316936d382ed3bfb54266c8e8,8901647727774327086836,59366191057,0x00a5ee906f05258014318b5488bded192c21ed6a9ee7a65506a80cb459d362d8cbdd2638b5f2e0360ea43806936276ba16d0a4ab6289a3b9,29.458128,30.052786,58191.504954,59366.191057,0x6fa201c3aff9f1e4897ed14c7326cf27548d9c35,prod,Otex,False,USDC,6,FXS,18


In [7]:
# Earliest swap timestamp per token symbol, computed separately for the sell side
# and the buy side. as_index=False keeps the symbol as a regular column.
swaps_by_sell_token = cow_swaps_df.groupby('sellTokenSymbol', as_index=False)
swaps_by_buy_token = cow_swaps_df.groupby('buyTokenSymbol', as_index=False)

first_sell_appearance = swaps_by_sell_token['timestamp'].min()
first_buy_appearance = swaps_by_buy_token['timestamp'].min()

In [8]:
# Combine the sell-side and buy-side first appearances into one row per token symbol.
# Both frames are renamed to a shared 'tokenSymbol' key before concatenation. If they
# are concatenated with their original, differently named key columns, every buy-side
# row has NaN in 'sellTokenSymbol', and groupby drops NaN keys — so the buy-side
# timestamps would be silently ignored.
first_appearance = pd.concat(
    [
        first_sell_appearance.rename(columns={'sellTokenSymbol': 'tokenSymbol'}),
        first_buy_appearance.rename(columns={'buyTokenSymbol': 'tokenSymbol'}),
    ],
    axis=0,
    ignore_index=True,
)

# keep the earliest timestamp seen on either side of a swap
first_appearance = first_appearance.groupby('tokenSymbol')['timestamp'].min().reset_index()

# rename columns
first_appearance.columns = ['tokenSymbol', 'firstAppearance']

In [9]:
# preview the first five tokens and their first-appearance timestamps (head() defaults to n=5)
first_appearance.head()

Unnamed: 0,tokenSymbol,firstAppearance
0,$MECHA,1664789687
1,0xBTC,1639067616
2,1INCH,1628253224
3,1ONE,1634276319
4,3Crv,1669763447


In [10]:
# check that WETH is present among the first_appearance token symbols.
# NOTE(review): the earlier wording of this comment also mentioned checking that some
# symbols do not start with '$'; this cell only performs the WETH membership test.
'WETH' in first_appearance['tokenSymbol'].values

True

In [11]:
# distinct token symbols seen on each side of the swaps
unique_sell_tokens = pd.unique(cow_swaps_df['sellTokenSymbol'])
unique_buy_tokens = pd.unique(cow_swaps_df['buyTokenSymbol'])

In [12]:
# keep the symbols that occur on BOTH the buy side and the sell side (set intersection)
# NOTE(review): tokens that were only ever bought, or only ever sold, are excluded — confirm this is intended.
tokens_bought = set(unique_buy_tokens)
tokens_sold = set(unique_sell_tokens)
unique_cow_tokens = list(tokens_bought & tokens_sold)

In [13]:
# Drop the NaN placeholder that stands in for missing symbols: NaN is a float, while
# the token symbols themselves are strings. isinstance() is the idiomatic type check
# and also covers float subclasses such as numpy.float64.
unique_cow_tokens = [token for token in unique_cow_tokens if not isinstance(token, float)]

### Create Chainlink Streamer with DataStreams

In [14]:
# URL of the Chainlink prices subgraph (hosted on api.thegraph.com) passed to the Streamer in the next cell
endpoint = 'https://api.thegraph.com/subgraphs/name/openpredict/chainlink-prices-subgraph'

In [15]:
# build a Streamer (imported from datastreams.datastream) for the subgraph endpoint;
# `ds` is the handle used by every query cell below
ds = Streamer(endpoint)

In [16]:
# check available query fields; the names shown in the output ('prices', 'assetPairs')
# are the keys looked up in ds.queryDict by later cells
ds.queryFields

[FieldPath(https://api.thegraph.com/subgraphs/name/openpredict/chainlink-prices-subgraph, Query, ['prices']),
 FieldPath(https://api.thegraph.com/subgraphs/name/openpredict/chainlink-prices-subgraph, Query, ['assetPairs'])]

In [17]:
# query the 'assetPairs' field of the subgraph
# NOTE(review): query_size=10000 is presumably an upper bound on returned rows — confirm against datastreams.
asset_pairs_field = ds.queryDict.get('assetPairs')
asset_pairs_df = ds.runQuery(query_field=asset_pairs_field, query_size=10000)

FIELD - assetPairs


In [18]:
# materialise the asset-pair ids as a plain Python list
asset_pairs_list = asset_pairs_df['assetPairs_id'].tolist()

# report how many pairs were returned, followed by a small sample
print(f'there are {len(asset_pairs_df)} asset pairs in this chainlink subgraph node.')
print(f'some sample values are: {asset_pairs_list[:5]}')

there are 409 asset pairs in this chainlink subgraph node.
some sample values are: ['1INCH/ETH', '1INCH/USD', 'AAPL/USD', 'AAVE/ETH', 'AAVE/USD']


In [19]:
# split every pair id on '/', e.g. '1INCH/ETH' -> ['1INCH', 'ETH']
denominated_asset_pairs_list = [pair_id.split('/') for pair_id in asset_pairs_list]

# keep only the pairs whose last component is ETH or USD
eth_usd_asset_denomination_list = [
    parts
    for parts in denominated_asset_pairs_list
    if parts[-1] in ('ETH', 'USD')
]

In [20]:
# first five ETH- or USD-denominated pairs
eth_usd_asset_denomination_list[:5]

[['1INCH', 'ETH'],
 ['1INCH', 'USD'],
 ['AAPL', 'USD'],
 ['AAVE', 'ETH'],
 ['AAVE', 'USD']]

In [21]:
# pairs whose last component is USD
usd_asset_denomination_list = [
    pair for pair in eth_usd_asset_denomination_list if pair[-1] == 'USD'
]

# of those, keep the pairs whose first component is one of the CoW swap token symbols
cow_usd_asset_denomination_list = [
    pair for pair in usd_asset_denomination_list if pair[0] in unique_cow_tokens
]

In [22]:
# first five USD-denominated pairs that match a CoW swap token
cow_usd_asset_denomination_list[:5]

[['1INCH', 'USD'],
 ['AAVE', 'USD'],
 ['ACH', 'USD'],
 ['ALCX', 'USD'],
 ['AMPL', 'USD']]

### Create custom search vals parameters based on tokens in cow_swaps dataset

In [23]:
# rebuild 'BASE/QUOTE' search strings from the first two components of each matched pair
token_search_pair_list = [
    f'{pair[0]}/{pair[1]}' for pair in cow_usd_asset_denomination_list
]

In [24]:
# ETH and BTC are not in unique_cow_tokens, but WETH and WBTC are.
# (this cell only verifies that WETH is present)
'WETH' in unique_cow_tokens

True

In [25]:
# manually add ETH/USD and BTC/USD to token_search_pair_list.
# The list is rebuilt so that each manual pair appears exactly once and the two pairs
# are always the final entries, in this order. Plain append() calls would add duplicate
# entries every time the cell is re-run.
manual_pairs = ['ETH/USD', 'BTC/USD']
token_search_pair_list = [
    pair for pair in token_search_pair_list if pair not in manual_pairs
] + manual_pairs

In [26]:
# report how many search pairs will be queried (including the manually added ones) and show the first five
print(f'there are {len(token_search_pair_list)} token pairs denominated in USD. Sample output is: {token_search_pair_list[0:5]}')

there are 72 token pairs denominated in USD. Sample output is: ['1INCH/USD', 'AAVE/USD', 'ACH/USD', 'ALCX/USD', 'AMPL/USD']


In [27]:
# get smallest first_appearance value, i.e. the earliest timestamp at which any token appears
# NOTE(review): this value is not referenced by the price-query cells visible below —
# confirm whether it was meant to bound the query.
earliest_appearance = first_appearance['firstAppearance'].min()

In [28]:
# filter keys for the price query, paired positionally with their initial values.
# 'timestamp_lt': 1703800000 is a hard-coded bound (presumably "timestamp less than",
# in Unix seconds — TODO confirm and consider naming it as a constant).
# 'assetPair': 'ETH/USD' is only a starting value; the search cell below substitutes
# each entry of token_search_pair_list for it.
keys = ['timestamp_lt', 'assetPair']
initial_vals = [1703800000, 'ETH/USD']

In [29]:
# run the 'prices' query repeatedly, substituting each value of token_search_pair_list
# for the 'assetPair' filter key (keys[1]); the results are collected in price_dfs_list,
# which later cells index as a list of DataFrames.
# NOTE(review): query_size = 10000000 is presumably an upper bound on rows per query — confirm against datastreams.
price_dfs_list = ds.runSameQuerySearch(
    fieldParam = ds.queryDict.get('prices'),
    keys = keys, 
    values = initial_vals,
    searchKey = keys[1],
    searchVals = token_search_pair_list,
    query_size = 10000000
    )

search_dict: {'timestamp_lt': 1703800000, 'assetPair': 'ETH/USD'}
val: 1INCH/USD
FIELD - prices
Filter based on these values: {'timestamp_lt': 1703800000, 'assetPair': '1INCH/USD'}
1INCH/USD                   prices_id prices_assetPair_id  prices_timestamp  prices_price
0      1INCH/USD/0x63c89d0b           1INCH/USD        1674091787      45208510
1      1INCH/USD/0x63c85a3f           1INCH/USD        1674074687      44728242
2      1INCH/USD/0x63c84377           1INCH/USD        1674068855      45212994
3      1INCH/USD/0x63c828fb           1INCH/USD        1674062075      45706455
4      1INCH/USD/0x63c82667           1INCH/USD        1674061415      45160472
...                     ...                 ...               ...           ...
14972  1INCH/USD/0x6066cb91           1INCH/USD        1617349521     448793445
14973  1INCH/USD/0x6066ca8e           1INCH/USD        1617349262     448729456
14974  1INCH/USD/0x6066b91e           1INCH/USD        1617344798     448758913
14975  1I

In [34]:
# preview the first five rows of the first price frame (head() defaults to n=5)
price_dfs_list[0].head()

Unnamed: 0,prices_id,prices_assetPair_id,prices_timestamp,prices_price
0,1INCH/USD/0x63c89d0b,1INCH/USD,1674091787,45208510
1,1INCH/USD/0x63c85a3f,1INCH/USD,1674074687,44728242
2,1INCH/USD/0x63c84377,1INCH/USD,1674068855,45212994
3,1INCH/USD/0x63c828fb,1INCH/USD,1674062075,45706455
4,1INCH/USD/0x63c82667,1INCH/USD,1674061415,45160472


In [35]:
# symbol of the first price frame: the text before the first '/' of its first prices_id.
# Only the first row is read, instead of splitting every row of the column and then
# discarding all but one result.
name = price_dfs_list[0]['prices_id'].iloc[0].split('/')[0]

In [37]:
# preview the first five rows of the second-to-last price frame (head() defaults to n=5)
price_dfs_list[-2].head()

Unnamed: 0,prices_id,prices_assetPair_id,prices_timestamp,prices_price
0,ETH/USD/0x63c8a317,ETH/USD,1674093335,151757982958
1,ETH/USD/0x63c89507,ETH/USD,1674089735,151759500000
2,ETH/USD/0x63c88c37,ETH/USD,1674087479,151955000000
3,ETH/USD/0x63c886f7,ETH/USD,1674086135,151171319000
4,ETH/USD/0x63c8867f,ETH/USD,1674086015,151153760000


In [38]:
# change prices_assetPair_id from ETH/USD to WETH/USD and from BTC/USD to WBTC/USD.
# Frames are matched by their current label rather than by list position (-2 / -1), so
# the relabelling stays correct if the order or length of price_dfs_list changes, and
# re-running the cell is a no-op.
wrapped_pair_ids = {'ETH/USD': 'WETH/USD', 'BTC/USD': 'WBTC/USD'}
for price_df in price_dfs_list:
    # skip frames without the label column instead of raising KeyError
    if 'prices_assetPair_id' in price_df.columns:
        price_df['prices_assetPair_id'] = price_df['prices_assetPair_id'].replace(wrapped_pair_ids)

In [39]:
# make directory if it doesn't exist; exist_ok=True avoids the separate
# check-then-create step and keeps re-runs safe
os.makedirs('data/chainlink_prices', exist_ok=True)

# save each dataframe to a csv file named after the text before the first '/' of its prices_id
for df in price_dfs_list:
    # a frame with no rows has no prices_id to derive a file name from; skip it instead of raising
    if df.empty:
        continue
    # read only the first row rather than splitting the whole column
    name = df['prices_id'].iloc[0].split('/')[0]
    df.to_csv(f'data/chainlink_prices/{name}.csv', index=False)

In [41]:
# rename ETH.csv to WETH.csv and BTC.csv to WBTC.csv.
# os.replace overwrites an existing destination on every platform, whereas os.rename
# raises FileExistsError on Windows when the target already exists (for example when
# the save cell and this cell are run a second time).
os.replace('data/chainlink_prices/ETH.csv', 'data/chainlink_prices/WETH.csv')
os.replace('data/chainlink_prices/BTC.csv', 'data/chainlink_prices/WBTC.csv')

In [40]:
# number of price frames collected; the recorded output (72) equals the number of search pairs reported earlier
print(len(price_dfs_list))

72
