In [None]:
#Written using Python 3.9.7
#Author: icebreaker, May 2022

import json
import os
import warnings
from datetime import datetime
from os.path import exists
from time import sleep, time

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from hexbytes import HexBytes
from pycoingecko import CoinGeckoAPI
from scipy.stats import norm
from web3 import Web3
# CoinGecko client used by the USD price lookups further down the notebook.
cg = CoinGeckoAPI()


# Display floats with two decimals and silence all Python warnings notebook-wide.
pd.options.display.float_format = '{:.2f}'.format
warnings.filterwarnings("ignore")

# API credentials are read from the environment so no secret lives in the notebook.
ETHERSCAN_API_KEY = os.getenv("ETHERSCAN_API_KEY")
INFURA_KEY = os.getenv("INFURA_KEY")
if not INFURA_KEY:
    # os.getenv returns None for a missing variable, which would otherwise be
    # formatted into the provider URL below as the literal text "None".
    print("WARNING: INFURA_KEY is not set; the Infura URL will not contain a project id")

ETHERSCAN_API_BASE_URL = "https://api.etherscan.io/api"

# Web3 client talking to Ethereum mainnet through Infura's HTTP endpoint.
w3 = Web3(Web3.HTTPProvider("https://mainnet.infura.io/v3/{}".format(INFURA_KEY)))

In [None]:
indexerContract = '0x84cE03AE547330f61584aD6103743732B1AE97C5' #MainIndexer SET contract
topicSig = '0x54e9e24dab7ba662ede90396b0de464467c3d65f95485171a60381185219f10b' #trade executed events

currentBlockNumber = w3.eth.get_block_number()
# NOTE: findTargetTX is defined in a later cell of this notebook; on a fresh
# kernel that cell has to be executed before this one.
#Finding and scraping onchain tx's of interest takes approximately ~2 hours via infura
v2Events = findTargetTX(12928622, 10000, indexerContract, topicSig, 'tradeRebalExecs')
#v2Events = findTargetTX(12726304, 10000, sETH2, topicSig, 'sETH2Transfers') #debug line

# Column order for the scraped records; the names match the dict keys
# produced by findTargetTX.
columnsFromScrape = [
    "txHash", "gasLimit", "gasUsed", "gasPrice", "timestamp", "blockNumber", "from", "to",
    '_setToken', '_sellComponent', '_buyComponent', '_exchangeAdapter',
    '_executor', '_netAmountSold', '_netAmountReceived', '_protocolFee',
]
finals = pd.DataFrame(v2Events, columns = columnsFromScrape)
# Human-readable timestamp with millisecond precision ([:-3] trims microseconds).
# NOTE(review): fromtimestamp() converts to the machine's local timezone - confirm that is intended.
finals['datetime'] = [
    datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]
    for ts in finals["timestamp"]
]

combinedFills = pd.concat([finals])


In [None]:
##Scrape all tradeExecuted Events from the MainIndexer SET contract
def _with0x(hexStr):
    """Return ``hexStr`` with a leading ``0x``, adding the prefix when it is absent."""
    return hexStr if hexStr.startswith("0x") else "0x" + hexStr


def findTargetTX(startBlock, blockStep, _targetContract, _targetEventTopicSignature, _fileNamePreFix):
    """Collect every log with a given first topic emitted by one contract.

    The chain is scanned from ``startBlock`` up to the head block (read once
    when the function starts) in consecutive, non-overlapping windows of
    ``blockStep`` blocks. For each matching log the originating transaction,
    its receipt and the block timestamp are fetched through the module-level
    ``w3`` client and flattened into one record.

    Args:
        startBlock: First block number to scan (inclusive).
        blockStep: Number of blocks covered by each log query; must be positive.
        _targetContract: Address of the contract whose logs are requested.
        _targetEventTopicSignature: Value the first topic of a log must equal.
        _fileNamePreFix: Label used only in the progress messages.

    Returns:
        list[dict]: One dict per log with the keys ``txHash``, ``gasLimit``,
        ``gasUsed``, ``gasPrice``, ``timestamp``, ``blockNumber``, ``from``,
        ``to``, ``_setToken``, ``_sellComponent``, ``_buyComponent``,
        ``_exchangeAdapter``, ``_executor``, ``_netAmountSold``,
        ``_netAmountReceived`` and ``_protocolFee``.

    Raises:
        ValueError: If ``blockStep`` is not positive (the scan could never advance).
    """
    if blockStep <= 0:
        raise ValueError("blockStep must be a positive number of blocks")

    currentBlockNumber = w3.eth.get_block_number()
    filteredTXs = []
    # Several logs often share a block; remember timestamps to avoid refetching.
    blockTimestamps = {}

    latestBlockNumber = startBlock
    while latestBlockNumber <= currentBlockNumber:
        # toBlock is inclusive, so end one block before the next window starts;
        # otherwise events in the boundary block would be collected twice.
        windowEnd = min(latestBlockNumber + blockStep - 1, currentBlockNumber)
        eventFilter = w3.eth.filter(
            {
                "fromBlock": latestBlockNumber,
                "toBlock": windowEnd,
                "address": _targetContract,
                "topics": [_targetEventTopicSignature],
            }
        )
        txLogs = w3.eth.get_filter_logs(eventFilter.filter_id)

        if len(txLogs):
            # Timestamp of the window's first block, shown in the progress line only.
            ts = pd.to_datetime((w3.eth.get_block(latestBlockNumber)["timestamp"]), unit='s').to_datetime64()
            print(
                datetime.now().strftime("%H:%M:%S"),
                "Time: {} Covering Block {}-{}: Found {} {} Transfer events".format(
                    ts,
                    latestBlockNumber,
                    windowEnd,
                    len(txLogs),
                    _fileNamePreFix
                )
            )
            for idx, log in enumerate(txLogs):
                txnHash = log["transactionHash"]
                print("Getting TX: ",txnHash.hex(), " ", idx, " of", len(txLogs))

                txn = w3.eth.get_transaction(txnHash)
                txnReceipt = w3.eth.get_transaction_receipt(txnHash)

                blockNumber = txn["blockNumber"]
                if blockNumber not in blockTimestamps:
                    blockTimestamps[blockNumber] = w3.eth.get_block(blockNumber)["timestamp"]

                # The payload may arrive as a hex string or as a bytes-like object;
                # normalise to bare hex digits and cut into 32-byte (64 hex char) words.
                rawData = log["data"]
                if not isinstance(rawData, str):
                    rawData = rawData.hex()
                dataStr = _with0x(rawData)[2:]
                data = [dataStr[i : i + 64] for i in range(0, len(dataStr), 64)]

                filteredTXs.append(
                    {
                        "txHash": txnHash.hex(),
                        "gasLimit": int(txn["gas"]),
                        "gasUsed": int(txnReceipt["gasUsed"]),
                        "gasPrice": int(txn["gasPrice"]),
                        "timestamp": blockTimestamps[blockNumber],
                        "blockNumber": blockNumber,
                        "from": txn["from"],
                        "to": txn["to"],
                        # Addresses are the last 20 bytes of a 32-byte topic / data word.
                        '_setToken' : _with0x(log.topics[1][-20:].hex()),
                        '_sellComponent' : _with0x(log.topics[2][-20:].hex()),
                        '_buyComponent' : _with0x(log.topics[3][-20:].hex()),
                        '_exchangeAdapter' : ("0x" + data[0][-40:]),
                        '_executor' : ("0x" + data[1][-40:]),
                        # Amounts use the whole 32-byte word; slicing them like an
                        # address would silently drop the upper 96 bits.
                        '_netAmountSold' : int(data[2], 16),
                        '_netAmountReceived' : int(data[3], 16),
                        '_protocolFee' : int(data[4], 16)
                    }
                )
        latestBlockNumber += blockStep
        # sleep to avoid getting rate limited
        sleep(0.001)
    print("Found {} total tradExecs {} fills".format(len(filteredTXs), _fileNamePreFix))
    return filteredTXs

In [None]:
#ADD IN Symbols and Decimals
# Every token address that appears on either side of a trade, de-duplicated.
uniqueTokens = pd.concat([combinedFills['_buyComponent'], combinedFills['_sellComponent']])
uniqueTokens = uniqueTokens.unique()

with open('./abi/erc20.abi') as json_file:
    erc20ABI = json.load(json_file)

# Minimal ABI for tokens (e.g. MKR) whose symbol() returns bytes32 rather than string.
bytes32SymbolABI = [
    {
        "constant": True,
        "inputs": [],
        "name": "symbol",
        "outputs": [{"name": "", "type": "bytes32"}],
        "payable": False,
        "stateMutability": "view",
        "type": "function",
    }
]

metaData=[]

for token in uniqueTokens:
    print(token)

    tokenAddress = w3.toChecksumAddress(token)
    contract = w3.eth.contract(tokenAddress, abi=erc20ABI)
    try:
        symbol = contract.functions.symbol().call()
    except Exception as symbolError:
        # The string-typed call failed; retry as bytes32 instead of assuming
        # that every failing token is MKR.
        print("error", repr(symbolError))
        try:
            rawSymbol = w3.eth.contract(tokenAddress, abi=bytes32SymbolABI).functions.symbol().call()
            symbol = rawSymbol.rstrip(b"\x00").decode("utf-8")
        except Exception as fallbackError:
            # No readable symbol at all: keep the row identifiable by its address.
            print("could not read symbol for", token, repr(fallbackError))
            symbol = token

    metaData.append(
        {
            "tokenAddress": token,
            "decimals": contract.functions.decimals().call(),
            "symbol" : symbol
        })
    # brief pause between tokens to go easy on the RPC endpoint
    sleep(0.01)


# The same token metadata is needed twice - once for the sold leg and once for
# the bought leg - so build two copies that differ only in their column names.
tokenMetaDataDF = pd.DataFrame(data=metaData)

sellMetaDataDF = tokenMetaDataDF.copy()
sellMetaDataDF.columns = ['tokenAddress', 'sellDecimals', 'sellSymbol']

buyMetaDataDF = tokenMetaDataDF.copy()
buyMetaDataDF.columns = ['tokenAddress', 'buyDecimals', 'buySymbol']

# Left-join the metadata onto the fills, first by sold token, then by bought token.
# Both joins bring a 'tokenAddress' column, so pandas suffixes them _x / _y.
tradeExecs = (
    combinedFills
    .merge(sellMetaDataDF, how='left', left_on='_sellComponent', right_on='tokenAddress')
    .merge(buyMetaDataDF, how='left', left_on='_buyComponent', right_on='tokenAddress')
)

#Normalise Token Quantities
# Divide the raw integer amounts by 10**decimals of the respective token.
sellScale = 10 ** tradeExecs['sellDecimals']
buyScale = 10 ** tradeExecs['buyDecimals']
tradeExecs['_netAmountSold'] = tradeExecs['_netAmountSold'] / sellScale
tradeExecs['_netAmountReceived'] = tradeExecs['_netAmountReceived'] / buyScale

# Replace the datetime column with a day-level dd-mm-YYYY string.
tradeExecs['datetime'] = [
    datetime.fromtimestamp(ts).strftime('%d-%m-%Y') for ts in tradeExecs["timestamp"]
]


#Add in set names
# Set token address -> ticker symbol for the sets covered by this notebook.
setSymbolsByAddress = {
    '0x1494ca1f11d487c2bbe4543e90080aeba4ba3c2b': 'DPI',
    '0x72e364f2abdc788b7e918bc238b21f109cd634d7': 'MVI',
    '0x2af1df3ab0ab157e1e2ad8f88a7d04fbea0c7dc6': 'BED',
    '0xc7c9f0d98123d48f6720cd36d0da09b603173613': 'SANX',
    '0x33d63ba1e57e54779f7ddaeaa7109349344cf5f1': 'DATA',
    '0x47110d43175f7f2c2425e7d15792acc5817eb44f': 'GMI',
}
setNameMapping = pd.DataFrame(
    list(setSymbolsByAddress.items()),
    columns = ['_setToken', '_setTokenSymbol'],
)

# Left join so trades of sets missing from the table are kept (symbol stays NaN).
tradeExecs = tradeExecs.merge(setNameMapping, on='_setToken', how='left')



In [None]:
# Approximate USD amounts: build a key of "<sold token>_<date>" per trade so that
# each distinct (token, day) pair is priced against CoinGecko only once; the
# prices are joined back onto the main DF by this key further down.
uniquePriceQueries = []

cols = ['_sellComponent', 'datetime']
tradeExecs['uniquePriceQueriesPK'] = [
    '_'.join(str(part) for part in keyParts)
    for keyParts in tradeExecs[cols].itertuples(index=False, name=None)
]

In [None]:
def getPriceUSD(contractAdd, date):
    """Look up the historical USD price of an Ethereum token on CoinGecko.

    The contract address is first resolved to a CoinGecko coin id, then the
    coin's history for ``date`` is requested through the module-level ``cg``
    client.

    Args:
        contractAdd: Token contract address on the 'ethereum' platform.
        date: Day to price, passed unchanged to ``get_coin_history_by_id``
            (the notebook supplies it as dd-mm-YYYY).

    Returns:
        The value found under market_data -> current_price -> usd, or ``None``
        when the coin id or any level of that price data is absent from the
        response.

    Errors raised by the client itself are not caught here.
    """
    coinInfo = cg.get_coin_info_from_contract_address_by_id('ethereum', contractAdd) or {}
    coinId = coinInfo.get('id')
    if coinId is None:
        return None

    history = cg.get_coin_history_by_id(coinId, date) or {}
    # A response can lack market data entirely; treat each missing level as
    # "no price" instead of failing on None.get(...).
    marketData = history.get('market_data') or {}
    currentPrice = marketData.get('current_price') or {}
    return currentPrice.get('usd')
# Price every distinct "<sold token>_<date>" key once. Keys already present in
# uniquePriceQueries are skipped, so re-running this cell (e.g. after an API
# error part-way through) resumes the work instead of appending duplicates.
pricedKeys = {entry['uniquePriceQueriesPK'] for entry in uniquePriceQueries}
allPriceQueries = tradeExecs['uniquePriceQueriesPK'].unique()
totalPriceQueries = len(allPriceQueries)

for i, priceQuery in enumerate(allPriceQueries):
    if priceQuery in pricedKeys:
        continue

    # The key is "<contract address>_<dd-mm-YYYY>"; the date holds no underscore,
    # so splitting on the last one recovers both parts.
    contract, date = priceQuery.rsplit('_', 1)

    uniquePriceQueries.append({
        'uniquePriceQueriesPK' : priceQuery,
        'approxUSDMarkSell' : getPriceUSD(contract, date)
    })
    pricedKeys.add(priceQuery)
    print(i, "/", totalPriceQueries)
    sleep(2) #run the query slow, because CG is not l33t

In [None]:
#join prices to trades
priceQueryDF = pd.DataFrame(data=uniquePriceQueries, columns=['uniquePriceQueriesPK', 'approxUSDMarkSell'])
# One price per key: a duplicated key would multiply trade rows in the join below.
priceQueryDF = priceQueryDF.drop_duplicates(subset='uniquePriceQueriesPK')

# Drop the outputs of a previous run of this cell first, so a re-run does not
# produce suffixed _x / _y copies of the price column.
tradeExecs = pd.merge(tradeExecs.drop(columns=['approxUSDMarkSell', 'approxUSDNotional'], errors='ignore'),
                     priceQueryDF,
                     on ='uniquePriceQueriesPK',
                     how ='left')

# USD notional = USD mark of the sold token * normalised quantity sold.
# The merged price column is 'approxUSDMarkSell'.
tradeExecs['approxUSDNotional'] = tradeExecs['approxUSDMarkSell'] * tradeExecs['_netAmountSold']


In [None]:
#tidy up
# Remove helper and leftover columns. errors='ignore' lets the cell succeed on a
# fresh kernel, where the names left over from earlier sessions
# ('approxUSDNotional_x', 'uniquePriceQueries') do not exist; the price-join key
# created in this notebook is 'uniquePriceQueriesPK'.
# NOTE(review): 'tokenAddress_y' from the buy-side metadata join is left in place,
# as before - confirm whether it should be dropped too.
tradeExecs = tradeExecs.drop(
    columns=['approxUSDNotional_x', 'uniquePriceQueries', 'uniquePriceQueriesPK', 'tokenAddress_x'],
    errors='ignore',
)

In [177]:
#Dump Numpy and CSV of tradeExec DF to disk
# The array form carries values only (no column names); np.save appends ".npy".
tradeExecsArray = np.array(tradeExecs)
np.save(file="rebalExecsFinal", arr=tradeExecsArray, allow_pickle=True)
# The CSV keeps the header row and the DataFrame index.
tradeExecs.to_csv(path_or_buf="rebalExecsFinal.csv")