In [16]:
import requests
import json
import time
import pprint as pp
from pyspark.sql.types import *
from pyspark.sql.functions import udf
import web3

In [8]:
spark.conf.set("viewsEnabled","true")
spark.conf.set("materializationDataset","<dataset>")

eth_tx = spark.read.format("bigquery")\
    .option('table', 'bigquery-public-data:crypto_ethereum.transactions') \
    .load()

eth_tx.createOrReplaceTempView('eth_tx')
sql_hash = """
select hash, from_address, to_address, input from eth_tx
where to_address = lower('0xBC4CA0EdA7647A8aB7C2061c2E118A18a936f13D')
"""
tx_hash = spark.sql(sql_hash)
tx_hash.show()

+--------------------+--------------------+--------------------+--------------------+
|                hash|        from_address|          to_address|               input|
+--------------------+--------------------+--------------------+--------------------+
|0xf1a7312bec04f7b...|0x93f36c4d96ad967...|0xbc4ca0eda7647a8...|0xa22cb4650000000...|
|0x38b6668b1b470fa...|0xb7692c097a20f67...|0xbc4ca0eda7647a8...|0x23b872dd0000000...|
|0x72a1c46217d0a16...|0xec6a0c08eb2e8c9...|0xbc4ca0eda7647a8...|0x23b872dd0000000...|
|0xe95cb89fc5f8904...|0x7071246948ecad9...|0xbc4ca0eda7647a8...|0xa22cb4650000000...|
|0x02a1f7fb349ba99...|0x1ae642ad39c87a5...|0xbc4ca0eda7647a8...|0xa22cb4650000000...|
|0xc719973c9f0b390...|0x7512c6a03b4c29e...|0xbc4ca0eda7647a8...|0x095ea7b30000000...|
|0x9734adb9d226500...|0x504df0b10056b46...|0xbc4ca0eda7647a8...|0x23b872dd0000000...|
|0x7a18926703d3e28...|0xff3d22f855b1630...|0xbc4ca0eda7647a8...|0x23b872dd0000000...|
|0xf34db3793a92d48...|0x8ab83d869f2bc25...|0xbc4ca0eda

In [21]:
df.rdd.take(1)


[Row(hash='0x4ffcd8d81aa5ec615a7901e474b99f24a965421f99f868b27c642a052bd5555c', from_address='0x8ce2587b25e56f39785f6ffba8fe95997d40ed9e', to_address='0xbc4ca0eda7647a8ab7c2061c2e118a18a936f13d', input='0x23b872dd0000000000000000000000008ce2587b25e56f39785f6ffba8fe95997d40ed9e000000000000000000000000e6f1ead8e3f682f16a90d4961d47846f69cea076000000000000000000000000000000000000000000000000000000000000166d')]

In [6]:
import sys
from functools import lru_cache
from web3 import Web3
from web3.auto import w3
from web3.contract import Contract
from web3._utils.events import get_event_data
from web3._utils.abi import exclude_indexed_event_inputs, get_abi_input_names, get_indexed_event_inputs, normalize_event_input_types
from web3.exceptions import MismatchedABI, LogTopicError
from web3.types import ABIEvent
from eth_utils import event_abi_to_log_topic, to_hex
from hexbytes import HexBytes
from pyspark.sql.types import *
from pyspark.sql.functions import udf

import json
import re


In [7]:

def decode_tuple(t, target_field):
    output = dict()
    for i in range(len(t)):
        if isinstance(t[i], (bytes, bytearray)):
            output[target_field[i]['name']] = to_hex(t[i])
        elif isinstance(t[i], (tuple)):
            output[target_field[i]['name']] = decode_tuple(t[i], target_field[i]['components'])
        else:
            output[target_field[i]['name']] = t[i]
    return output


def decode_list_tuple(l, target_field):
    output = l
    for i in range(len(l)):
        output[i] = decode_tuple(l[i], target_field)
    return output


def decode_list(l):
    output = l
    for i in range(len(l)):
        if isinstance(l[i], (bytes, bytearray)):
            output[i] = to_hex(l[i])
        else:
            output[i] = l[i]
    return output


def convert_to_hex(arg, target_schema):
    output = dict()
    for k in arg:
        if isinstance(arg[k], (bytes, bytearray)):
            output[k] = to_hex(arg[k])
        elif isinstance(arg[k], (list)) and len(arg[k]) > 0:
            target = [a for a in target_schema if 'name' in a and a['name'] == k][0]
            if target['type'] == 'tuple[]':
                target_field = target['components']
                output[k] = decode_list_tuple(arg[k], target_field)
            else:
                output[k] = decode_list(arg[k])
        elif isinstance(arg[k], (tuple)):
            target_field = [a['components'] for a in target_schema if 'name' in a and a['name'] == k][0]
            output[k] = decode_tuple(arg[k], target_field)
        else:
            output[k] = arg[k]
    return output

# @lru_cache(maxsize=None)
def _get_contract(address, abi):
    """
    This helps speed up execution of decoding across a large dataset by caching the contract object
    It assumes that we are decoding a small set, on the order of thousands, of target smart contracts
    """
    if isinstance(abi, (str)):
        abi = json.loads(abi)

    contract = w3.eth.contract(address=Web3.toChecksumAddress(address), abi=abi)
    return (contract, abi)

@udf
def decode_tx(address, input_data, abi):
    if abi is not None:
        try:
            (contract, abi) = _get_contract(address, abi)
            func_obj, func_params = contract.decode_function_input(input_data)
            target_schema = [a['inputs'] for a in abi if 'name' in a and a['name'] == func_obj.fn_name][0]
            decoded_func_params = convert_to_hex(func_params, target_schema)
            return func_obj.fn_name # json.dumps(decoded_func_params), json.dumps(target_schema))
        except:
            e = sys.exc_info()[0]
            return ('decode error', repr(e), None)
    else:
        return ('no matching abi', None, None)


In [17]:
TX_HASH = '0x56f2ce34e4b20578742ed8ddc9fcbacaec62d477a530dff7ace8da2fe64b1208'
OPENSEA_CONTRACT_ADDR = "0x7be8076f4ea4a4ad08075c2508e481d6c946d12b"
BORED_APE_ADDR = "0xBC4CA0EdA7647A8aB7C2061c2E118A18a936f13D"

def fetch_abi(contract_addr):
    ABI_ENDPOINT = 'https://api.etherscan.io/api?module=contract&action=getabi&address='
    response = requests.get('%s%s'%(ABI_ENDPOINT, contract_addr))
    response_json = response.json()
    abi_json = json.loads(response_json['result'])
    return json.dumps(abi_json)

opensea_abi = fetch_abi(OPENSEA_CONTRACT_ADDR)
time.sleep(5)
nft_abi = fetch_abi(BORED_APE_ADDR)
pp.pprint(nft_abi)



('[{"inputs": [{"internalType": "string", "name": "name", "type": "string"}, '
 '{"internalType": "string", "name": "symbol", "type": "string"}, '
 '{"internalType": "uint256", "name": "maxNftSupply", "type": "uint256"}, '
 '{"internalType": "uint256", "name": "saleStart", "type": "uint256"}], '
 '"stateMutability": "nonpayable", "type": "constructor"}, {"anonymous": '
 'false, "inputs": [{"indexed": true, "internalType": "address", "name": '
 '"owner", "type": "address"}, {"indexed": true, "internalType": "address", '
 '"name": "approved", "type": "address"}, {"indexed": true, "internalType": '
 '"uint256", "name": "tokenId", "type": "uint256"}], "name": "Approval", '
 '"type": "event"}, {"anonymous": false, "inputs": [{"indexed": true, '
 '"internalType": "address", "name": "owner", "type": "address"}, {"indexed": '
 'true, "internalType": "address", "name": "operator", "type": "address"}, '
 '{"indexed": false, "internalType": "bool", "name": "approved", "type": '
 '"bool"}], "name"

In [65]:
def decode(partitionData):
    (contract, abi) = _get_contract(BORED_APE_ADDR, nft_abi)
    for row in partitionData:
        func_obj, func_params = contract.decode_function_input(row['input'])
        target_schema = [a['inputs'] for a in abi if 'name' in a and a['name'] == func_obj.fn_name][0]
        decoded_func_params = convert_to_hex(func_params, target_schema)
        if func_obj.fn_name == 'transferFrom':
            from_ = decoded_func_params['from']
            to_ = decoded_func_params['to']
            id_ = decoded_func_params['tokenId']
            yield (func_obj.fn_name, from_, to_, id_) # json.dumps(decoded_func_params), json.dumps(target_schema))

df2 = df.rdd.mapPartitions(decode).toDF(['fn_name', 'from', 'to', 'id'])


In [66]:
df2.show(truncate=False)

+------------+------------------------------------------+------------------------------------------+----+
|fn_name     |from                                      |to                                        |id  |
+------------+------------------------------------------+------------------------------------------+----+
|transferFrom|0x8ce2587B25e56f39785f6fFba8Fe95997d40ed9e|0xe6f1EaD8e3F682F16A90d4961D47846F69CeA076|5741|
|transferFrom|0xe6f1EaD8e3F682F16A90d4961D47846F69CeA076|0x8ce2587B25e56f39785f6fFba8Fe95997d40ed9e|8232|
|transferFrom|0xA8FC07d1E0BBE45e47249632d5804955bF82787C|0xd3fC6fec4b219c2D74B366FEe6B585df71611533|7060|
|transferFrom|0x937a1faB2930345A4Fd11d10567e82d1b9634c64|0xFba8A2Fa05E0ee39b6e3C584A0f7ce397Cc92aF0|3379|
|transferFrom|0xF531c7A28a3492390D4C47dBa6775FA76349DcFF|0xD1F6C71350791f2C26dE8C0E2E5f293a83455C52|3349|
|transferFrom|0x9aD14abF4986F635D521033beDd8A8ff61801A41|0xcC87Bcf3fd3120C86532426AC694b5c3B9686179|392 |
|transferFrom|0x8c241750F13f0f6d2e95De5Fb4A77e

In [79]:
df2.count()

Py4JJavaError: An error occurred while calling o1208.count.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 213 in stage 66.0 failed 4 times, most recent failure: Lost task 213.3 in stage 66.0 (TID 1522, zhuo-test-0116-w-4.us-central1-b.c.unity-ads-dd-ds-dev-prd.internal, executor 75): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "<ipython-input-65-dc86f751e2fb>", line 4, in decode
  File "/opt/conda/anaconda/lib/python3.7/site-packages/eth_utils/decorators.py", line 18, in _wrapper
    return self.method(obj, *args, **kwargs)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 466, in decode_function_input
    func = self.get_function_by_selector(selector)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/eth_utils/decorators.py", line 18, in _wrapper
    return self.method(obj, *args, **kwargs)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 459, in get_function_by_selector
    return get_function_by_identifier(fns, 'selector')
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 1681, in get_function_by_identifier
    'Could not find any function with matching {0}'.format(identifier)
ValueError: Could not find any function with matching selector

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:457)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:592)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:575)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$2.hasNext(WholeStageCodegenExec.scala:636)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:126)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:414)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:1926)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1914)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1913)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1913)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:948)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:948)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2147)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2096)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2085)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2076)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2097)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2116)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2141)
	at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:990)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:304)
	at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2836)
	at org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2835)
	at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:3369)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:80)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
	at org.apache.spark.sql.Dataset.count(Dataset.scala:2835)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "<ipython-input-65-dc86f751e2fb>", line 4, in decode
  File "/opt/conda/anaconda/lib/python3.7/site-packages/eth_utils/decorators.py", line 18, in _wrapper
    return self.method(obj, *args, **kwargs)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 466, in decode_function_input
    func = self.get_function_by_selector(selector)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/eth_utils/decorators.py", line 18, in _wrapper
    return self.method(obj, *args, **kwargs)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 459, in get_function_by_selector
    return get_function_by_identifier(fns, 'selector')
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 1681, in get_function_by_identifier
    'Could not find any function with matching {0}'.format(identifier)
ValueError: Could not find any function with matching selector

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:457)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:592)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:575)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$2.hasNext(WholeStageCodegenExec.scala:636)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:126)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:414)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more


In [69]:
from graphframes import GraphFrame
from graphframes.examples import Graphs


In [68]:
!pip install graphframes

Collecting graphframes
  Downloading https://files.pythonhosted.org/packages/0b/27/c7c7e1ced2fe9a905f865dd91faaec2ac8a8e313f511678c8ec92a41a153/graphframes-0.6-py2.py3-none-any.whl
Installing collected packages: graphframes
Successfully installed graphframes-0.6


In [77]:
df_from = df2.select('from')
df_to = df2.select('to')
# u = df_from.union(df_to)

In [78]:
df_from.count()

Py4JJavaError: An error occurred while calling o1385.count.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 158 in stage 64.0 failed 4 times, most recent failure: Lost task 158.3 in stage 64.0 (TID 1107, zhuo-test-0116-w-9.us-central1-b.c.unity-ads-dd-ds-dev-prd.internal, executor 64): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "<ipython-input-65-dc86f751e2fb>", line 4, in decode
  File "/opt/conda/anaconda/lib/python3.7/site-packages/eth_utils/decorators.py", line 18, in _wrapper
    return self.method(obj, *args, **kwargs)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 466, in decode_function_input
    func = self.get_function_by_selector(selector)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/eth_utils/decorators.py", line 18, in _wrapper
    return self.method(obj, *args, **kwargs)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 459, in get_function_by_selector
    return get_function_by_identifier(fns, 'selector')
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 1681, in get_function_by_identifier
    'Could not find any function with matching {0}'.format(identifier)
ValueError: Could not find any function with matching selector

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:457)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:592)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:575)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$2.hasNext(WholeStageCodegenExec.scala:636)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:126)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:414)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:1926)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1914)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1913)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1913)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:948)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:948)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:948)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2147)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2096)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2085)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:759)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2076)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2097)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2116)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2141)
	at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:990)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:304)
	at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2836)
	at org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2835)
	at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:3369)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:80)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
	at org.apache.spark.sql.Dataset.count(Dataset.scala:2835)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
    process()
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "<ipython-input-65-dc86f751e2fb>", line 4, in decode
  File "/opt/conda/anaconda/lib/python3.7/site-packages/eth_utils/decorators.py", line 18, in _wrapper
    return self.method(obj, *args, **kwargs)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 466, in decode_function_input
    func = self.get_function_by_selector(selector)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/eth_utils/decorators.py", line 18, in _wrapper
    return self.method(obj, *args, **kwargs)
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 459, in get_function_by_selector
    return get_function_by_identifier(fns, 'selector')
  File "/opt/conda/anaconda/lib/python3.7/site-packages/web3/contract.py", line 1681, in get_function_by_identifier
    'Could not find any function with matching {0}'.format(identifier)
ValueError: Could not find any function with matching selector

	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:457)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:592)
	at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:575)
	at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
	at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.agg_doAggregateWithoutKey_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$2.hasNext(WholeStageCodegenExec.scala:636)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
	at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:126)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
	at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
	at org.apache.spark.scheduler.Task.run(Task.scala:123)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:414)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:417)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more


In [None]:
df_to.count()