In [1]:
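# One-time setup: uncomment the lines below to install grpcio-tools
# (it provides the `grpc_tools.protoc` compiler used later) into this kernel's environment.
# import sys
# !{sys.executable} -m pip install --upgrade pip
# !{sys.executable} -m pip install grpcio-tools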

## Imports

In [2]:
# Standard library: shlex/subprocess/sys are used to generate PB2 files,
# os to create output directories, re to parse fully qualified type names
import os
import re
import shlex
import subprocess
import sys

import requests
# File description for field inspection
from google.protobuf.descriptor import FieldDescriptor

## Constants

In [3]:
# Directory that holds the local .spkg files used throughout the notebook
SPKG_DIR='./spkg'

# Regular expression to extract the package and class names from a fully
# qualified name: group 1 is everything before the last dot, group 2 is the
# trailing identifier, which must start with an uppercase letter
RE_PACKAGE_CLASS = r'^(.*?)\.([A-Z][a-zA-Z0-9_]*)$'

# PB types: FieldDescriptor.TYPE_* constant -> name printed for the field type
TYPE_NAMES = {
    FieldDescriptor.TYPE_DOUBLE: 'double',
    FieldDescriptor.TYPE_FLOAT: 'float',
    FieldDescriptor.TYPE_INT64: 'int64',
    FieldDescriptor.TYPE_UINT64: 'uint64',
    FieldDescriptor.TYPE_INT32: 'int32',
    FieldDescriptor.TYPE_FIXED64: 'fixed64',
    FieldDescriptor.TYPE_FIXED32: 'fixed32',
    FieldDescriptor.TYPE_BOOL: 'bool',
    FieldDescriptor.TYPE_STRING: 'string',
    # Groups are a legacy construct, listed so they do not fall back to UNKNOWN_TYPE_*
    FieldDescriptor.TYPE_GROUP: 'group',
    FieldDescriptor.TYPE_MESSAGE: 'message',
    FieldDescriptor.TYPE_BYTES: 'bytes',
    FieldDescriptor.TYPE_UINT32: 'uint32',
    FieldDescriptor.TYPE_ENUM: 'enum',
    FieldDescriptor.TYPE_SFIXED32: 'sfixed32',
    FieldDescriptor.TYPE_SFIXED64: 'sfixed64',
    FieldDescriptor.TYPE_SINT32: 'sint32',
    FieldDescriptor.TYPE_SINT64: 'sint64',
}

# PB labels: FieldDescriptor.LABEL_* constant -> name printed for the field label
LABEL_NAMES = {
    FieldDescriptor.LABEL_OPTIONAL: 'optional',
    FieldDescriptor.LABEL_REQUIRED: 'required',
    FieldDescriptor.LABEL_REPEATED: 'repeated',
}

## Generate PB2 files

In [4]:
def generate_pb2_files(commands:str, spkg_path:str, out_path:str='.') -> None:
    """Run a shell snippet of `protogen_py` calls that compile protos from an SPKG.

    A `protogen_py` shell function is defined ahead of `commands`. It runs
    `grpc_tools.protoc` with `--descriptor_set_in` pointing at `spkg_path` and
    with both `--python_out` and `--grpc_python_out` pointing at `out_path`,
    forwarding whatever arguments it is given. Every `protogen_py` call in
    `commands` is attempted even if an earlier one fails.

    Args:
        commands: Shell snippet (POSIX shell syntax), typically one
            `protogen_py <file.proto>` call per line.
        spkg_path: Path of the .spkg file used as the descriptor set.
        out_path: Directory that receives the generated modules.

    Returns:
        None. A failing snippet is reported on stdout (exit code and captured
        stderr) rather than raised.
    """
    # Use the kernel's own interpreter so protoc comes from the same environment
    python = shlex.quote(sys.executable or 'python3')
    # Quote the paths so spaces or shell metacharacters cannot split the command
    protoc = (
        f'{python} -m grpc_tools.protoc'
        f' --descriptor_set_in={shlex.quote(spkg_path)}'
        f' --python_out={shlex.quote(out_path)}'
        f' --grpc_python_out={shlex.quote(out_path)}'
    )
    script = '\n'.join([
        '__protogen_status=0',
        # A shell function (unlike an alias) works in non-interactive shells;
        # it records a failing exit status without stopping the snippet
        f'protogen_py() {{ {protoc} "$@" || __protogen_status=$?; }}',
        commands,
        # Surface the recorded failure as the exit status of the whole script
        'exit "$__protogen_status"',
    ])
    try:
        result = subprocess.run(script, shell=True, capture_output=True, text=True)
        # subprocess.run() only raises when asked to, so check explicitly
        result.check_returncode()
    except subprocess.CalledProcessError as e:
        print(f'CalledProcessError: Command {e.cmd} failed with exit code {e.returncode}')
        print(f'Stderr: {e.stderr}')

## Generate SF pb2 files

In [5]:
# Compile the core Substreams protos bundled in the SPKG. Each line calls the
# `protogen_py` command that generate_pb2_files makes available to the snippet.
commands = """
    protogen_py sf/substreams/v1/package.proto;
    protogen_py sf/substreams/v1/modules.proto;
    protogen_py sf/substreams/v1/clock.proto;
"""
# out_path is left at its default ('.'), i.e. the current working directory
generate_pb2_files(commands, spkg_path=f'{SPKG_DIR}/ethereum-explorer-v0.1.2.spkg')

## Import PB2 packages

In [6]:
from sf.substreams.v1.package_pb2 import Package
# Only used for typing
from sf.substreams.v1.modules_pb2 import Module

## Read a Package from a filepath

In [7]:
def read_spkg_file(file_path:str) -> Package:
    """Parse the SPKG stored at `file_path` into a Package message.

    Args:
        file_path: Path of the .spkg file; it is opened in binary mode.

    Returns:
        A Package populated from the file's bytes.
    """
    with open(file_path, mode='rb') as spkg_handle:
        raw_bytes = spkg_handle.read()
    parsed = Package()
    parsed.ParseFromString(raw_bytes)
    return parsed

## Read a package remotely

In [8]:
def read_spkg_from_url(url:str, timeout:float=30.0) -> Package:
    """Download an SPKG over HTTP(S) and parse it into a Package message.

    Args:
        url: Address of the .spkg payload.
        timeout: Seconds passed to `requests.get` as its timeout, so an
            unresponsive server cannot block the kernel forever.

    Returns:
        A Package populated from the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.Timeout: If the server does not respond within `timeout`.
    """
    response = requests.get(url, timeout=timeout)
    # Raise an error for bad status codes
    response.raise_for_status()
    package = Package()
    package.ParseFromString(response.content)
    return package

## Read a SPKG Package

In [9]:
def read_spkg(location:str) -> Package:
    """Read an SPKG from either a URL or a local file path.

    Args:
        location: An `http://` or `https://` URL (scheme matched
            case-insensitively), or otherwise a path on the local filesystem.

    Returns:
        The parsed Package.
    """
    # Match the full scheme so local paths such as 'http_cache/x.spkg' or
    # 'httpd.spkg' are not mistaken for URLs
    if location.lower().startswith(('http://', 'https://')):
        return read_spkg_from_url(location)
    return read_spkg_file(location)

## Test reading SPKG Packages

In [10]:
# File approach: parse the SPKG stored under SPKG_DIR
package = read_spkg(location=f'{SPKG_DIR}/ethereum-explorer-v0.1.2.spkg')
assert package.__class__.__name__ == 'Package', 'Incorrect package'
assert package.network == 'mainnet', 'Expected mainnet'

# URL approach: download the package from spkg.io (requires network access);
# presumably the same v0.1.2 release as the local file
package_url = read_spkg(location='https://spkg.io/v1/packages/ethereum_explorer/v0.1.2')

# The local and the downloaded package should agree on class and network
assert package_url.__class__.__name__ == package.__class__.__name__, 'Incorrect package'
assert package_url.network == package.network, 'Expected mainnet'

## Read PB files in a package
### Reference: [substream](https://github.com/messari/substreams-python/blob/master/substreams/substream.py)

In [11]:
def get_proto_file_map(pkg:Package) -> dict[str, str]:
    """Map each message name to the name of the proto file that lists it.

    Only the entries in each file's `message_type` are visited. Keys are the
    short message names, so when two files list the same name the file that
    comes later in `pkg.proto_files` wins.

    Args:
        pkg: Package whose `proto_files` are scanned.

    Returns:
        Dict of message name -> proto file name.
    """
    return {
        message.name: proto_file.name
        for proto_file in pkg.proto_files
        for message in proto_file.message_type
    }

## Test Proto file maps

In [12]:
# Build the message-name -> proto-file map and spot-check the core Substreams messages
proto_map = get_proto_file_map(pkg=package)
assert proto_map['Package'] == 'sf/substreams/v1/package.proto', 'Expected Package proto is missing'
assert proto_map['Modules'] == 'sf/substreams/v1/modules.proto', 'Expected Modules proto is missing'
assert proto_map['Clock'] == 'sf/substreams/v1/clock.proto', 'Expected Clock proto is missing'

## Returns Custom proto maps
#### Filter out sf and google proto maps

In [13]:
def get_custom_proto_file_map(proto_map:dict[str, str]) -> dict[str, str]:
    """Drop the entries whose proto file lives under `sf/` or `google/`.

    Args:
        proto_map: Dict of message name -> proto file name.

    Returns:
        A new dict with the remaining entries, in their original order.
    """
    excluded_prefixes = ('sf/', 'google/')
    custom_entries = {}
    for message_name, proto_path in proto_map.items():
        if proto_path.startswith(excluded_prefixes):
            continue
        custom_entries[message_name] = proto_path
    return custom_entries

In [14]:
# Keep only the protos outside 'sf/' and 'google/' and spot-check two of them
custom_proto_map = get_custom_proto_file_map(proto_map=proto_map)
assert custom_proto_map['BlockMeta'] == 'block_meta.proto', 'BlockMeta proto is missing'
assert custom_proto_map['Event'] == 'event.proto', 'Event proto is missing'

In [15]:
def get_module_names(pkg:Package) -> list[str]:
    """List the name of every module in `pkg.modules.modules`, in order.

    Args:
        pkg: Package whose modules are enumerated.

    Returns:
        The module names as a list of strings.
    """
    names = []
    for entry in pkg.modules.modules:
        names.append(entry.name)
    return names

In [16]:
# List the names of the modules declared in the package
get_module_names(pkg=package)

['map_block_meta',
 'map_block_full',
 'map_filter_transactions',
 'map_contract_events']

## Return the module details for a given module name

In [17]:
def get_module_details(pkg:Package, name:str) -> Module:
    """Return the first module of `pkg` whose name equals `name`.

    Args:
        pkg: Package whose `modules.modules` entries are searched.
        name: Module name to look up.

    Returns:
        The matching module object.

    Raises:
        AssertionError: If `pkg` has no module called `name`.
    """
    # Search the package that was passed in (not a notebook-level global) and
    # stop at the first hit instead of scanning the module list twice
    for module in pkg.modules.modules:
        if module.name == name:
            return module
    raise AssertionError(f"{name} doesn't exist as a module")

In [18]:
# Show the full definition of the 'map_block_meta' module
get_module_details(pkg=package, name='map_block_meta')

name: "map_block_meta"
kind_map {
  output_type: "proto:eth.block_meta.v1.BlockMeta"
}
binary_entrypoint: "map_block_meta"
inputs {
  source {
    type: "sf.ethereum.type.v2.Block"
  }
}
output {
  type: "proto:eth.block_meta.v1.BlockMeta"
}

## Find the proto file that declares the output class of 'map_block_meta'

In [19]:
def get_output_proto_for_module(pkg:Package, module_name:str) -> str:
    """Return the proto file that declares a module's output message.

    The module's `output.type` (e.g. 'proto:eth.block_meta.v1.BlockMeta') is
    reduced to its class name, which is then looked up in the message-name ->
    proto-file map built from the same package.

    Args:
        pkg: Package that contains the module and its proto files.
        module_name: Name of the module to inspect.

    Returns:
        Name of the proto file listing the output message.

    Raises:
        AssertionError: If the module does not exist or its output type does
            not match RE_PACKAGE_CLASS.
        KeyError: If no proto file in `pkg` lists the output class.
    """
    # Read everything from the `pkg` argument rather than notebook-level globals
    output_type = get_module_details(pkg=pkg, name=module_name).output.type
    # Extract fully qualified class name: keep what follows the last ':' so a
    # type without a 'proto:' style prefix is accepted as well
    fqcn = output_type.rpartition(':')[2]

    match = re.search(RE_PACKAGE_CLASS, fqcn)
    if match is None:
        raise AssertionError('No match found')
    class_name = match.group(2)
    return get_proto_file_map(pkg=pkg)[class_name]

In [20]:
# Look up the PB file that declares the output message of 'map_block_meta'
get_output_proto_for_module(pkg=package, module_name='map_block_meta')

'block_meta.proto'

In [21]:
def create_dir(dir_name:str) -> None:
    """Create `dir_name`, including missing parent directories.

    An already existing directory is not an error. Any other failure is
    reported on stdout instead of being raised.

    Args:
        dir_name: Path of the directory to create.
    """
    try:
        # Pure-Python equivalent of 'mkdir -p': no external command required
        os.makedirs(dir_name, exist_ok=True)
    except OSError as e:
        print(f'OSError: Could not create directory {dir_name!r}: {e}')

## Generate PB2 files for module 'map_block_meta'

In [22]:
# Get the PB file that declares the output message of 'map_block_meta'
proto_file = get_output_proto_for_module(pkg=package, module_name='map_block_meta')
print(f'PB file: {proto_file}')

# The proto file name has no directory part, so choose a dedicated package
# directory for the generated module instead of the working directory
out_path='./sf/substreams/explorers/ethereum'
create_dir(dir_name=out_path)

# Generate PB classes from the proto file into out_path
command = f'protogen_py {proto_file}'
generate_pb2_files(commands=command, spkg_path=f'{SPKG_DIR}/ethereum-explorer-v0.1.2.spkg', out_path=out_path)

PB file: block_meta.proto


## Return human-readable label and type strings for a field

In [23]:
def get_human_readable_field_type(field_descriptor:FieldDescriptor) -> tuple[str, str]:
    """Describe a field's label and type as printable strings.

    Args:
        field_descriptor: Descriptor of the field to describe.

    Returns:
        A `(label, type)` tuple. The label comes from LABEL_NAMES and the type
        from TYPE_NAMES, each with an 'UNKNOWN_..._<n>' fallback for unlisted
        values. For message and enum fields the type is the full name of the
        referenced message/enum when the descriptor exposes one.
    """
    label_str = LABEL_NAMES.get(field_descriptor.label, f'UNKNOWN_LABEL_{field_descriptor.label}')
    type_str = TYPE_NAMES.get(field_descriptor.type, f'UNKNOWN_TYPE_{field_descriptor.type}')

    if field_descriptor.type in (FieldDescriptor.TYPE_MESSAGE, FieldDescriptor.TYPE_ENUM):
        # For messages and enums, the type name is the full name of the
        # message/enum type; the message type takes precedence when both are set
        referenced_type = field_descriptor.message_type or field_descriptor.enum_type
        if referenced_type:
            type_str = referenced_type.full_name
    return (label_str, type_str)

## Look into PB output class

In [24]:
def display_proto_details(field_descriptor:FieldDescriptor) -> None:
    """Print a header plus one proto-style line per field of a message.

    NOTE: despite the parameter's name and annotation, the argument is used as
    a message descriptor: the function reads its `full_name` and iterates its
    `fields`, and the calls in this notebook pass `<message>.DESCRIPTOR`.
    The name is kept because callers pass it by keyword.

    Args:
        field_descriptor: Descriptor of the message whose fields are printed.
    """
    print(f'Fields for: {field_descriptor.full_name}')
    for field in field_descriptor.fields:
        label_name, type_name = get_human_readable_field_type(field)
        # Layout mirrors a .proto field declaration: <label> <type> <name> = <number>
        print(f'{label_name} {type_name} {field.name} = {field.number}')

## Display details of PB class

In [25]:
# Import the generated BlockMeta class; the module only exists once the
# generation cell above has run, so this import cannot move to the imports cell
from sf.substreams.explorers.ethereum.block_meta_pb2 import BlockMeta
block_meta = BlockMeta()
# Display proto details using the message's descriptor
display_proto_details(field_descriptor=block_meta.DESCRIPTOR)

Fields for: eth.block_meta.v1.BlockMeta
optional uint64 number = 1
optional string hash = 2
optional string parent_hash = 3


## Another Module

In [26]:
#1. Find the PB file that declares the output message of 'map_block_full'
proto_file = get_output_proto_for_module(pkg=package, module_name='map_block_full')
print(f'PB file: {proto_file}')

#2. Generate classes from the proto file (out_path stays at its default, '.')
command = f'protogen_py {proto_file}'
generate_pb2_files(commands=command, spkg_path=f'{SPKG_DIR}/ethereum-explorer-v0.1.2.spkg')

#3. Import the generated class (only importable after step 2 has run)
from sf.ethereum.type.v2.type_pb2 import Block

#4. Display proto details
block = Block()
# Display proto details using the message's descriptor
display_proto_details(field_descriptor=block.DESCRIPTOR)

PB file: sf/ethereum/type/v2/type.proto
Fields for: sf.ethereum.type.v2.Block
optional bytes hash = 2
optional uint64 number = 3
optional uint64 size = 4
optional sf.ethereum.type.v2.BlockHeader header = 5
repeated sf.ethereum.type.v2.BlockHeader uncles = 6
repeated sf.ethereum.type.v2.TransactionTrace transaction_traces = 10
repeated sf.ethereum.type.v2.BalanceChange balance_changes = 11
repeated sf.ethereum.type.v2.CodeChange code_changes = 20
optional int32 ver = 1
