# Ethereum Request for Comments (ERCs) AI Agent

In [25]:
import time
from datetime import date, datetime
import pandas as pd
from typing import List, Any
from pydantic_ai import Agent
from pydantic import BaseModel
from pydantic_ai.messages import ModelMessagesTypeAdapter

import random
import json
import secrets
from pathlib import Path
from datetime import datetime

import io
import zipfile
import requests
import frontmatter
import re
import textwrap
import numpy as np

import google.generativeai as genai
import os

from tqdm.auto import tqdm

from minsearch import Index, VectorSearch

from sentence_transformers import SentenceTransformer

## 1. Ingest and Index Documents

In [1]:
import io
import logging
import zipfile

import frontmatter
import requests

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [2]:
def read_repo_data(repo_owner, repo_name):
    """
    Download and parse all markdown files from a GitHub repository.

    Args:
        repo_owner: GitHub username or organization
        repo_name: Repository name

    Returns:
        List of dictionaries containing file content and metadata
    """
    prefix = "https://github.com"
    url = f"{prefix}/{repo_owner}/{repo_name}/archive/refs/heads/master.zip"
    resp = requests.get(url)

    if resp.status_code != 200:
        raise Exception(f"Failed to download repository: {resp.status_code}")

    repository_data = []
    zf = zipfile.ZipFile(io.BytesIO(resp.content))

    for file_info in zf.infolist():
        filename = file_info.filename
        filename_lower = filename.lower()

        if not ((filename_lower.endswith(".md")) and (filename_lower.startswith("ercs-master/ercs"))):
            continue

        try:
            with zf.open(file_info) as f_in:
                content = f_in.read().decode("utf-8", errors="ignore")
                post = frontmatter.loads(content)
                data = post.to_dict()
                data["filename"] = filename
                repository_data.append(data)
        except Exception as e:
            logger.error(f"Error processing {filename}: {e}")
            continue

    zf.close()

    return repository_data

In [3]:
erc_data = read_repo_data("ethereum", "ERCs")

In [4]:
len(erc_data)

540

In [36]:
erc_data[0]

{'eip': 1,
 'title': 'EIP Purpose and Guidelines',
 'status': 'Living',
 'type': 'Meta',
 'author': 'Martin Becze <mb@ethereum.org>, Hudson Jameson <hudson@ethereum.org>, et al.',
 'created': datetime.date(2015, 10, 27),
 'filename': 'ERCs-master/ERCS/eip-1.md'}

In [69]:
erc_data[30]

{'eip': 1387,
 'title': 'Merkle Tree Attestations with Privacy enabled',
 'author': 'Weiwu Zhang <a@colourful.land>, James Sangalli <j.l.sangalli@gmail.com>',
 'discussions-to': 'https://github.com/ethereum/EIPs/issues/1387',
 'status': 'Stagnant',
 'type': 'Standards Track',
 'category': 'ERC',
 'created': datetime.date(2018, 9, 8),
 'content': '### Introduction\n\nIt\'s often needed that an Ethereum smart contract must verify a claim (I live in Australia) attested by a valid attester.\n\nFor example, an ICO contract might require that the participant, Alice, lives in Australia before she participates. Alice\'s claim of residency could come from a local Justice of the Peace who could attest that "Alice is a resident of Australia in NSW".\n\nUnlike previous attempts, we assume that the attestation is signed and issued off the blockchain in a Merkle Tree format. Only a part of the Merkle tree is revealed by Alice at each use. Therefore we avoid the privacy problem often associated with 

In [70]:
for record in erc_data:
    print(record['filename'])

ERCs-master/ERCS/eip-1.md
ERCs-master/ERCS/erc-1046.md
ERCs-master/ERCS/erc-1056.md
ERCs-master/ERCS/erc-1062.md
ERCs-master/ERCS/erc-1066.md
ERCs-master/ERCS/erc-1077.md
ERCs-master/ERCS/erc-1078.md
ERCs-master/ERCS/erc-1080.md
ERCs-master/ERCS/erc-1081.md
ERCs-master/ERCS/erc-1123.md
ERCs-master/ERCS/erc-1129.md
ERCs-master/ERCS/erc-1132.md
ERCs-master/ERCS/erc-1154.md
ERCs-master/ERCS/erc-1155.md
ERCs-master/ERCS/erc-1167.md
ERCs-master/ERCS/erc-1175.md
ERCs-master/ERCS/erc-1178.md
ERCs-master/ERCS/erc-1185.md
ERCs-master/ERCS/erc-1191.md
ERCs-master/ERCS/erc-1202.md
ERCs-master/ERCS/erc-1203.md
ERCs-master/ERCS/erc-1207.md
ERCs-master/ERCS/erc-1261.md
ERCs-master/ERCS/erc-1271.md
ERCs-master/ERCS/erc-1319.md
ERCs-master/ERCS/erc-1328.md
ERCs-master/ERCS/erc-1337.md
ERCs-master/ERCS/erc-1363.md
ERCs-master/ERCS/erc-137.md
ERCs-master/ERCS/erc-1386.md
ERCs-master/ERCS/erc-1387.md
ERCs-master/ERCS/erc-1388.md
ERCs-master/ERCS/erc-1417.md
ERCs-master/ERCS/erc-1438.md
ERCs-master/ERCS/e

## 2. Chunking and Intelligent Processing for Data

### 2.1 Splitting by Paragraphs

In [113]:
import re
import textwrap

text = erc_data[45]['content']
paragraphs = re.split(r"\n\s*\n", text.strip())

In [114]:
len(paragraphs)

60

In [115]:
paragraphs[0]

'## Simple Summary\nMake smart contracts (e.g. dapps) accessible to non-ether users by allowing contracts to accept "[collect-calls](https://en.wikipedia.org/wiki/Collect_call)", paying for incoming calls. \nLet contracts "listen" on publicly accessible channels (e.g. web URL or a whisper address). \nIncentivize nodes to run "gas stations" to facilitate this. \nRequire no network changes, and minimal contract changes.'

In [116]:
print(textwrap.fill(paragraphs[0], width=100))

## Simple Summary Make smart contracts (e.g. dapps) accessible to non-ether users by allowing
contracts to accept "[collect-calls](https://en.wikipedia.org/wiki/Collect_call)", paying for
incoming calls.  Let contracts "listen" on publicly accessible channels (e.g. web URL or a whisper
address).  Incentivize nodes to run "gas stations" to facilitate this.  Require no network changes,
and minimal contract changes.


In [117]:
print(textwrap.fill(paragraphs[1], width=100))

## Abstract Communicating with dapps currently requires paying ETH for gas, which limits dapp
adoption to ether users.  Therefore, contract owners may wish to pay for the gas to increase user
acquisition, or let their users pay for gas with fiat money.  Alternatively, a 3rd party may wish to
subsidize the gas costs of certain contracts.  Solutions such as described in
[EIP-1077](./eip-1077.md) could allow transactions from addresses that hold no ETH.


In [120]:
print(textwrap.fill(paragraphs[58], width=100))

A working implementation of the [**gas stations network**](https://github.com/tabookey-dev/tabookey-
gasless) is being developed by **TabooKey**. It consists of `RelayHub`, `RelayRecipient`, `web3
hooks`, an implementation of a gas station inside `geth`, and sample dapps using the gas stations
network.


In [121]:
print(textwrap.fill(paragraphs[59], width=100))

## Copyright Copyright and related rights waived via [CC0](../LICENSE.md).


The spliting by paragraphs doens't make sense because the text subject is lost.

### 2.2 Sliding Window Chunking

In [4]:
def sliding_window(seq, size, step):
    if size <= 0 or step <= 0:
        raise ValueError("size and step must be positive")

    n = len(seq)
    result = []
    for i in range(0, n, step):
        chunk = seq[i:i+size]
        result.append({'start': i, 'chunk': chunk})
        if i + size >= n:
            break

    return result

In [5]:
erc_data_chunks = []

for doc in erc_data:
    doc_copy = doc.copy()
    doc_content = doc_copy.pop('content')
    chunks = sliding_window(doc_content, 2000, 1000)
    for chunk in chunks:
        chunk.update(doc_copy)
    erc_data_chunks.extend(chunks)

In [6]:
len(erc_data_chunks)

7022

In [92]:
erc_data_chunks[0]

{'start': 0,
 'chunk': '## What is an EIP?\n\nEIP stands for Ethereum Improvement Proposal. An EIP is a design document providing information to the Ethereum community, or describing a new feature for Ethereum or its processes or environment. The EIP should provide a concise technical specification of the feature and a rationale for the feature. The EIP author is responsible for building consensus within the community and documenting dissenting opinions.\n\n## EIP Rationale\n\nWe intend EIPs to be the primary mechanisms for proposing new features, for collecting community technical input on an issue, and for documenting the design decisions that have gone into Ethereum. Because the EIPs are maintained as text files in a versioned repository, their revision history is the historical record of the feature proposal.\n\nFor Ethereum implementers, EIPs are a convenient way to track the progress of their implementation. Ideally each implementation maintainer would list the EIPs that they hav

In [93]:
len(erc_data_chunks[0]["chunk"])

2000

In [96]:
print(erc_data_chunks[0]["chunk"], sep="\n")

## What is an EIP?

EIP stands for Ethereum Improvement Proposal. An EIP is a design document providing information to the Ethereum community, or describing a new feature for Ethereum or its processes or environment. The EIP should provide a concise technical specification of the feature and a rationale for the feature. The EIP author is responsible for building consensus within the community and documenting dissenting opinions.

## EIP Rationale

We intend EIPs to be the primary mechanisms for proposing new features, for collecting community technical input on an issue, and for documenting the design decisions that have gone into Ethereum. Because the EIPs are maintained as text files in a versioned repository, their revision history is the historical record of the feature proposal.

For Ethereum implementers, EIPs are a convenient way to track the progress of their implementation. Ideally each implementation maintainer would list the EIPs that they have implemented. This will give en

In [81]:
print(erc_data_chunks[1]["chunk"], sep="\n")

d users a convenient way to know the current status of a given implementation or library.

## EIP Types

There are three types of EIP:

- A **Standards Track EIP** describes any change that affects most or all Ethereum implementations, such as—a change to the network protocol, a change in block or transaction validity rules, proposed application standards/conventions, or any change or addition that affects the interoperability of applications using Ethereum. Standards Track EIPs consist of three parts—a design document, an implementation, and (if warranted) an update to the [formal specification](https://github.com/ethereum/yellowpaper). Furthermore, Standards Track EIPs can be broken down into the following categories:
  - **Core**: improvements requiring a consensus fork (e.g. [EIP-5](./eip-5.md), [EIP-101](./eip-101.md)), as well as changes that are not necessarily consensus critical but may be relevant to [“core dev” discussions](https://github.com/ethereum/pm) (for example, [EIP-9

In [82]:
print(erc_data_chunks[2]["chunk"], sep="\n")

0], and the miner/node strategy changes 2, 3, and 4 of [EIP-86](./eip-86.md)).
  - **Networking**: includes improvements around [devp2p](https://github.com/ethereum/devp2p/blob/readme-spec-links/rlpx.md) ([EIP-8](./eip-8.md)) and [Light Ethereum Subprotocol](https://ethereum.org/en/developers/docs/nodes-and-clients/#light-node), as well as proposed improvements to network protocol specifications of [whisper](https://github.com/ethereum/go-ethereum/issues/16013#issuecomment-364639309) and [swarm](https://github.com/ethereum/go-ethereum/pull/2959).
  - **Interface**: includes improvements around language-level standards like method names ([EIP-6](./eip-6.md)) and [contract ABIs](https://docs.soliditylang.org/en/develop/abi-spec.html).
  - **ERC**: application-level standards and conventions, including contract standards such as token standards ([ERC-20](./eip-20.md)), name registries ([ERC-137](./eip-137.md)), URI schemes, library/package formats, and wallet formats.

- A **Meta EIP** de

It's clear that the chunking strategy didn't work well for this dataset. The chunks often start or end in the middle of sentences, making them less coherent. A better approach would be to split the text at natural boundaries, such as paragraphs or sentences, rather than using fixed character counts. This would help maintain the context and meaning of the content within each chunk.

### 2.3 Splitting by Sections

In [97]:
import re

def split_markdown_by_level(text, level=2):
    """
    Split markdown text by a specific header level.
    
    :param text: Markdown text as a string
    :param level: Header level to split on
    :return: List of sections as strings
    """
    # This regex matches markdown headers
    # For level 2, it matches lines starting with "## "
    header_pattern = r'^(#{' + str(level) + r'} )(.+)$'
    pattern = re.compile(header_pattern, re.MULTILINE)

    # Split and keep the headers
    parts = pattern.split(text)
    
    sections = []
    for i in range(1, len(parts), 3):
        # We step by 3 because regex.split() with
        # capturing groups returns:
        # [before_match, group1, group2, after_match, ...]
        # here group1 is "## ", group2 is the header text
        header = parts[i] + parts[i+1]  # "## " + "Title"
        header = header.strip()

        # Get the content after this header
        content = ""
        if i+2 < len(parts):
            content = parts[i+2].strip()

        if content:
            section = f'{header}\n\n{content}'
        else:
            section = header
        sections.append(section)
    
    return sections

In [99]:
erc_data_chunks_sections = []

for doc in erc_data:
    doc_copy = doc.copy()
    doc_content = doc_copy.pop('content')
    sections = split_markdown_by_level(doc_content, level=2)
    for section in sections:
        section_doc = doc_copy.copy()
        section_doc['section'] = section
        erc_data_chunks_sections.append(section_doc)

In [100]:
len(erc_data_chunks_sections)

4141

In [101]:
erc_data_chunks_sections[0]

{'eip': 1,
 'title': 'EIP Purpose and Guidelines',
 'status': 'Living',
 'type': 'Meta',
 'author': 'Martin Becze <mb@ethereum.org>, Hudson Jameson <hudson@ethereum.org>, et al.',
 'created': datetime.date(2015, 10, 27),
 'filename': 'ERCs-master/ERCS/eip-1.md',
 'section': '## What is an EIP?\n\nEIP stands for Ethereum Improvement Proposal. An EIP is a design document providing information to the Ethereum community, or describing a new feature for Ethereum or its processes or environment. The EIP should provide a concise technical specification of the feature and a rationale for the feature. The EIP author is responsible for building consensus within the community and documenting dissenting opinions.'}

In [103]:
import textwrap

In [104]:
print(textwrap.fill(erc_data_chunks_sections[0]["section"], width=100))

## What is an EIP?  EIP stands for Ethereum Improvement Proposal. An EIP is a design document
providing information to the Ethereum community, or describing a new feature for Ethereum or its
processes or environment. The EIP should provide a concise technical specification of the feature
and a rationale for the feature. The EIP author is responsible for building consensus within the
community and documenting dissenting opinions.


In [106]:
print(textwrap.fill(erc_data_chunks_sections[1]["section"], width=100))

## EIP Rationale  We intend EIPs to be the primary mechanisms for proposing new features, for
collecting community technical input on an issue, and for documenting the design decisions that have
gone into Ethereum. Because the EIPs are maintained as text files in a versioned repository, their
revision history is the historical record of the feature proposal.  For Ethereum implementers, EIPs
are a convenient way to track the progress of their implementation. Ideally each implementation
maintainer would list the EIPs that they have implemented. This will give end users a convenient way
to know the current status of a given implementation or library.


In [107]:
print(textwrap.fill(erc_data_chunks_sections[3]["section"], width=100))

## EIP Work Flow  ### Shepherding an EIP  Parties involved in the process are you, the champion or
*EIP author*, the [*EIP editors*](#eip-editors), and the [*Ethereum Core
Developers*](https://github.com/ethereum/pm).  Before you begin writing a formal EIP, you should vet
your idea. Ask the Ethereum community first if an idea is original to avoid wasting time on
something that will be rejected based on prior research. It is thus recommended to open a discussion
thread on [the Ethereum Magicians forum](https://ethereum-magicians.org/) to do this.  Once the idea
has been vetted, your next responsibility will be to present (by means of an EIP) the idea to the
reviewers and all interested parties, invite editors, developers, and the community to give feedback
on the aforementioned channels. You should try and gauge whether the interest in your EIP is
commensurate with both the work involved in implementing it and how many parties will have to
conform to it. For example, the work required f

In [108]:
print(erc_data_chunks_sections[3]["section"], sep="\n")

## EIP Work Flow

### Shepherding an EIP

Parties involved in the process are you, the champion or *EIP author*, the [*EIP editors*](#eip-editors), and the [*Ethereum Core Developers*](https://github.com/ethereum/pm).

Before you begin writing a formal EIP, you should vet your idea. Ask the Ethereum community first if an idea is original to avoid wasting time on something that will be rejected based on prior research. It is thus recommended to open a discussion thread on [the Ethereum Magicians forum](https://ethereum-magicians.org/) to do this.

Once the idea has been vetted, your next responsibility will be to present (by means of an EIP) the idea to the reviewers and all interested parties, invite editors, developers, and the community to give feedback on the aforementioned channels. You should try and gauge whether the interest in your EIP is commensurate with both the work involved in implementing it and how many parties will have to conform to it. For example, the work required f

In [122]:
len(erc_data_chunks_sections[3]["section"])

5057

I think it worked well, but I need to verify more cases. The last section seens to be big.

### 2.4 Intelligent Chunking with LLM

In [7]:
import google.generativeai as genai
import os

from dotenv import load_dotenv

load_dotenv()

try:
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("The environment variable GOOGLE_API_KEY was not found.")
    
    genai.configure(api_key=api_key)
    print("✅ Google AI configured successfully!")

except ValueError as e:
    logger.error(e)

✅ Google AI configured successfully!


In [8]:
def llm(prompt, model='gemini-2.5-flash'):
    """
    Sends a prompt to the Gemini model and returns the response as text.

    Args:
        prompt (str): The text to send to the model.
        model (str): The Gemini model name to use.
                     'gemini-1.5-flash' is a great fast and capable option.

    Returns:
        str: The generated text response from the model.
    """
    model_instance = genai.GenerativeModel(model)

    response = model_instance.generate_content(prompt)
    
    return response.text

In [9]:
prompt_template = """
    Split the provided document into logical sections that make sense for a Q&A system.
    
    Each section should be self-contained and cover a specific topic or concept.
    
    <DOCUMENT>
    {document}
    </DOCUMENT>
    
    Use this format:
    
    ## Section Name
    
    Section content with all relevant details
    
    ---
    
    ## Another Section Name
    
    Another section content
    
    ---
""".strip()


In [None]:
def intelligent_chunking(text):
    prompt = prompt_template.format(document=text)
    response = llm(prompt)
    sections = response.split('---')
    sections = [s.strip() for s in sections if s.strip()]

    return sections

In [21]:
from tqdm.auto import tqdm

erc_data_ai_chunks = []

for doc in tqdm(erc_data[:10]):  # Limiting to first 10 documents for cost and time control
    doc_copy = doc.copy()
    doc_content = doc_copy.pop('content')

    sections = intelligent_chunking(doc_content)
    for section in sections:
        count += 1
        section_doc = doc_copy.copy()
        section_doc['section'] = section
        erc_data_ai_chunks.append(section_doc)

  0%|          | 0/10 [00:00<?, ?it/s]

In [22]:
len(erc_data_ai_chunks)

256

In [23]:
erc_data_ai_chunks[0]

{'eip': 1,
 'title': 'EIP Purpose and Guidelines',
 'status': 'Living',
 'type': 'Meta',
 'author': 'Martin Becze <mb@ethereum.org>, Hudson Jameson <hudson@ethereum.org>, et al.',
 'created': datetime.date(2015, 10, 27),
 'filename': 'ERCs-master/ERCS/eip-1.md',
 'section': '## What is an EIP?\n\nAn EIP, or Ethereum Improvement Proposal, is a design document that provides information to the Ethereum community or describes a new feature for Ethereum, its processes, or environment. It should contain a concise technical specification of the feature and its rationale. The EIP author is responsible for building community consensus and documenting dissenting opinions.'}

In [24]:
erc_data_ai_chunks[1]

{'eip': 1,
 'title': 'EIP Purpose and Guidelines',
 'status': 'Living',
 'type': 'Meta',
 'author': 'Martin Becze <mb@ethereum.org>, Hudson Jameson <hudson@ethereum.org>, et al.',
 'created': datetime.date(2015, 10, 27),
 'filename': 'ERCs-master/ERCS/eip-1.md',
 'section': "## EIP Rationale and Purpose\n\nEIPs are intended to be the primary mechanism for proposing new features, gathering technical input from the community, and documenting Ethereum's design decisions. Since EIPs are maintained as text files in a versioned repository, their revision history serves as a historical record of feature proposals. For Ethereum implementers, EIPs offer a convenient way to track implementation progress, allowing them to list implemented EIPs for end-users to check the status of a given implementation or library."}

In [25]:
erc_data_ai_chunks[2]

{'eip': 1,
 'title': 'EIP Purpose and Guidelines',
 'status': 'Living',
 'type': 'Meta',
 'author': 'Martin Becze <mb@ethereum.org>, Hudson Jameson <hudson@ethereum.org>, et al.',
 'created': datetime.date(2015, 10, 27),
 'filename': 'ERCs-master/ERCS/eip-1.md',
 'section': '## EIP Types\n\nThere are three main types of EIPs, each serving a distinct purpose:\n\n*   **Standards Track EIP**: Describes changes affecting most or all Ethereum implementations. This includes network protocol changes, block/transaction validity rules, application standards, or any change impacting interoperability. Standards Track EIPs comprise a design document, an implementation, and sometimes an update to the formal specification. They are further categorized into:\n    *   **Core**: Improvements requiring a consensus fork or relevant to "core dev" discussions (e.g., EIP-5, EIP-101). If a Core EIP proposes changes to the EVM, it must refer to instructions by their mnemonics and define their opcodes (e.g., `

In [26]:
erc_data_ai_chunks[3]

{'eip': 1,
 'title': 'EIP Purpose and Guidelines',
 'status': 'Living',
 'type': 'Meta',
 'author': 'Martin Becze <mb@ethereum.org>, Hudson Jameson <hudson@ethereum.org>, et al.',
 'created': datetime.date(2015, 10, 27),
 'filename': 'ERCs-master/ERCS/eip-1.md',
 'section': "## EIP Work Flow: Shepherding an EIP\n\nThe EIP work flow involves the EIP author (champion), EIP editors, and Ethereum Core Developers.\n\n1.  **Vetting the Idea**: Before writing a formal EIP, authors should vet their idea by opening a discussion thread on the Ethereum Magicians forum to check for originality and avoid redundant work.\n2.  **Presenting the Idea and Gathering Feedback**: Once vetted, the author presents the idea via an EIP to reviewers and interested parties, inviting feedback from editors, developers, and the community. Authors should gauge if community interest justifies the implementation work and the number of parties required to conform. Negative community feedback can prevent an EIP from adv

In [27]:
print(erc_data_ai_chunks[3]["section"], sep="\n")

## EIP Work Flow: Shepherding an EIP

The EIP work flow involves the EIP author (champion), EIP editors, and Ethereum Core Developers.

1.  **Vetting the Idea**: Before writing a formal EIP, authors should vet their idea by opening a discussion thread on the Ethereum Magicians forum to check for originality and avoid redundant work.
2.  **Presenting the Idea and Gathering Feedback**: Once vetted, the author presents the idea via an EIP to reviewers and interested parties, inviting feedback from editors, developers, and the community. Authors should gauge if community interest justifies the implementation work and the number of parties required to conform. Negative community feedback can prevent an EIP from advancing past the Draft stage.
3.  **Community Consensus**: The champion's role is to write the EIP in the prescribed format, lead discussions in appropriate forums, and build community consensus around the idea.


In [28]:
len(erc_data_ai_chunks[3]["section"])

929

The result using AI seems to have been smaller than the previous method. Did we lose a lot of information?

## 3. Add Search

### 3.1 Lexical Indexing with MinSearch

In [6]:
from minsearch import Index

index = Index(
    text_fields=["chunk", "title", "description", "author", "status", "type", "filename"],
    keyword_fields=[]
)

index.fit(erc_data_chunks)

<minsearch.minsearch.Index at 0x14d6b0d70>

In [31]:
query = "What is ERC-4337?"
text_results = index.search(query)

In [33]:
text_results[0]

{'start': 2000,
 'chunk': "This MUST be true if this is ERC-1155 Token Metadata, otherwise, this MUST be omitted.\n     * Setting this to true indicates to wallets that the address should be treated as an ERC-1155 token.\n     **/\n    erc1155?: boolean | undefined;\n}\n```\n\n### ERC-20 Extension\n\n#### ERC-20 Interface Extension\n\nCompliant contracts MUST implement the following Solidity interface:\n\n```solidity\npragma solidity ^0.8.0;\n\n/// @title  ERC-20 Metadata Extension\ninterface ERC20TokenMetadata /* is ERC20 */ {\n    /// @notice     Gets an ERC-721-like token URI\n    /// @dev        The resolved data MUST be in JSON format and support ERC-1046's ERC-20 Token Metadata Schema\n    function tokenURI() external view returns (string);\n}\n```\n\n#### ERC-20 Token Metadata Schema\n\nThe resolved JSON of the `tokenURI` described in the ERC-20 Interface Extension section MUST conform to the following TypeScript interface:\n\n```typescript\n/**\n * Asset Metadata\n */\ninterfac

### 3.2 Vector Search

In [14]:
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer('multi-qa-distilbert-cos-v1')

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: multi-qa-distilbert-cos-v1


In [None]:
import numpy as np

from minsearch import VectorSearch
from tqdm.auto import tqdm

erc_data_embeddings = []


erc_data_chunks = erc_data_chunks[0:100]  # Limiting to first 100 chunks for cost and time control

for d in tqdm(erc_data_chunks):
    v = embedding_model.encode(d["chunk"])
    erc_data_embeddings.append(v)

erc_data_embeddings = np.array(erc_data_embeddings)

erc_data_vindex = VectorSearch()
erc_data_vindex.fit(erc_data_embeddings, erc_data_chunks)

  0%|          | 0/100 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

<minsearch.vector.VectorSearch at 0x17288eba0>

In [28]:
query = "What is the main purpose of the ERC-20?"
q = embedding_model.encode(query)
vector_results = erc_data_vindex.search(q)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [29]:
vector_results

[{'start': 7000,
  'chunk': "` field in the metadata.\n\n## Rationale\n\nThis ERC makes adding metadata to ERC-20 tokens more straightforward for developers, with minimal to no disruption to the overall ecosystem. Using the same parameter name makes it easier to reuse code.\n\nAdditionally, the recommendations not to use ERC-20's `name`, `symbol`, and `decimals` functions save gas.\n\nBuilt-in interoperability is useful as otherwise it might not be easy to differentiate the type of the token. Interoperability could be done using [ERC-165](./eip-165.md), but static calls are time-inefficient for wallets and websites, and is generally inflexible. Instead, including interoperability data in the token URI increases flexibility while also giving a performance increase.\n\n## Backwards Compatibility\n\nThis EIP is fully backwards compatible as its implementation simply extends the functionality of ERC-20 tokens and is optional. Additionally, it makes backward compatible recommendations for E

### 3.3 Hybrid Search

In [34]:
query = "What is the main purpose of the ERC-20?"

text_results = index.search(query, num_results=5)

q = embedding_model.encode(query)
vector_results = erc_data_vindex.search(q, num_results=5)

final_results = text_results + vector_results

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [35]:
len(final_results)

10

In [36]:
final_results

[{'start': 2000,
  'chunk': "This MUST be true if this is ERC-1155 Token Metadata, otherwise, this MUST be omitted.\n     * Setting this to true indicates to wallets that the address should be treated as an ERC-1155 token.\n     **/\n    erc1155?: boolean | undefined;\n}\n```\n\n### ERC-20 Extension\n\n#### ERC-20 Interface Extension\n\nCompliant contracts MUST implement the following Solidity interface:\n\n```solidity\npragma solidity ^0.8.0;\n\n/// @title  ERC-20 Metadata Extension\ninterface ERC20TokenMetadata /* is ERC20 */ {\n    /// @notice     Gets an ERC-721-like token URI\n    /// @dev        The resolved data MUST be in JSON format and support ERC-1046's ERC-20 Token Metadata Schema\n    function tokenURI() external view returns (string);\n}\n```\n\n#### ERC-20 Token Metadata Schema\n\nThe resolved JSON of the `tokenURI` described in the ERC-20 Interface Extension section MUST conform to the following TypeScript interface:\n\n```typescript\n/**\n * Asset Metadata\n */\ninterf

## 4. Agents and Tools

In [None]:
from typing import List, Any

def text_search(query: str) -> List[Any]:
    """
    Perform a text-based search on the Ethereum ERC index.

    Args:
        query (str): The search query string.

    Returns:
        List[Any]: A list of up to 5 search results returned by the Ethereum ERC index.
    """
    return index.search(query, num_results=5)

In [8]:
system_prompt = """
You are an expert in Computer Science and Distributed Ledger Technology, 
with an emphasis on the Ethereum blockchain.

Use the reference material to answer the questions.

If the search does not return relevant results, inform the user and provide
general guidance.
"""

In [None]:
from pydantic_ai import Agent

agent = Agent(
    name="ethereum_agent",
    instructions=system_prompt,
    tools=[text_search],
    model="gemini-2.5-flash"
)

In [12]:
question = "How to create a token on Ethereum blockchain?"

result = await agent.run(user_prompt=question)

INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


In [14]:
print(result.output, sep="\n")

To create a token on the Ethereum blockchain, you generally need to deploy a smart contract that adheres to a token standard. The most common standard for fungible tokens is **ERC-20**, as described in EIP-20.

The ERC-20 standard provides a common interface for tokens within smart contracts, enabling basic functionalities such as:

*   **`name()`**: Returns the name of the token (e.g., "MyToken"). (Optional)
*   **`symbol()`**: Returns the symbol of the token (e.g., "HIX"). (Optional)
*   **`decimals()`**: Returns the number of decimals the token uses (e.g., `8` means to divide the token amount by `100000000` for user representation). (Optional)
*   **`totalSupply()`**: Returns the total token supply.
*   **`balanceOf(address _owner)`**: Returns the account balance of a given address.
*   **`transfer(address _to, uint256 _value)`**: Transfers `_value` amount of tokens to address `_to`.
*   **`transferFrom(address _from, address _to, uint256 _value)`**: Transfers `_value` amount of tok

## 5. Evaluation

### 5.1 Logging

In [20]:
def log_entry(agent, messages, source="user"):
    tools = []

    for ts in agent.toolsets:
        tools.extend(ts.tools.keys())

    dict_messages = ModelMessagesTypeAdapter.dump_python(messages)

    return {
        "agent_name": agent.name,
        "system_prompt": agent._instructions,
        "provider": agent.model.system,
        "model": agent.model.model_name,
        "tools": tools,
        "messages": dict_messages,
        "source": source
    }

In [27]:
# Writing logs to a folder

LOG_DIR = Path("logs")
LOG_DIR.mkdir(exist_ok=True)

def serializer(obj):
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    raise TypeError(f"Type {type(obj)} not serializable")


def log_interaction_to_file(agent, messages, source="user"):
    entry = log_entry(agent, messages, source)

    ts = entry["messages"][-1]["timestamp"]
    ts_str = ts.strftime("%Y%m%d_%H%M%S")
    rand_hex = secrets.token_hex(3)

    filename = f"{agent.name}_{ts_str}_{rand_hex}.json"
    filepath = LOG_DIR / filename

    with filepath.open("w", encoding="utf-8") as f_out:
        json.dump(entry, f_out, indent=2, default=serializer)

    return filepath

In [None]:
question = input()
print("#"*50)
print(f"Question: {question}")
print("#"*50)

result = await agent.run(user_prompt=question)

print(result.output)
log_interaction_to_file(agent, result.new_messages())

INFO:google_genai.models:AFC is enabled with max remote calls: 10.


##################################################
Question: How to create an ERC-20 token?
##################################################


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


To create an ERC-20 token, you need to implement a smart contract that adheres to the ERC-20 standard. This standard defines a set of functions and events that an Ethereum token contract must implement.

Several implementations of ERC-20 compliant tokens are available, each with different trade-offs in terms of gas efficiency and security. Popular and well-audited examples include:

*   **OpenZeppelin implementation:** This is a widely used and recommended library for secure smart contract development.
*   **ConsenSys implementation:** Another reputable implementation from ConsenSys.

These implementations provide a solid foundation for creating your own ERC-20 token. You would typically use Solidity to write your smart contract, compile it, and then deploy it to the Ethereum blockchain.


PosixPath('logs/faq_agent_20250928_205900_dda0b9.json')

In [34]:
async def ask_question(agent, question):
    print("#"*50)
    print(f"Question: {question}")
    print("#"*50)

    result = await agent.run(user_prompt=question)

    print(result.output)
    log_interaction_to_file(agent, result.new_messages())

    return result.output

### 5.2 Adding References

In [None]:
system_prompt = """
You are an expert in Computer Science and Distributed Ledger Technology,  with an emphasis on the Ethereum blockchain.

Use the search tool to find relevant information from the Ethereum ERC materials before answering questions.  

If you can find specific information through search, use it to provide accurate answers.

Always include references by citing the filename of the source material you used.  
Format: [LINK TITLE](FULL_GITHUB_LINK)

If the search doesn't return relevant results, let the user know and provide general guidance.
""".strip()

In [31]:
# Create another version of agent, let's call it faq_agent_v2
agent = Agent(
    name="faq_agent_v2",
    instructions=system_prompt,
    tools=[text_search],
    model="gemini-2.5-flash"
)

In [36]:
result = await ask_question(agent, "How to create an ERC-20 token?")
result

INFO:google_genai.models:AFC is enabled with max remote calls: 10.


##################################################
Question: How to create an ERC-20 token?
##################################################


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


To create an ERC-20 token, you need to deploy a smart contract on the Ethereum blockchain that adheres to the ERC-20 Token Standard. This standard defines a common set of rules that all ERC-20 tokens must follow, allowing them to interact seamlessly with various applications, wallets, and decentralized exchanges.

The core of creating an ERC-20 token involves implementing the following functions and events in your Solidity smart contract:

### Required Functions

*   **`totalSupply()`**: Returns the total token supply.
*   **`balanceOf(address _owner)`**: Returns the account balance of a specified address.
*   **`transfer(address _to, uint256 _value)`**: Transfers `_value` amount of tokens to a specified address and *must* trigger a `Transfer` event. This function should revert if the caller's account balance is insufficient. Transfers of 0 values should be treated as normal transfers and still fire the `Transfer` event.
*   **`transferFrom(address _from, address _to, uint256 _value)`*

'To create an ERC-20 token, you need to deploy a smart contract on the Ethereum blockchain that adheres to the ERC-20 Token Standard. This standard defines a common set of rules that all ERC-20 tokens must follow, allowing them to interact seamlessly with various applications, wallets, and decentralized exchanges.\n\nThe core of creating an ERC-20 token involves implementing the following functions and events in your Solidity smart contract:\n\n### Required Functions\n\n*   **`totalSupply()`**: Returns the total token supply.\n*   **`balanceOf(address _owner)`**: Returns the account balance of a specified address.\n*   **`transfer(address _to, uint256 _value)`**: Transfers `_value` amount of tokens to a specified address and *must* trigger a `Transfer` event. This function should revert if the caller\'s account balance is insufficient. Transfers of 0 values should be treated as normal transfers and still fire the `Transfer` event.\n*   **`transferFrom(address _from, address _to, uint25

### 5.3 LLM as a Judge

In [38]:
evaluation_prompt = """
Use this checklist to evaluate the quality of an AI agent's answer (<ANSWER>) to a user question (<QUESTION>).
We also include the entire log (<LOG>) for analysis.

For each item, check if the condition is met. 

Checklist:

- instructions_follow: The agent followed the user's instructions (in <INSTRUCTIONS>)
- instructions_avoid: The agent avoided doing things it was told not to do  
- answer_relevant: The response directly addresses the user's question  
- answer_clear: The answer is clear and correct  
- answer_citations: The response includes proper citations or sources when required  
- completeness: The response is complete and covers all key aspects of the request
- tool_call_search: Is the search tool invoked? 

Output true/false for each check and provide a short explanation for your judgment.
""".strip()

In [39]:
class EvaluationCheck(BaseModel):
    check_name: str
    justification: str
    check_pass: bool

class EvaluationChecklist(BaseModel):
    checklist: list[EvaluationCheck]
    summary: str

In [40]:
eval_agent = Agent(
    name="eval_agent",
    model="gemini-2.5-flash-lite",
    instructions=evaluation_prompt,
    output_type=EvaluationChecklist
)

In [41]:
user_prompt_format = """
<INSTRUCTIONS>{instructions}</INSTRUCTIONS>
<QUESTION>{question}</QUESTION>
<ANSWER>{answer}</ANSWER>
<LOG>{log}</LOG>
""".strip()

In [42]:
def load_log_file(log_file):
    with open(log_file, "r") as f_in:
        log_data = json.load(f_in)
        log_data["log_file"] = log_file
        return log_data

In [43]:
log_record = load_log_file("./logs/faq_agent_v2_20250928_210612_516117.json")

instructions = log_record["system_prompt"]
question = log_record["messages"][0]["parts"][0]["content"]
answer = log_record["messages"][-1]["parts"][0]["content"]
log = json.dumps(log_record["messages"])

user_prompt = user_prompt_format.format(
    instructions=instructions,
    question=question,
    answer=answer,
    log=log
)

In [44]:
result = await eval_agent.run(user_prompt, output_type=EvaluationChecklist)

checklist = result.output
print(checklist.summary)

for check in checklist.checklist:
    print(check)

INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"


The user asked how to create an ERC-20 token. The agent successfully used the search tool to find information on the ERC-20 Token Standard. It then provided a comprehensive answer detailing the required functions, optional functions, and events that constitute an ERC-20 token. The answer also included references to example implementations from OpenZeppelin and ConsenSys, and a citation to the ERC-20 Token Standard document as requested.
check_name='instructions_follow' justification='The agent followed all instructions, including using the search tool, providing accurate answers, and including citations.' check_pass=True
check_name='instructions_avoid' justification='The agent did not avoid any instructions.' check_pass=True
check_name='answer_relevant' justification="The answer directly addresses the user's question on how to create an ERC-20 token." check_pass=True
check_name='answer_clear' justification='The answer is clear, well-structured, and provides a comprehensive explanation 

In [45]:
def simplify_log_messages(messages):
    log_simplified = []

    for m in messages:
        parts = []
    
        for original_part in m["parts"]:
            part = original_part.copy()
            kind = part["part_kind"]
    
            if kind == "user-prompt":
                del part["timestamp"]
            if kind == 'tool-call':
                del part["tool_call_id"]
            if kind == "tool-return":
                del part["tool_call_id"]
                del part["metadata"]
                del part["timestamp"]
                # Replace actual search results with placeholder to save tokens
                part["content"] = "RETURN_RESULTS_REDACTED"
            if kind == "text":
                del part["id"]
    
            parts.append(part)
    
        message = {
            "kind": m["kind"],
            "parts": parts
        }
    
        log_simplified.append(message)
    return log_simplified

In [46]:
async def evaluate_log_record(eval_agent, log_record):
    messages = log_record["messages"]

    instructions = log_record["system_prompt"]
    question = messages[0]["parts"][0]["content"]
    answer = messages[-1]["parts"][0]["content"]

    log_simplified = simplify_log_messages(messages)
    log = json.dumps(log_simplified)

    user_prompt = user_prompt_format.format(
        instructions=instructions,
        question=question,
        answer=answer,
        log=log
    )

    result = await eval_agent.run(user_prompt, output_type=EvaluationChecklist)

    return result.output 

In [47]:
log_record = load_log_file('./logs/faq_agent_v2_20250928_210612_516117.json')
eval1 = await evaluate_log_record(eval_agent, log_record)

INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"


In [48]:
print(eval1.summary)

for check in eval1.checklist:
    print(check)

The user asked how to create an ERC-20 token. The agent successfully used the search tool to find relevant information on the ERC-20 Token Standard. It provided a comprehensive explanation of the required functions (totalSupply, balanceOf, transfer, transferFrom, approve, allowance) and events (Transfer, Approval) that must be implemented in a Solidity smart contract. The agent also suggested using existing battle-tested implementations like those from OpenZeppelin or ConsenSys and mentioned the deployment process using tools like Remix, Hardhat, or Truffle. A relevant citation in the specified format was included.
check_name='instructions_follow' justification='The agent successfully followed the instructions by searching for information on how to create an ERC-20 token and providing a detailed explanation based on the search results.' check_pass=True
check_name='instructions_avoid' justification='The agent avoided any actions it was instructed not to do.' check_pass=True
check_name='

### 5.4 Data Generation

In [49]:
question_generation_prompt = """
You are helping to create test questions for an AI agent that answers questions about the Ethereum ERC documentation.

Based on the provided data content, generate realistic questions that users might ask.

The questions should:

- Be natural and varied in style
- Range from simple to complex
- Include both specific technical questions and general course questions

Generate one question for each record.
""".strip()

In [50]:
class QuestionsList(BaseModel):
    questions: list[str]

question_generator = Agent(
    name="question_generator",
    instructions=question_generation_prompt,
    model='gemini-2.5-flash',
    output_type=QuestionsList
)

In [51]:
sample = random.sample(erc_data, 10)
prompt_docs = [d["content"] for d in sample]
prompt = json.dumps(prompt_docs)

result = await question_generator.run(prompt)
questions = result.output.questions

INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


In [52]:
len(questions)

7

In [53]:
questions

['How does ERC-4494 address the user experience issue of gasless approvals for ERC-721 tokens, and what specific functions does it introduce to achieve this?',
 'What are the primary motivations behind ERC-3009, and how does it aim to improve upon existing ERC-20 transfer mechanisms, particularly concerning transaction flexibility and nonce management?',
 'Explain the purpose of ERC-5247 and provide examples of scenarios where a smart contract executable proposal would be beneficial.',
 'What problem does ERC-2477 aim to solve regarding token metadata, and which functions are central to its integrity verification mechanism?',
 'Describe the core components and functionalities of the ERC-7390 standard for vanilla options, specifically highlighting how it handles multiple buyers and collateralization.',
 'What is the main purpose of ERC-6150, and how does it model hierarchical relationships between NFTs using its core functions and event?',
 'Explain the concept of MixHash within the EIP

In [54]:
for q in tqdm(questions):
    print(q)

    result = await agent.run(user_prompt=q)
    print(result.output)

    log_interaction_to_file(
        agent,
        result.new_messages(),
        source='ai-generated'
    )
    time.sleep(5)
    print()

  0%|          | 0/7 [00:00<?, ?it/s]

INFO:google_genai.models:AFC is enabled with max remote calls: 10.


How does ERC-4494 address the user experience issue of gasless approvals for ERC-721 tokens, and what specific functions does it introduce to achieve this?


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


ERC-4494 addresses the user experience issue of gasless approvals for ERC-721 tokens by extending the "Permit" approval flow, which was initially introduced for ERC-20 tokens in ERC-2612. This significantly improves user experience by allowing approvals to be granted via a signed message (structured according to ERC-712) rather than requiring a separate, gas-consuming transaction. Instead of the typical two-transaction process (one to approve and a second for the contract to pull the asset), a permit-style flow only requires the owner to sign a message and a single subsequent transaction for the asset to be pulled [ERCs-master/ERCS/erc-4494.md](https://github.com/ethereum/ERCs/blob/master/ERCS/erc-4494.md).

To achieve this, ERC-4494 introduces three specific functions to the ERC-721 standard:

1.  **`permit(address spender, uint256 tokenId, uint256 deadline, bytes memory sig)`**: This function allows the owner of an ERC-721 NFT to approve a `spender` for a specific `tokenId` by provid

INFO:google_genai.models:AFC is enabled with max remote calls: 10.



What are the primary motivations behind ERC-3009, and how does it aim to improve upon existing ERC-20 transfer mechanisms, particularly concerning transaction flexibility and nonce management?


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


ERC-3009, titled "Transfer With Authorization," is primarily motivated by the need to enhance the security and flexibility of token transfers, especially when involving third-party relayers or smart contracts. It aims to improve upon existing ERC-20 transfer mechanisms by:

*   **Enabling Authorized Transfers:** ERC-3009 allows token owners to authorize a third party to initiate a transfer on their behalf using a signed message. This is achieved through functions like `transferWithAuthorization` and `receiveWithAuthorization`. This mechanism provides greater flexibility by decoupling the signing of a transaction from its execution, allowing for scenarios like gasless transactions where a relayer pays for the gas.
*   **Mitigating Front-Running Risks:** The standard introduces the `receiveWithAuthorization` function, which is recommended when calling from other smart contracts. This is a crucial improvement to prevent front-running attacks where an attacker could extract a transfer auth

INFO:google_genai.models:AFC is enabled with max remote calls: 10.



Explain the purpose of ERC-5247 and provide examples of scenarios where a smart contract executable proposal would be beneficial.


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


ERC-5247, titled "Smart Contract Executable Proposal Interface," defines a standard interface for creating and executing proposals directly on the Ethereum blockchain. The primary purpose of this ERC is to **separate the definition of the code to be executed from the actual execution process itself.**

A smart contract executable proposal, as defined by ERC-5247, includes a series of function calls with details such as:

*   **Target contract addresses:** The addresses of the contracts that the proposal intends to interact with.
*   **Ether values:** The amount of Ether (if any) to be sent with each function call.
*   **Gas limits:** The maximum gas to be used for each function call.
*   **Calldatas:** The encoded function calls and their arguments.

By standardizing this interface, ERC-5247 allows for proposals to be submitted, recorded, and then later executed on-chain, often after some form of approval or consensus.

Here are examples of scenarios where a smart contract executable p

INFO:google_genai.models:AFC is enabled with max remote calls: 10.



What problem does ERC-2477 aim to solve regarding token metadata, and which functions are central to its integrity verification mechanism?


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


ERC-2477 aims to solve the problem of **lack of integrity verification for token metadata and its associated schemas** in existing token standards like ERC-721 and ERC-1155. While these standards provide URIs for metadata, there isn't a built-in mechanism to confirm that the metadata (or its defining schema) hasn't been altered or tampered with after it was initially referenced. This standard provides a method to ensure the trustworthiness and immutability of both the metadata and its schema, drawing inspiration from the World Wide Web's Sub-Resource Integrity (SRI) approach.

The central functions for its integrity verification mechanism are:

*   **`tokenURIIntegrity(uint id)`**: This function is used to verify the integrity of the token's metadata document, which is typically accessed via functions like `tokenURI(id)` (for ERC-721) or `uri(id)` (for ERC-1155). It returns a digest and the hash algorithm used for the verification.
*   **`tokenURISchemaIntegrity(uint id)`**: This funct

INFO:google_genai.models:AFC is enabled with max remote calls: 10.



Describe the core components and functionalities of the ERC-7390 standard for vanilla options, specifically highlighting how it handles multiple buyers and collateralization.


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


The ERC-7390 standard defines an interface for creating, managing, and executing simple time-limited call/put (vanilla) options on the Ethereum blockchain. It aims to standardize options contracts to foster interoperability within the decentralized finance ecosystem [ERCs-master/ERCS/erc-7390.md](https://github.com/ethereum/ERCs/blob/master/ERCS/erc-7390.md).

### Core Components and Functionalities

ERC-7390 provides a comprehensive set of functions and events for seamless interactions with vanilla options. Instead of representing a single, expiring option, it can store multiple "issuances," each identified by a unique ID. Each issuance can be bought, exercised, or canceled independently. The standard supports both European and American style options through the `exerciseWindowStart` and `exerciseWindowEnd` parameters. For a European option, these dates would typically be set closely around the expiration date, while for an American option, the `exerciseWindowStart` could be set to th

INFO:google_genai.models:AFC is enabled with max remote calls: 10.



What is the main purpose of ERC-6150, and how does it model hierarchical relationships between NFTs using its core functions and event?


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


ERC-6150 is an extension to EIP-721 that proposes **multi-layer filesystem-like hierarchical NFTs**. Its main purpose is to standardize the interface for these hierarchical NFTs, allowing for the creation and management of tree-like structures on the blockchain. This standard enables complete on-chain storage of hierarchy, management of relationships within a single contract, transferrable ownership of categories/directories as NFTs, and permissionless access to the hierarchical structure. [ERCs-master/ERCS/erc-6150.md](https://github.com/ethereum/ERCs/blob/master/ERCS/erc-6150.md)

ERC-6150 models hierarchical relationships between NFTs by treating each NFT as a node in a tree structure, which can be a root, leaf, parent, or child node. [ERCs-master/ERCS/erc-6150.md](https://github.com/ethereum/ERCs/blob/master/ERCS/erc-6150.md)

Its core functions and event include:

*   **Functions:**
    *   `parentOf(uint256 tokenId)`: Retrieves the parent NFT of a specified token.
    *   `childr

INFO:google_genai.models:AFC is enabled with max remote calls: 10.



Explain the concept of MixHash within the EIP for Public Data Storage Proofs, detailing its structure and how it contributes to verifying public data.


INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent "HTTP/1.1 200 OK"


In the context of EIP-7585, "MixHash and Public Data Storage Proofs," **MixHash** serves as a crucial identifier for public data within the Ethereum ecosystem. It's a fundamental element in enabling the verification and tracking of valuable public information.

### Structure of MixHash

MixHash is defined as a `bytes32` data type. While the EIP leaves room for future expansion with different `HashType` categories, its default and current recommended `HashType` is **SHA256**. This standardized structure ensures consistency and interoperability when referencing and verifying public data.

### How MixHash Contributes to Verifying Public Data

MixHash plays several key roles in the verification of public data:

1.  **Public Data Identification and Value Tracking:** It allows the Ethereum ecosystem to confirm whether a specific MixHash corresponds to public data and helps in tracking the value associated with that data. This is essential for ensuring that storage proofs are generated for ge

In [56]:
eval_set = []

for log_file in LOG_DIR.glob("*.json"):
    if "faq_agent_v2" not in log_file.name:
        continue

    log_record = load_log_file(log_file)
    if log_record["source"] != "ai-generated":
        continue

    eval_set.append(log_record)

In [58]:
eval_results = []

for log_record in tqdm(eval_set):
    eval_result = await evaluate_log_record(eval_agent, log_record)
    eval_results.append((log_record, eval_result))
    time.sleep(10)

  0%|          | 0/7 [00:00<?, ?it/s]

INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent "HTTP/1.1 200 OK"
INFO:google_genai.models:AFC is enabled with max remote calls: 10.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-

In [59]:
# Convert data to be suitable to use Pandas

rows = []

for log_record, eval_result in eval_results:
    messages = log_record["messages"]

    row = {
        "file": log_record["log_file"].name,
        "question": messages[0]["parts"][0]["content"],
        "answer": messages[-1]["parts"][0]["content"],
    }

    checks = {c.check_name: c.check_pass for c in eval_result.checklist}
    row.update(checks)

    rows.append(row)

In [60]:
df_evals = pd.DataFrame(rows)

In [61]:
df_evals.mean(numeric_only=True)

instructions_follow    1.0
instructions_avoid     1.0
answer_relevant        1.0
answer_clear           1.0
answer_citations       1.0
completeness           1.0
tool_call_search       1.0
dtype: float64