<a href="https://colab.research.google.com/github/elephant-xyz/notebook/blob/main/Mining_Seed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Seed Mining Process

In [None]:
# @title Step 1: Upload .env

In [1]:
# @title Step 2: Prepare
# @title  {"vertical-output":true}
import os

# Colab form fields: each `name = value  # @param` line is rendered as an
# editable input, so these lines keep their exact shape and order.
parcel_id = "30434108090030050" # @param {"type":"string"}
address = "1605 S US highway 1 3E, Jupiter, FL 33477" # @param {"type":"string"}
request_method = "GET" # @param {"type":"string"}
url = "https://pbcpao.gov/Property/Details?parcelId=30434108090030050" # @param {"type":"string"}
County = "palm beach" # @param {"type":"string"}
headers = "" # @param {"type":"string"}

# Export every form value into the process environment under its own name.
os.environ.update({
    "parcel_id": parcel_id,
    "address": address,
    "request_method": request_method,
    "url": url,
    "County": County,
    "headers": headers,
})


import json
import os
import re
import sys
from urllib.parse import urlparse, parse_qs


# Read the Step 2 inputs back from the environment; any variable that is
# not set falls back to an empty string. Note the "County" variable is read
# into the lowercase name `county`.
parcel_id, address, request_method, url, county, headers = (
    os.environ.get(name, "")
    for name in ("parcel_id", "address", "request_method", "url", "County", "headers")
)


def is_empty_value(value):
    """Return True when `value` is None or a string that is blank once stripped.

    Any other value (including 0, False, or an empty list) is not considered empty.
    """
    if isinstance(value, str):
        return not value.strip()
    return value is None

def ensure_directory(file_path):
    """Create the parent directory of `file_path` (and any missing ancestors).

    Does nothing when `file_path` has no directory component. Passing a path
    that ends with a separator (e.g. "output/123/") creates that directory
    itself, because `os.path.dirname` then returns the path without the
    trailing separator.
    """
    directory = os.path.dirname(file_path)
    if directory:
        # exist_ok=True avoids the check-then-create race of testing
        # os.path.exists() first: an already-existing directory is fine.
        os.makedirs(directory, exist_ok=True)

def extract_query_params_and_base_url(url):
    """Split `url` into its base part and its query parameters.

    Returns a `(base_url, params)` pair:
      * `base_url` is "<scheme>://<netloc><path>" with the query string removed;
      * `params` maps each query key to the list of its values, or is None
        when the URL carries no query parameters.

    An empty or None `url` yields `(None, None)`. If parsing raises, a warning
    is printed and `(url, None)` is returned.
    """
    if is_empty_value(url):
        return None, None

    try:
        parts = urlparse(url)
        base_url = f"{parts.scheme}://{parts.netloc}{parts.path}"

        # parse_qs groups repeated keys, giving the "object of arrays" shape
        # used for multiValueQueryString.
        params = parse_qs(parts.query)
        if not params:
            return base_url, None
        return base_url, dict(params)
    except Exception as exc:
        print(f"Warning: Could not parse URL: {exc}")
        return url, None

def create_parcel_folder(parcel_id, address, method, url, county, headers):
    """Write the seed JSON files for one parcel under `output/<parcel_id>/`.

    The folder name is `parcel_id` with every character other than word
    characters and "-" replaced by "_". Four files are written:
    unnormalized_address.json, property_seed.json,
    relationship_property_to_address.json and the root "Seed" schema file.
    Blank or None inputs are stored as JSON null; `headers` is only included
    in the request description when it is non-blank.

    Returns `(folder_name, unnormalized_address_data, property_seed_data)`.
    """
    def _or_none(value):
        # Blank inputs are serialized as null rather than as empty strings.
        return None if is_empty_value(value) else value

    folder_name = "output/" + re.sub(r"[^\w\-_]", "_", str(parcel_id))
    # The trailing slash makes ensure_directory create the folder itself.
    ensure_directory(folder_name + "/")

    base_url, multi_value_query = extract_query_params_and_base_url(url)
    include_headers = bool(headers) and not is_empty_value(headers)

    def _source_http_request():
        # Built fresh on each call so the two documents do not share a dict.
        request = {
            "method": _or_none(method),
            "url": _or_none(base_url),
            "multiValueQueryString": multi_value_query,
        }
        if include_headers:
            request["headers"] = headers
        return request

    unnormalized_address_data = {
        "full_address": _or_none(address),
        "source_http_request": _source_http_request(),
        "county_jurisdiction": _or_none(county),
        "request_identifier": _or_none(parcel_id),
    }

    property_seed_data = {
        "parcel_id": _or_none(parcel_id),
        "source_http_request": _source_http_request(),
        "request_identifier": _or_none(parcel_id),
    }

    # Links the property seed document to the address document.
    relationship_data = {
        "from": {"/": "./property_seed.json"},
        "to": {"/": "./unnormalized_address.json"}
    }

    # Root document pointing at the relationship file.
    root_schema = {
        "label": "Seed",
        "relationships": {"property_seed": {"/": "./relationship_property_to_address.json"}},
    }

    documents = {
        "unnormalized_address.json": unnormalized_address_data,
        "property_seed.json": property_seed_data,
        "relationship_property_to_address.json": relationship_data,
        "bafkreif7ywbjxu3s6jfi6ginvmsufeux3cd5eujuivg2y7tmqt2qk4rsoe.json": root_schema,
    }
    for basename, payload in documents.items():
        with open(f"{folder_name}/{basename}", "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)

    return folder_name, unnormalized_address_data, property_seed_data

def process_input_data():
    """Validate the notebook inputs and write the seed files for the parcel.

    Reads the module-level `parcel_id`, `address`, `request_method`, `url`,
    `county` and `headers`. If `parcel_id` is blank, an error is printed and
    nothing is written. Otherwise `create_parcel_folder` is called and a
    success message is printed.

    Any exception is caught, reported together with its traceback, and not
    propagated. Returns None.
    """
    try:
        # Validate required data
        if is_empty_value(parcel_id):
            print("❌ Error: parcel_id is required but not provided")
            return

        # create_parcel_folder parses the URL itself, so it is not parsed
        # here as well (doing so would print any parse warning twice).
        create_parcel_folder(parcel_id, address, request_method, url, county, headers)

        print(f"\n✅ Prepare done for parcel ID {parcel_id}.")

    except Exception as e:
        print(f"❌ Error processing input data: {e}")
        import traceback
        traceback.print_exc()


# Process the input data
process_input_data()



✅ Prepare done for parcel ID 30434108090030050.


In [3]:
# @title Step 2: Transform
! pip3 install python-dotenv -q

from dotenv import load_dotenv
load_dotenv()

import subprocess
import sys
import csv


def get_seed_cid_and_html_link(path="seed-results.csv"):
    """Return `(dataGroupCid, htmlLink)` from the first data row of the CSV at `path`.

    Raises ValueError if the file has no data rows.
    """
    with open(path, newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            # Only the first data row is used.
            return row["dataGroupCid"], row["htmlLink"]
    raise ValueError("CSV file is empty")


def has_submit_errors(path="submit_errors.csv"):
    """
    Return True if the CSV at `path` has at least one row after the header.
    """
    with open(path, newline='', encoding='utf-8') as csvfile:
        for _ in csv.DictReader(csvfile):
            return True
    return False


def run_validate_and_upload():
    """Run the CLI `validate-and-upload` command on `output` and report the result.

    The command's stdout is discarded and its stderr is captured. If the
    command exits non-zero, the captured stderr is printed and the process
    exits with the same code. Otherwise submit_errors.csv is checked: if it
    has any rows a failure message is printed; if not, the seed group CID and
    HTML link from seed-results.csv are printed.
    """
    command = ["npx", "-y", "@elephant-xyz/cli", "validate-and-upload", "output", "--output-csv", "seed-results.csv"]
    try:
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,    # discard stdout
            stderr=subprocess.PIPE,       # capture stderr in a buffer
            check=True,
            text=True,                    # stderr as a string
        )
    except subprocess.CalledProcessError as e:
        # The command itself failed: surface its stderr and exit code.
        print(f"Command failed (exit code {e.returncode}):", file=sys.stderr)
        print(e.stderr.strip(), file=sys.stderr)
        sys.exit(e.returncode)

    # Recorded errors mean the step failed even though the command exited 0.
    if has_submit_errors():
        print("❌ Transform failed, please check submit_errors.csv for details", file=sys.stderr)
        return

    # Otherwise read and report the results.
    seed_group_cid, html_link = get_seed_cid_and_html_link()

    print("✅ Transform done\n")
    print(f"Seed group CID: {seed_group_cid}\n")
    print(f"HTML link: {html_link}")


if __name__ == "__main__":
    run_validate_and_upload()


✅ Transform done

Seed group CID: bafkreif7ywbjxu3s6jfi6ginvmsufeux3cd5eujuivg2y7tmqt2qk4rsoe

HTML link: http://dweb.link/ipfs/bafybeiankhv3qlgx3izhrhw66aghayd3r7zwhkk3rlm34g4go7tatd6zp4


In [4]:
# @title Step 3: Validate
! pip3 install python-dotenv -q

from dotenv import load_dotenv
load_dotenv()

import subprocess
import sys
import csv


def get_seed_cid_and_html_link(path="seed-results.csv"):
    """Return `(dataGroupCid, htmlLink)` from the first data row of the CSV at `path`.

    Raises ValueError if the file has no data rows.
    """
    with open(path, newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            # Only the first data row is used.
            return row["dataGroupCid"], row["htmlLink"]
    raise ValueError("CSV file is empty")


def has_submit_errors(path="submit_errors.csv"):
    """
    Return True if the CSV at `path` has at least one row after the header.
    """
    with open(path, newline='', encoding='utf-8') as csvfile:
        for _ in csv.DictReader(csvfile):
            return True
    return False


def run_validate_and_upload():
    """Run the CLI `validate-and-upload` command on `output` and report the result.

    The command's stdout is discarded and its stderr is captured. If the
    command exits non-zero, the captured stderr is printed and the process
    exits with the same code. Otherwise submit_errors.csv is checked: if it
    has any rows a failure message is printed; if not, the seed group CID and
    HTML link from seed-results.csv are printed.
    """
    command = ["npx", "-y", "@elephant-xyz/cli", "validate-and-upload", "output", "--output-csv", "seed-results.csv"]
    try:
        subprocess.run(
            command,
            stdout=subprocess.DEVNULL,    # discard stdout
            stderr=subprocess.PIPE,       # capture stderr in a buffer
            check=True,
            text=True,                    # stderr as a string
        )
    except subprocess.CalledProcessError as e:
        # The command itself failed: surface its stderr and exit code.
        print(f"Command failed (exit code {e.returncode}):", file=sys.stderr)
        print(e.stderr.strip(), file=sys.stderr)
        sys.exit(e.returncode)

    # Recorded errors mean the step failed even though the command exited 0.
    if has_submit_errors():
        print("❌ Validate failed, please check submit_errors.csv for details", file=sys.stderr)
        return

    # Otherwise read and report the results.
    seed_group_cid, html_link = get_seed_cid_and_html_link()
    print("✅ Validate done\n")
    print(f"Seed group CID: {seed_group_cid}\n")
    print(f"HTML link: {html_link}")


if __name__ == "__main__":
    run_validate_and_upload()


✅ Validate done

Seed group CID: bafkreif7ywbjxu3s6jfi6ginvmsufeux3cd5eujuivg2y7tmqt2qk4rsoe

HTML link: http://dweb.link/ipfs/bafybeialbp7dd535hgafsc4w26m4uudolvksjino2hiteexui4shgrdbam


In [6]:
# @title Step 4: Upload
! pip3 install python-dotenv requests -q

from dotenv import load_dotenv
load_dotenv()

import subprocess
import sys
import csv

import requests


def get_seed_info(path="seed-results.csv"):
    """Return the first data row of the CSV at `path` as a column-name -> value mapping.

    Raises ValueError if the file has no data rows.
    """
    with open(path, newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            # Only the first data row is used.
            return row
    raise ValueError("CSV file is empty")


def has_submit_errors(path="submit_errors.csv"):
    """
    Return True if the CSV at `path` has at least one row after the header.
    """
    with open(path, newline='', encoding='utf-8') as csvfile:
        for _ in csv.DictReader(csvfile):
            return True
    return False


def count_uploaded_files(output_dir="output"):
    """
    Count all files under `output_dir`, recursing into its subdirectories.

    Returns 0 when `output_dir` does not exist. Directories themselves are
    not counted.
    """
    # Imported locally: this cell's own import list does not include `os`, so
    # without this the function only works if an earlier cell imported it.
    import os

    if not os.path.exists(output_dir):
        return 0

    return sum(len(files) for _, _, files in os.walk(output_dir))


def collect_data_ipfs_links(data_cid):
    """Resolve the ipfs.io links for the seed document and the documents it references.

    Fetches the JSON at `https://ipfs.io/ipfs/<data_cid>`, follows its
    `relationships.property_seed` link to the relationship document, and reads
    that document's `from` and `to` links.

    Returns a 4-tuple of URLs:
    `(seed_data_link, relationship_link, from_link, to_link)`.

    Raises `requests.HTTPError` on a non-success HTTP status,
    `requests.Timeout` if the gateway does not answer in time, and `KeyError`
    if a fetched document lacks an expected key.
    """
    gateway = "https://ipfs.io/ipfs"
    # Without a timeout requests waits indefinitely on an unresponsive gateway.
    timeout_seconds = 30

    def _fetch_json(link):
        response = requests.get(link, timeout=timeout_seconds)
        # Fail with a clear HTTP error instead of an obscure JSON decode
        # error when the gateway returns an error page.
        response.raise_for_status()
        return response.json()

    seed_data_link = f"{gateway}/{data_cid}"
    seed_data = _fetch_json(seed_data_link)

    relationship_cid = seed_data["relationships"]["property_seed"]["/"]
    relationship_link = f"{gateway}/{relationship_cid}"
    relationship_data = _fetch_json(relationship_link)

    from_cid, to_cid = relationship_data["from"]["/"], relationship_data["to"]["/"]

    from_link = f"{gateway}/{from_cid}"
    to_link = f"{gateway}/{to_cid}"

    return seed_data_link, relationship_link, from_link, to_link


def run_validate_and_upload():
    """Run the CLI `validate-and-upload` command on `output` and report the upload.

    The command's stdout is discarded and its stderr is captured. If the
    command exits non-zero, the captured stderr is printed and the process
    exits with the same code. Otherwise submit_errors.csv is checked: if it
    has any rows a failure message is printed and nothing else happens.

    On success, prints the number of files under `output`, the seed group CID
    and HTML link from the first row of seed-results.csv, and the IPFS links
    resolved from that row's `dataCid`.
    """
    try:
        subprocess.run(
            ["npx", "-y", "@elephant-xyz/cli", "validate-and-upload", "output", "--output-csv", "seed-results.csv"],
            stdout=subprocess.DEVNULL,    # discard stdout
            stderr=subprocess.PIPE,       # capture stderr in a buffer
            check=True,
            text=True,
        )

        if has_submit_errors():
            # This is the Upload step, so the message names it as such
            # (it previously said "Validate failed").
            print("❌ Upload failed, please check submit_errors.csv for details", file=sys.stderr)
            return

        seed_info = get_seed_info()
        seed_group_cid, data_cid, html_link = seed_info["dataGroupCid"], seed_info["dataCid"], seed_info["htmlLink"]

        files_uploaded = count_uploaded_files("output")

        # Names follow what each link points at: the seed document, the
        # relationship document, then the relationship's "from" (property
        # seed) and "to" (unnormalized address) documents.
        seed_group_link, relationship_link, property_seed_link, address_link = collect_data_ipfs_links(data_cid)

        print("✅ Upload done\n")
        print(f"{files_uploaded} files uploaded\n")

        print(f"Seed group CID: {seed_group_cid}\n")
        print(f"HTML link: {html_link}\n")

        print(f"Seed group IPFS link: {seed_group_link}")
        print(f"relationship IPFS link: {relationship_link}")
        print(f"property_seed IPFS link: {property_seed_link}")
        print(f"unnormalized_address IPFS link: {address_link}")

    except subprocess.CalledProcessError as e:
        # The command itself failed: surface its stderr and exit code.
        print(f"Command failed (exit code {e.returncode}):", file=sys.stderr)
        print(e.stderr.strip(), file=sys.stderr)
        sys.exit(e.returncode)


if __name__ == "__main__":
    run_validate_and_upload()


✅ Upload done

4 files uploaded

Seed group CID: bafkreif7ywbjxu3s6jfi6ginvmsufeux3cd5eujuivg2y7tmqt2qk4rsoe

HTML link: http://dweb.link/ipfs/bafybeiehg55jhmtzwspsk76riz7g77skexargvoczlbnzui337xqxzioba

Seed group IPFS link: https://ipfs.io/ipfs/bafkreieyaohnh2i446cf4pw5bndntmkhy4g6kvg3vzysw5j56g26s7ybji
relationship IPFS link: https://ipfs.io/ipfs/bafkreigacxivvzfdbwg7lexvu67tgkl76gcfzcql2vlscmbyqm4t43fcpe
property_seed IPFS link: https://ipfs.io/ipfs/bafkreiawsrtadbrmno7guioh75ivlgmiscbvocsoz4voc5zl2n62c3va4m
unnormalized_address IPFS link: https://ipfs.io/ipfs/bafkreig45uumuublszfnm2w267jbxsurrhuivd2mnmilact4fyutpal4de


In [None]:
# @title Step 5: Submit

! pip3 install python-dotenv -q

from dotenv import load_dotenv
load_dotenv()

import subprocess
import sys
import csv


def get_transaction_hash(path="transaction-status.csv"):
    """Return the `transactionHash` value from the first data row of the CSV at `path`.

    Raises ValueError if the file has no data rows.
    """
    with open(path, newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            # Only the first data row is used.
            return row["transactionHash"]
    raise ValueError("CSV file is empty")


def has_submit_errors(path="submit_errors.csv"):
    """Return True if the CSV at `path` has at least one row after the header."""
    with open(path, newline='', encoding='utf-8') as csvfile:
        for _ in csv.DictReader(csvfile):
            return True
    return False


def run_submit_to_contract():
    """Run the CLI `submit-to-contract` command on seed-results.csv and report the result.

    The sender address, API key, domain and oracle key id are read from the
    environment (populated from `.env` by `load_dotenv()` earlier in this
    cell) and fall back to the values previously embedded here, so existing
    setups keep working.

    The command's stdout is discarded and its stderr is captured. If the
    command exits non-zero, the captured stderr is printed and the process
    exits with the same code. Otherwise submit_errors.csv is checked: if it
    has any rows a failure message is printed; if not, a polygonscan link
    built from the first `transactionHash` in transaction-status.csv is printed.
    """
    # Imported locally because this cell's import list does not include `os`.
    import os

    def _setting(env_name, fallback):
        # A missing or blank environment variable falls back to the legacy value.
        return os.environ.get(env_name) or fallback

    # SECURITY: the fallback literals below are credentials committed to the
    # notebook. They are kept only for backward compatibility; rotate them and
    # supply the real values through `.env`, then delete the fallbacks.
    # NOTE(review): the ELEPHANT_* variable names are a proposed convention —
    # confirm against the names the team's `.env` actually uses.
    from_address = _setting("ELEPHANT_FROM_ADDRESS", "0xefAd08946612A15d5De8D4Db7fc03556b6424075")
    api_key = _setting("ELEPHANT_API_KEY", "f7e18cf6-5d07-4e4a-ae23-f27b812614e6")
    domain = _setting("ELEPHANT_DOMAIN", "oracles-69c46050.staircaseapi.com")
    oracle_key_id = _setting("ELEPHANT_ORACLE_KEY_ID", "7ad26e0b-67c9-4c2f-95a2-2792c7db5ac7")

    try:
        subprocess.run(
            [
                "npx", "-y", "@elephant-xyz/cli", "submit-to-contract", "seed-results.csv",
                "--from-address", from_address,
                "--api-key", api_key,
                "--domain", domain,
                "--oracle-key-id", oracle_key_id,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            check=True,
            text=True,
        )
        # Recorded errors mean the step failed even though the command exited 0.
        if has_submit_errors():
            print("❌ Submit failed, please check submit_errors.csv for details", file=sys.stderr)
            return

        transaction_hash = get_transaction_hash()
        transaction_link = f"https://polygonscan.com/tx/{transaction_hash}"

        print("✅ Submit done\n")
        print(f"Transaction link: {transaction_link}")

    except subprocess.CalledProcessError as e:
        # The command itself failed: surface its stderr and exit code.
        print(f"Command failed (exit code {e.returncode}):", file=sys.stderr)
        print(e.stderr.strip(), file=sys.stderr)
        sys.exit(e.returncode)


if __name__ == "__main__":
    run_submit_to_contract()


✅ Submit done

Transaction link: https://polygonscan.com/tx/0x25d6e386ea5a231a9c5a9d30e2193164fe196bdd3c00f35ec8c0d19ac3a9386e


In [None]:
# @title Step 6: Download seed-results.csv