## 1. SILVER LOGS

In [1]:
# SCHEMA_REGISTRY_URL = "http://schema-registry:8081"
# sc_client = SchemaRegistryUtils.get_schema_registry_client(SCHEMA_REGISTRY_URL)
# topic_logs = "mainnet.application.logs-value"
# schema_logs = SchemaRegistryUtils.get_avro_schema(sc_client, topic_logs)
# print(schema_logs)

# df.select("topic").distinct().show(20, True)
# df.filter(col("topic") == "mainnet.application.logs").show()

## 2. SILVER BLOCKS_METADATA

In [2]:
import os
from utils.spark import get_spark_session
import os

from pyspark.sql.functions import col, expr, split, window, count, max
from pyspark.sql.avro.functions import from_avro
from pyspark.sql.types import *

from utils.schema_registry_utils import SchemaRegistryUtils


class SilverBlocks:
  """Streaming ETL helper between a bronze Iceberg table and a silver table.

  The bronze table is read as a stream of raw Kafka records (key, value,
  partition, offset, timestamp, topic). Records can be decoded from Avro and
  written either to the console or to the Iceberg table named by
  ``silver_tbl_name``.
  """

  def __init__(self, spark, silver_tbl_name):
    """Store the Spark session and the name of the target silver table.

    Args:
      spark: Spark session used for every read, write and SQL statement.
      silver_tbl_name: Fully qualified name of the silver Iceberg table.
    """
    self.spark = spark
    self.silver_tbl_name = silver_tbl_name


  def create_table(self):
    """Create the silver table if it does not exist and print its schema."""
    self.spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {self.silver_tbl_name} (
    key binary,
    value binary,
    partition int,
    offset long,
    timestamp timestamp,
    topic string) 
    USING ICEBERG PARTITIONED BY (topic)
    """).show()
    # Use the session owned by this instance; the previous code reached for a
    # notebook-global `spark`, which fails (or picks the wrong session) when
    # the class is used outside the kernel that defined that global.
    self.spark.table(self.silver_tbl_name).printSchema()

  def extract_data(self, bronze_src_tbl, topic="mainnet.mined.txs.token.transfer"):
    """Open a streaming read on the bronze table, restricted to one topic.

    Args:
      bronze_src_tbl: Name of the bronze Iceberg table to stream from.
      topic: Value of the ``topic`` column to keep. The default preserves the
        previously hard-coded filter.
        NOTE(review): the default is the token-transfer topic although this
        class is named for blocks -- confirm which topic is intended.

    Returns:
      A streaming DataFrame with the columns key, value, partition, offset,
      timestamp and topic.
    """
    schema = StructType([
        StructField('key', BinaryType(), True),
        StructField('value', BinaryType(), True),
        StructField('partition', IntegerType(), True),
        StructField('offset', LongType(), True),
        StructField('timestamp', TimestampType(), True),
        StructField('topic', StringType(), True)]
    )
    df_extracted = (
      self.spark
        .readStream
        .format("iceberg")
        .option("streaming-skip-delete-snapshots","true")
        # NOTE(review): this hands the StructType to a reader *option*;
        # DataStreamReader.schema() is the API for user-supplied schemas.
        # Kept as-is to preserve behaviour -- confirm whether it is needed,
        # since .table() resolves the schema from the catalog.
        .option("schema", schema)
        .table(bronze_src_tbl)
        .filter(col("topic") == topic)
        .select("key","value","partition","offset","timestamp","topic"))
    return df_extracted


  def transform_data(self, df_extracted, avro_schema=None):
    """Decode the Avro payload of each record into top-level columns.

    Args:
      df_extracted: DataFrame with at least ``timestamp`` and ``value``
        columns, e.g. the result of ``extract_data``.
      avro_schema: Avro schema (JSON string) used to decode ``value``. When
        omitted, the module-level ``schema_blocks`` is used, matching the
        name the original implementation referenced.

    Returns:
      A DataFrame with ``kafka_timestamp`` followed by the decoded fields.
    """
    if avro_schema is None:
      avro_schema = schema_blocks
    df_transformed = (
        # Previously read an undefined name `df_blocks`; the input is the
        # DataFrame passed in by the caller.
        df_extracted
        .select(
          col("timestamp").alias("kafka_timestamp"),
          # substring() is 1-based, so this drops the first 5 bytes of value;
          # presumably the schema-registry wire-format header -- TODO confirm.
          from_avro(expr("substring(value, 6)"), avro_schema).alias("data"))
        .select("kafka_timestamp", "data.*")
    )
    return df_transformed


  def load_data_to_console(self, df_transformed):
    """Start a streaming query that appends the DataFrame to the console sink.

    Returns:
      The started streaming query handle.
    """
    query = (
      df_transformed
        .writeStream
        .outputMode("append")
        .format("console")
        .start())
    return query

  def load_data_to_bronze(self, df_transformed,
                          checkpoint_location="s3a://sistemas/checkpoints/silver"):
    """Start a streaming query that appends the DataFrame to the silver table.

    Despite the method name, the destination is ``self.silver_tbl_name``.

    Args:
      df_transformed: Streaming DataFrame to persist.
      checkpoint_location: Checkpoint directory for the streaming query. The
        default preserves the previously hard-coded path.

    Returns:
      The started streaming query handle.
    """
    query = (
      df_transformed
        .writeStream
        .format("iceberg")
        .outputMode("append")
        .option("checkpointLocation", checkpoint_location)
        .toTable(self.silver_tbl_name))
    return query


# Spark master address is exported before the session is created below.
os.environ["SPARK_MASTER_URL"] = "spark://spark-master:7077"

# Names and endpoints used by this cell and by later cells.
SCHEMA_REGISTRY_URL = "http://schema-registry:8081"
topic_blocks = "mainnet.mined.block.metadata-value"
silver_blocks = "nessie.blocks"
bronze_src_tbl = "nessie.test"

# Fetch the Avro schema registered under the block-metadata subject.
sc_client = SchemaRegistryUtils.get_schema_registry_client(SCHEMA_REGISTRY_URL)
schema_blocks = SchemaRegistryUtils.get_avro_schema(sc_client, topic_blocks)
print(schema_blocks)

spark = get_spark_session("silver_blocks")

# Sanity checks: bronze schema, then current contents of the silver table.
print(spark.table(bronze_src_tbl).schema)
spark.table(silver_blocks).show()




{"type":"record","name":"BlockClock","namespace":"io.onchain.streamtxs.avro","fields":[{"name":"number","type":"long"},{"name":"timestamp","type":"long"},{"name":"hash","type":"string"},{"name":"parentHash","type":"string"},{"name":"difficulty","type":"long"},{"name":"totalDifficulty","type":"string"},{"name":"nonce","type":"string"},{"name":"size","type":"long"},{"name":"miner","type":"string"},{"name":"baseFeePerGas","type":"long"},{"name":"gasLimit","type":"long"},{"name":"gasUsed","type":"long"},{"name":"logsBloom","type":"string"},{"name":"extraData","type":"string"},{"name":"transactionsRoot","type":"string"},{"name":"stateRoot","type":"string"},{"name":"transactions","type":{"type":"array","items":"string"}},{"name":"withdrawals","type":{"type":"array","items":{"type":"record","name":"Withdrawal","fields":[{"name":"index","type":"long"},{"name":"validatorIndex","type":"long"},{"name":"address","type":"string"},{"name":"amount","type":"long"}]}}}]}
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA

Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
org.apache.iceberg#iceberg-spark-runtime-3.5_2.12 added as a dependency
org.projectnessie.nessie-integrations#nessie-spark-extensions-3.5_2.12 added as a dependency
software.amazon.awssdk#bundle added as a dependency
software.amazon.awssdk#url-connection-client added as a dependency
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
org.apache.spark#spark-avro_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-5a131c8e-27e0-41bb-89e4-b02e554a0025;1.0
	confs: [default]
	found org.apache.iceberg#iceberg-spark-runtime-3.5_2.12;1.6.1 in central
	found org.projectnessie.nessie-integrations#nessie-spark-extensions-3.5_2.12;0.95.0 in central
	found software.amazon.awssdk#bundle;2.17.178 in central
	found software.amazon.eventstream#eventstream;1.0.1 in central
	found software.amazon.awssdk#url-connection-client;2.17.178 in central
	found sof

StructType([StructField('key', BinaryType(), True), StructField('value', BinaryType(), True), StructField('partition', IntegerType(), True), StructField('offset', LongType(), True), StructField('timestamp', TimestampType(), True), StructField('topic', StringType(), True)])
+---+-----+---------+------+---------+-----+
|key|value|partition|offset|timestamp|topic|
+---+-----+---------+------+---------+-----+
+---+-----+---------+------+---------+-----+



IOPub data rate exceeded.                                                       
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



## 3. SILVER TRANSACTIONS_DATA

In [3]:
# Build the ETL helper and open a streaming read on the bronze table.
engine = SilverBlocks(spark, silver_blocks)
df_extracted = engine.extract_data(bronze_src_tbl)

# The Iceberg sink is disabled; the raw (still Avro-encoded) records are sent
# to the console sink instead. transform_data() is not applied here.
#query = engine.load_data_to_bronze(df_extracted)
query = engine.load_data_to_console(df_extracted)
# Blocks this cell until the streaming query stops or fails.
query.awaitTermination()

In [4]:
# SCHEMA_REGISTRY_URL = "http://schema-registry:8081"
# sc_client = SchemaRegistryUtils.get_schema_registry_client(SCHEMA_REGISTRY_URL)
# topic_txs = "mainnet.mined.txs.contract.call-value"
# schema_txs = SchemaRegistryUtils.get_avro_schema(sc_client, topic_txs)
# print(schema_txs)

In [5]:
#spark.stop()

In [7]:
# List the distinct Kafka topics present in the bronze table (up to 20 rows, untruncated).
spark.table(bronze_src_tbl).select("topic").distinct().show(20, False)

[Stage 7:>                                                          (0 + 1) / 1]

+--------------------------------+
|topic                           |
+--------------------------------+
|mainnet.mined.txs.token.transfer|
|mainnet.mined.block.metadata    |
+--------------------------------+



                                                                                