From f711248b97d30e89ea42f8885ea0ab2a406891c6 Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Sat, 13 Sep 2025 03:51:02 -0400 Subject: [PATCH 1/4] Update Schema (#290) * Update mview schema * Fix projection --- .../0008_clickhouse_create_token_balances.sql | 20 +++++----- ...09_clickhouse_create_token_balances_mv.sql | 37 ++++++++++++------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql index 49444f1c..522d12fd 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -31,11 +31,11 @@ CREATE TABLE IF NOT EXISTS token_balances owner_address, token_address, token_id, - sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + sum(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, + min(block_number) AS min_block_number_state, + min(block_timestamp) AS min_block_timestamp_state, + max(block_number) AS max_block_number_state, + max(block_timestamp) AS max_block_timestamp_state GROUP BY chain_id, owner_address, token_address, token_id ), @@ -46,11 +46,11 @@ CREATE TABLE IF NOT EXISTS token_balances token_address, token_id, owner_address, - sumState(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, - minState(block_number) AS min_block_number_state, - minState(block_timestamp) AS min_block_timestamp_state, - maxState(block_number) AS max_block_number_state, - maxState(block_timestamp) AS max_block_timestamp_state + sum(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, + min(block_number) AS min_block_number_state, + min(block_timestamp) AS min_block_timestamp_state, + max(block_number) AS max_block_number_state, + max(block_timestamp) AS max_block_timestamp_state GROUP BY chain_id, token_address, token_id, owner_address ), diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql index 63e523e1..933f6d4a 100644 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql +++ b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql @@ -1,8 +1,7 @@ -- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_mv +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_from_mv TO token_balances AS --- FROM side (outgoing, negative delta) SELECT chain_id, token_type, @@ -19,8 +18,11 @@ SELECT insert_timestamp, is_deleted FROM token_transfers -WHERE token_type = 'erc20' -UNION ALL +WHERE token_type = 'erc20'; + +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_to_mv +TO token_balances +AS -- TO side (incoming, positive delta) SELECT chain_id, @@ -41,7 +43,7 @@ FROM token_transfers WHERE token_type = 'erc20'; -- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_mv +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_from_mv TO token_balances AS SELECT @@ -60,8 +62,11 @@ SELECT insert_timestamp, is_deleted FROM token_transfers -WHERE token_type = 'erc721' -UNION ALL +WHERE token_type = 'erc721'; + +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_to_mv +TO 
token_balances +AS SELECT chain_id, token_type, @@ -81,7 +86,7 @@ FROM token_transfers WHERE token_type = 'erc721'; -- ERC1155 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_mv +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_from_mv TO token_balances AS SELECT @@ -100,8 +105,11 @@ SELECT insert_timestamp, is_deleted FROM token_transfers -WHERE token_type = 'erc1155' -UNION ALL +WHERE token_type = 'erc1155'; + +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_to_mv +TO token_balances +AS SELECT chain_id, token_type, @@ -121,7 +129,7 @@ FROM token_transfers WHERE token_type = 'erc1155'; -- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_mv +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_from_mv TO token_balances AS SELECT @@ -140,8 +148,11 @@ SELECT insert_timestamp, is_deleted FROM token_transfers -WHERE token_type = 'erc6909' -UNION ALL +WHERE token_type = 'erc6909'; + +CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_to_mv +TO token_balances +AS SELECT chain_id, token_type, From f66f78218343038721468325e9adbbba9b23c02f Mon Sep 17 00:00:00 2001 From: Jake Loo <2171134+jakeloo@users.noreply.github.com> Date: Wed, 17 Sep 2025 17:21:47 -0400 Subject: [PATCH 2/4] Partition token_balances (#291) * Partition token_balances * Index block number * Update schema --- .../0002_clickhouse_create_logs_table.sql | 15 +++++++++++---- .../0006_clickhouse_create_token_transfers.sql | 1 + .../0008_clickhouse_create_token_balances.sql | 3 ++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql index d4e202cc..44e598ee 100644 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql @@ -37,16 +37,23 @@ CREATE TABLE IF NOT EXISTS logs ( transaction_index, log_index ), - PROJECTION chain_topic0_projection + PROJECTION chain_address_block_number_full_projection + ( + SELECT + * + ORDER BY + chain_id, + address, + block_number + ), + PROJECTION chain_topic0_full_projection ( SELECT - _part_offset + * ORDER BY chain_id, topic_0, block_number, - transaction_index, - log_index, address ), PROJECTION address_topic0_state_projection diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql index edb92cbe..4eb1c594 100644 --- a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql +++ b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql @@ -17,6 +17,7 @@ CREATE TABLE IF NOT EXISTS token_transfers `insert_timestamp` DateTime DEFAULT now(), `is_deleted` UInt8 DEFAULT 0, + INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, diff --git a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql index 522d12fd..ecf58e80 100644 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql +++ b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql @@ -20,6 +20,7 @@ CREATE TABLE IF NOT EXISTS token_balances `insert_timestamp` DateTime DEFAULT now(), `is_deleted` UInt8 
DEFAULT 0, + INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 3, INDEX idx_owner_address owner_address TYPE bloom_filter GRANULARITY 3, @@ -62,6 +63,6 @@ CREATE TABLE IF NOT EXISTS token_balances ) ) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -PARTITION BY chain_id +PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) ORDER BY (chain_id, owner_address, token_address, token_id, block_number, transaction_index, log_index, direction) SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file From d26b9b8b19c14af377c7b7a77cea15805239f40b Mon Sep 17 00:00:00 2001 From: nischit Date: Mon, 27 Oct 2025 13:45:26 +0545 Subject: [PATCH 3/4] removed old code --- cmd/api.go | 139 -- cmd/migrate_valid.go | 359 ---- cmd/orchestrator.go | 50 - cmd/root.go | 401 ---- cmd/validate.go | 77 - cmd/validate_and_fix.go | 152 -- configs/config.go | 344 +-- configs/test_config.yml | 78 - internal/common/abi.go | 271 --- internal/common/balances.go | 14 - internal/common/block.go | 78 - internal/common/log.go | 248 --- internal/common/log_test.go | 43 - internal/common/set.go | 42 - internal/common/trace.go | 49 - internal/common/transaction.go | 202 -- internal/common/transaction_test.go | 49 - internal/common/transfers.go | 22 - internal/handlers/blocks_handlers.go | 123 -- internal/handlers/logs_handlers.go | 255 --- internal/handlers/search_handlers.go | 488 ----- internal/handlers/search_handlers_test.go | 302 --- internal/handlers/token_handlers.go | 446 ---- internal/handlers/transactions_handlers.go | 257 --- internal/handlers/transfer_handlers.go | 199 -- internal/libs/libblockdata/validator.go | 4 +- internal/metrics/metrics.go | 176 -- internal/middleware/authorization.go | 36 - internal/middleware/cors.go | 18 - internal/middleware/logger.go | 50 - internal/orchestrator/chain_tracker.go | 53 - internal/orchestrator/committer.go | 531 ----- internal/orchestrator/committer_test.go | 1 - internal/orchestrator/orchestrator.go | 132 -- internal/orchestrator/poller.go | 501 ----- internal/orchestrator/poller_test.go | 13 - internal/orchestrator/reorg_handler.go | 294 --- internal/orchestrator/reorg_handler_test.go | 13 - internal/orchestrator/validator.go | 234 --- internal/orchestrator/work_mode_monitor.go | 160 -- internal/publisher/publisher.go | 410 ---- internal/source/s3.go | 1003 --------- internal/source/source.go | 14 - internal/source/staging.go | 65 - internal/storage/badger.go | 645 ------ internal/storage/block_buffer.go | 282 --- internal/storage/block_buffer_badger.go | 477 ----- internal/storage/block_buffer_badger_test.go | 144 -- internal/storage/block_buffer_pebble.go | 495 ----- internal/storage/clickhouse.go | 1836 ----------------- internal/storage/clickhouse_connector_test.go | 277 --- internal/storage/connector.go | 377 ---- internal/storage/kafka.go | 137 -- internal/storage/pebble.go | 698 ------- internal/storage/postgres.go | 295 --- internal/storage/postgres_connector_test.go | 124 -- internal/storage/redis.go | 134 -- internal/storage/s3.go | 1174 ----------- .../0000_clickhouse_create_blocks_table.sql | 49 - ...1_clickhouse_create_transactions_table.sql | 98 - .../0002_clickhouse_create_logs_table.sql | 78 - .../0003_clickhouse_create_traces_table.sql | 58 - 
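The two schema patches above reshape how token balances are maintained and read: each former UNION ALL materialized view becomes a *_from_mv / *_to_mv pair writing TO the same token_balances table (ClickHouse materialized views do not reliably process UNION ALL branches, so each transfer direction needs its own view), the projections switch from *State combinators to plain sum/min/max, and token_balances is now partitioned by (chain_id, toStartOfQuarter(block_timestamp)) with a minmax index on block_number. The sketch below is not part of the patch; it only illustrates the intended read pattern, assuming the column names used by the materialized views above (owner_address, token_address, token_type, balance_delta, is_deleted, block_timestamp), and whether ClickHouse serves it from one of the aggregate projections depends on the query matching that projection's GROUP BY.

-- Hypothetical query (not part of this patch): net ERC-20 balance change per
-- owner on one chain over a quarter. The chain_id and block_timestamp filters
-- line up with the new partition key (chain_id, toStartOfQuarter(block_timestamp)),
-- so untouched partitions are pruned, and the aggregate mirrors the
-- sum(balance_delta * if(is_deleted = 0, 1, -1)) expression used in the projections.
SELECT
    owner_address,
    token_address,
    sum(balance_delta * if(is_deleted = 0, 1, -1)) AS net_change
FROM token_balances
WHERE chain_id = 1
  AND token_type = 'erc20'
  AND block_timestamp >= toDateTime('2025-07-01 00:00:00')
  AND block_timestamp <  toDateTime('2025-10-01 00:00:00')
GROUP BY owner_address, token_address
HAVING net_change != 0
ORDER BY net_change DESC
LIMIT 100;

The same from/to view pair is created for ERC-721, ERC-1155 and ERC-6909, and block-range filters can lean on the new minmax index on block_number in the same way a timestamp filter leans on the partition key.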
...04_clickhouse_create_insert_null_table.sql | 108 - .../0005_clickhouse_create_insert_data_mv.sql | 122 -- ...0006_clickhouse_create_token_transfers.sql | 115 -- .../0008_clickhouse_create_token_balances.sql | 68 - ...09_clickhouse_create_token_balances_mv.sql | 172 -- ...clickhouse_create_address_transactions.sql | 63 - ...ckhouse_create_address_transactions_mv.sql | 44 - ...12_clickhouse_create_address_transfers.sql | 67 - ...clickhouse_create_address_transfers_mv.sql | 24 - ...house_create_insert_token_transfers_mv.sql | 20 - ...0000_clickhouse_backfill_logs_transfer.sql | 202 -- .../0010_clickhouse_inserts_null_table_v1.sql | 109 - ...11_clickhouse_inserts_null_table_v1_mv.sql | 122 -- internal/tools/postgres/postgres_schema.sql | 52 - internal/validation/cursor.go | 72 - internal/validation/duplicates.go | 246 --- internal/worker/worker.go | 449 ---- test/mocks/MockIMainStorage.go | 1090 ---------- test/mocks/MockIStagingStorage.go | 304 --- 81 files changed, 38 insertions(+), 19185 deletions(-) delete mode 100644 cmd/api.go delete mode 100644 cmd/migrate_valid.go delete mode 100644 cmd/orchestrator.go delete mode 100644 cmd/validate.go delete mode 100644 cmd/validate_and_fix.go delete mode 100644 configs/test_config.yml delete mode 100644 internal/common/abi.go delete mode 100644 internal/common/balances.go delete mode 100644 internal/common/log_test.go delete mode 100644 internal/common/set.go delete mode 100644 internal/common/transaction_test.go delete mode 100644 internal/common/transfers.go delete mode 100644 internal/handlers/blocks_handlers.go delete mode 100644 internal/handlers/logs_handlers.go delete mode 100644 internal/handlers/search_handlers.go delete mode 100644 internal/handlers/search_handlers_test.go delete mode 100644 internal/handlers/token_handlers.go delete mode 100644 internal/handlers/transactions_handlers.go delete mode 100644 internal/handlers/transfer_handlers.go delete mode 100644 internal/middleware/authorization.go delete mode 100644 internal/middleware/cors.go delete mode 100644 internal/middleware/logger.go delete mode 100644 internal/orchestrator/chain_tracker.go delete mode 100644 internal/orchestrator/committer.go delete mode 100644 internal/orchestrator/committer_test.go delete mode 100644 internal/orchestrator/orchestrator.go delete mode 100644 internal/orchestrator/poller.go delete mode 100644 internal/orchestrator/poller_test.go delete mode 100644 internal/orchestrator/reorg_handler.go delete mode 100644 internal/orchestrator/reorg_handler_test.go delete mode 100644 internal/orchestrator/validator.go delete mode 100644 internal/orchestrator/work_mode_monitor.go delete mode 100644 internal/publisher/publisher.go delete mode 100644 internal/source/s3.go delete mode 100644 internal/source/source.go delete mode 100644 internal/source/staging.go delete mode 100644 internal/storage/badger.go delete mode 100644 internal/storage/block_buffer.go delete mode 100644 internal/storage/block_buffer_badger.go delete mode 100644 internal/storage/block_buffer_badger_test.go delete mode 100644 internal/storage/block_buffer_pebble.go delete mode 100644 internal/storage/clickhouse.go delete mode 100644 internal/storage/clickhouse_connector_test.go delete mode 100644 internal/storage/connector.go delete mode 100644 internal/storage/kafka.go delete mode 100644 internal/storage/pebble.go delete mode 100644 internal/storage/postgres.go delete mode 100644 internal/storage/postgres_connector_test.go delete mode 100644 internal/storage/redis.go delete mode 100644 
internal/storage/s3.go delete mode 100644 internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql delete mode 100644 internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql delete mode 100644 internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql delete mode 100644 internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql delete mode 100644 internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql delete mode 100644 internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql delete mode 100644 internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql delete mode 100644 internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql delete mode 100644 internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql delete mode 100644 internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql delete mode 100644 internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql delete mode 100644 internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql delete mode 100644 internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql delete mode 100644 internal/tools/clickhouse/0014_clickhouse_create_insert_token_transfers_mv.sql delete mode 100644 internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql delete mode 100644 internal/tools/clickhouse_opts/0010_clickhouse_inserts_null_table_v1.sql delete mode 100644 internal/tools/clickhouse_opts/0011_clickhouse_inserts_null_table_v1_mv.sql delete mode 100644 internal/tools/postgres/postgres_schema.sql delete mode 100644 internal/validation/cursor.go delete mode 100644 internal/validation/duplicates.go delete mode 100644 internal/worker/worker.go delete mode 100644 test/mocks/MockIMainStorage.go delete mode 100644 test/mocks/MockIStagingStorage.go diff --git a/cmd/api.go b/cmd/api.go deleted file mode 100644 index 4254e7ed..00000000 --- a/cmd/api.go +++ /dev/null @@ -1,139 +0,0 @@ -package cmd - -import ( - "context" - "net/http" - "os/signal" - "syscall" - "time" - - "github.com/gin-gonic/gin" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - swaggerFiles "github.com/swaggo/files" - ginSwagger "github.com/swaggo/gin-swagger" - "github.com/swaggo/swag" - - "github.com/thirdweb-dev/indexer/internal/handlers" - "github.com/thirdweb-dev/indexer/internal/middleware" - - // Import the generated Swagger docs - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/docs" -) - -var ( - apiCmd = &cobra.Command{ - Use: "api", - Short: "TBD", - Long: "TBD", - Run: func(cmd *cobra.Command, args []string) { - RunApi(cmd, args) - }, - } -) - -// @title Thirdweb Insight -// @version v0.0.1-beta -// @description API for querying blockchain transactions and events -// @license.name Apache 2.0 -// @license.url https://github.com/thirdweb-dev/indexer/blob/main/LICENSE -// @BasePath / -// @Security BasicAuth -// @securityDefinitions.basic BasicAuth -func RunApi(cmd *cobra.Command, args []string) { - docs.SwaggerInfo.Host = config.Cfg.API.Host - - ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) - defer stop() - - r := gin.New() - r.Use(middleware.Logger()) - r.Use(gin.Recovery()) - - // Add Swagger route - r.GET("/swagger/*any", ginSwagger.WrapHandler(swaggerFiles.Handler)) - // Add Swagger JSON endpoint - r.GET("/openapi.json", func(c *gin.Context) { - doc, err := swag.ReadDoc() - if err != nil { - log.Error().Err(err).Msg("Failed to read 
Swagger documentation") - c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to provide Swagger documentation"}) - return - } - c.Header("Content-Type", "application/json") - c.String(http.StatusOK, doc) - }) - - root := r.Group("/:chainId") - { - root.Use(middleware.Authorization) - root.Use(middleware.Cors) - // wildcard queries - root.GET("/transactions", handlers.GetTransactions) - root.GET("/events", handlers.GetLogs) - root.GET("/wallet-transactions/:wallet_address", handlers.GetWalletTransactions) - - // contract scoped queries - root.GET("/transactions/:to", handlers.GetTransactionsByContract) - root.GET("/events/:contract", handlers.GetLogsByContract) - - // signature scoped queries - root.GET("/transactions/:to/:signature", handlers.GetTransactionsByContractAndSignature) - root.GET("/events/:contract/:signature", handlers.GetLogsByContractAndSignature) - - // blocks table queries - root.GET("/blocks", handlers.GetBlocks) - - // token balance queries - root.GET("/balances/:owner/:type", handlers.GetTokenBalancesByType) - - root.GET("/balances/:owner", handlers.GetTokenBalancesByType) - - // token holder queries - root.GET("/holders/:address", handlers.GetTokenHoldersByType) - - // token transfers queries - root.GET("/transfers", handlers.GetTokenTransfers) - // token ID queries - root.GET("/tokens/:address", handlers.GetTokenIdsByType) - - // search - root.GET("/search/:input", handlers.Search) - } - - r.GET("/health", func(c *gin.Context) { - // TODO: implement a simple query before going live - c.String(http.StatusOK, "ok") - }) - - srv := &http.Server{ - Addr: ":3000", - Handler: r, - } - - // Initializing the server in a goroutine so that - // it won't block the graceful shutdown handling below - go func() { - if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed { - log.Fatal().Err(err).Msg("listen: %s\n") - } - }() - - // Listen for the interrupt signal. - <-ctx.Done() - - // Restore default behavior on the interrupt signal and notify user of shutdown. - stop() - log.Info().Msg("shutting down API gracefully") - - // The context is used to inform the server it has 5 seconds to finish - // the request it is currently handling - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - if err := srv.Shutdown(ctx); err != nil { - log.Fatal().Err(err).Msg("API server forced to shutdown") - } - - log.Info().Msg("API server exiting") -} diff --git a/cmd/migrate_valid.go b/cmd/migrate_valid.go deleted file mode 100644 index 8252683f..00000000 --- a/cmd/migrate_valid.go +++ /dev/null @@ -1,359 +0,0 @@ -package cmd - -import ( - "context" - "fmt" - "math/big" - "time" - - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/orchestrator" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/internal/worker" -) - -var ( - migrateValidationCmd = &cobra.Command{ - Use: "validationMigration", - Short: "Migrate valid block data from main storage to target storage", - Long: "Migrate valid blocks, logs, transactions, traces, etc. to target storage. It will query current data from main storage and validate it. 
Anything missing or not passing validation will be queried from the RPC.", - Run: func(cmd *cobra.Command, args []string) { - RunValidationMigration(cmd, args) - }, - } -) - -const ( - DEFAULT_BATCH_SIZE = 2000 - DEFAULT_WORKERS = 1 -) - -func RunValidationMigration(cmd *cobra.Command, args []string) { - ctx := context.Background() - - migrator := NewMigrator() - defer migrator.Close() - - targetEndBlock := big.NewInt(int64(config.Cfg.Migrator.EndBlock)) - targetStartBlock := big.NewInt(int64(config.Cfg.Migrator.StartBlock)) - rangeStartBlock, rangeEndBlock := migrator.DetermineMigrationBoundaries(targetStartBlock, targetEndBlock) - - log.Info().Msgf("Migrating blocks from %s to %s (both ends inclusive)", rangeStartBlock.String(), rangeEndBlock.String()) - - log.Info().Msg("Starting migration") - - // Process the entire range in a single thread - if err := processBlockRange(ctx, migrator, rangeStartBlock, rangeEndBlock); err != nil { - log.Error().Err(err).Msg("Migration failed") - log.Fatal().Msg("Migration stopped due to error") - } - - log.Info().Msg("Migration completed successfully") -} - -func processBlockRange(ctx context.Context, migrator *Migrator, startBlock, endBlock *big.Int) error { - currentBlock := new(big.Int).Set(startBlock) - - for currentBlock.Cmp(endBlock) <= 0 { - batchStartTime := time.Now() - - // Check for cancellation - select { - case <-ctx.Done(): - log.Info().Msgf("Migration interrupted at block %s", currentBlock.String()) - return nil - default: - } - - batchEndBlock := new(big.Int).Add(currentBlock, big.NewInt(int64(migrator.batchSize-1))) - if batchEndBlock.Cmp(endBlock) > 0 { - batchEndBlock = endBlock - } - - blockNumbers := generateBlockNumbersForRange(currentBlock, batchEndBlock) - - // Fetch valid blocks from source - fetchStartTime := time.Now() - validBlocksForRange, err := migrator.GetValidBlocksForRange(blockNumbers) - fetchDuration := time.Since(fetchStartTime) - if err != nil { - // If we got an error fetching valid blocks, we'll continue - log.Error().Err(err).Msg("Failed to get valid blocks for range") - time.Sleep(3 * time.Second) - continue - } - - // Build map of fetched blocks - mapBuildStartTime := time.Now() - blocksToInsertMap := make(map[string]common.BlockData) - for _, blockData := range validBlocksForRange { - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } - - // Loop over block numbers to find missing blocks - missingBlocks := make([]*big.Int, 0) - for _, blockNum := range blockNumbers { - if _, exists := blocksToInsertMap[blockNum.String()]; !exists { - missingBlocks = append(missingBlocks, blockNum) - } - } - mapBuildDuration := time.Since(mapBuildStartTime) - - // Fetch missing blocks from RPC - if len(missingBlocks) > 0 { - log.Debug().Dur("duration", mapBuildDuration).Int("missing_blocks", len(missingBlocks)).Msg("Identified missing blocks") - - rpcFetchStartTime := time.Now() - validMissingBlocks := migrator.GetValidBlocksFromRPC(missingBlocks) - rpcFetchDuration := time.Since(rpcFetchStartTime) - log.Debug().Dur("duration", rpcFetchDuration).Int("blocks_fetched", len(validMissingBlocks)).Msg("Fetched missing blocks from RPC") - - for _, blockData := range validMissingBlocks { - if blockData.Block.ChainId.Sign() == 0 { - return fmt.Errorf("block %s has chain ID 0", blockData.Block.Number.String()) - } - blocksToInsertMap[blockData.Block.Number.String()] = blockData - } - } - - // Prepare blocks for insertion - blocksToInsert := make([]common.BlockData, 0, len(blocksToInsertMap)) - for _, blockData := range 
blocksToInsertMap { - blocksToInsert = append(blocksToInsert, blockData) - } - - // Insert blocks to destination - insertStartTime := time.Now() - err = migrator.destination.InsertBlockData(blocksToInsert) - insertDuration := time.Since(insertStartTime) - if err != nil { - log.Error().Err(err).Dur("duration", insertDuration).Msg("Failed to insert blocks to target storage") - time.Sleep(3 * time.Second) - continue - } - - batchDuration := time.Since(batchStartTime) - log.Info(). - Dur("total_duration", batchDuration). - Dur("fetch_duration", fetchDuration). - Dur("insert_duration", insertDuration). - Int("blocks_processed", len(blocksToInsert)). - Str("start_block_number", blockNumbers[0].String()). - Str("end_block_number", blockNumbers[len(blockNumbers)-1].String()). - Msgf("Batch processed successfully for %s - %s", blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) - - currentBlock = new(big.Int).Add(batchEndBlock, big.NewInt(1)) - } - - return nil -} - -type Migrator struct { - rpcClient rpc.IRPCClient - worker *worker.Worker - source storage.IStorage - destination storage.IMainStorage - validator *orchestrator.Validator - batchSize int -} - -func NewMigrator() *Migrator { - batchSize := DEFAULT_BATCH_SIZE - if config.Cfg.Migrator.BatchSize > 0 { - batchSize = int(config.Cfg.Migrator.BatchSize) - } - - rpcClient, err := rpc.Initialize() - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize RPC") - } - - sourceConnector, err := storage.NewStorageConnector(&config.Cfg.Storage) - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize storage") - } - - // check if chain was indexed with block receipts. If it was, then the current RPC must support block receipts - validRpc, err := validateRPC(rpcClient, sourceConnector) - if err != nil { - log.Fatal().Err(err).Msg("Failed to validate RPC") - } - if !validRpc { - log.Fatal().Msg("RPC does not support block receipts, but transactions were indexed with receipts") - } - - validator := orchestrator.NewValidator(rpcClient, sourceConnector, worker.NewWorker(rpcClient)) - - destinationConnector, err := storage.NewMainConnector(&config.Cfg.Migrator.Destination, &sourceConnector.OrchestratorStorage) - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize storage") - } - - return &Migrator{ - batchSize: batchSize, - rpcClient: rpcClient, - source: sourceConnector, - destination: destinationConnector, - validator: validator, - worker: worker.NewWorker(rpcClient), - } -} - -func (m *Migrator) Close() { - m.rpcClient.Close() - - if err := m.source.Close(); err != nil { - log.Fatal().Err(err).Msg("Failed to close source storage") - } - - if err := m.destination.Close(); err != nil { - log.Fatal().Err(err).Msg("Failed to close destination storage") - } -} - -func (m *Migrator) DetermineMigrationBoundaries(targetStartBlock, targetEndBlock *big.Int) (*big.Int, *big.Int) { - // get latest block from main storage - latestBlockStored, err := m.source.MainStorage.GetMaxBlockNumber(m.rpcClient.GetChainID()) - if err != nil { - log.Fatal().Err(err).Msg("Failed to get latest block from main storage") - } - latestBlockRPC, err := m.rpcClient.GetLatestBlockNumber(context.Background()) - if err != nil { - log.Fatal().Err(err).Msg("Failed to get latest block from RPC") - } - log.Info().Msgf("Latest block in main storage: %d", latestBlockStored) - - endBlock := latestBlockStored - if targetEndBlock.Sign() > 0 && targetEndBlock.Cmp(latestBlockRPC) <= 0 { - endBlock = targetEndBlock - } - if targetEndBlock.Uint64() == 0 
{ - endBlock = latestBlockRPC - } - - startBlock := targetStartBlock - - blockCount, err := m.destination.GetBlockCount(m.rpcClient.GetChainID(), startBlock, endBlock) - if err != nil { - log.Fatal().Err(err).Msg("Failed to get latest block from target storage") - } - log.Info().Msgf("Block count in the target storage for range %s to %s: count=%s", startBlock.String(), endBlock.String(), blockCount.String()) - - expectedCount := new(big.Int).Sub(endBlock, startBlock) - expectedCount = expectedCount.Add(expectedCount, big.NewInt(1)) - if expectedCount.Cmp(blockCount) == 0 { - log.Fatal().Msgf("Full range is already migrated") - return nil, nil - } - - maxStoredBlock, err := m.destination.GetMaxBlockNumberInRange(m.rpcClient.GetChainID(), startBlock, endBlock) - if err != nil { - log.Fatal().Err(err).Msg("Failed to get max block from destination storage") - return nil, nil - } - - log.Info().Msgf("Block in the target storage for range %s to %s: count=%s, max=%s", startBlock.String(), endBlock.String(), blockCount.String(), maxStoredBlock.String()) - // Only adjust start block if we actually have blocks stored (count > 0) - // When count is 0, maxStoredBlock might be 0 but that doesn't mean block 0 exists - if blockCount.Sign() > 0 && maxStoredBlock != nil && maxStoredBlock.Cmp(startBlock) >= 0 { - startBlock = new(big.Int).Add(maxStoredBlock, big.NewInt(1)) - } - - return startBlock, endBlock -} - -func (m *Migrator) FetchBlocksFromRPC(blockNumbers []*big.Int) ([]common.BlockData, error) { - allBlockData := make([]common.BlockData, 0, len(blockNumbers)) - - blockData := m.worker.Run(context.Background(), blockNumbers) - for _, block := range blockData { - if block.Error != nil { - return nil, block.Error - } - allBlockData = append(allBlockData, block.Data) - } - return allBlockData, nil -} - -func (m *Migrator) GetValidBlocksForRange(blockNumbers []*big.Int) ([]common.BlockData, error) { - getFullBlockTime := time.Now() - blockData, err := m.source.MainStorage.GetFullBlockData(m.rpcClient.GetChainID(), blockNumbers) - getFullBlockDuration := time.Since(getFullBlockTime) - if err != nil { - log.Error().Err(err).Msg("Failed to get full block data") - return nil, err - } - - validateBlockTime := time.Now() - validBlocks, _, err := m.validator.ValidateBlocks(blockData) - validateBlockDuration := time.Since(validateBlockTime) - if err != nil { - log.Error().Err(err).Msg("Failed to validate blocks") - return nil, err - } - - log.Debug().Dur("get_full_block", getFullBlockDuration).Dur("validate_block", validateBlockDuration).Int("count", len(blockNumbers)).Msg("Get valid blocks for range") - return validBlocks, nil -} - -func (m *Migrator) GetValidBlocksFromRPC(blockNumbers []*big.Int) []common.BlockData { - missingBlocksData, err := m.FetchBlocksFromRPC(blockNumbers) - if err != nil { - log.Fatal().Err(err).Msg("Failed to query missing blocks") - } - - validBlocks, invalidBlocks, err := m.validator.ValidateBlocks(missingBlocksData) - if err != nil { - log.Fatal().Err(err).Msg("Failed to validate missing blocks") - } - if len(invalidBlocks) > 0 { - log.Fatal().Msgf("Unable to validate %d newly queried missing blocks", len(invalidBlocks)) - } - return validBlocks -} - -func validateRPC(rpcClient rpc.IRPCClient, s storage.IStorage) (bool, error) { - if rpcClient.SupportsBlockReceipts() { - return true, nil - } - - // If rpc does not support block receipts, we need to check if the transactions are indexed with block receipts - transactionsQueryResult, err := 
s.MainStorage.GetTransactions(storage.QueryFilter{ - ChainId: rpcClient.GetChainID(), - Limit: 1, - }) - if err != nil { - log.Fatal().Err(err).Msg("Failed to get transactions from main storage") - } - if len(transactionsQueryResult.Data) == 0 { - log.Warn().Msg("No transactions found in main storage, assuming RPC is valid") - return true, nil - } - tx := transactionsQueryResult.Data[0] - if tx.GasUsed == nil { - // was indexed with logs not receipts and current rpc does not support block receipts - return true, nil - } - // was indexed with receipts and current rpc does not support block receipts - return false, nil -} - -func generateBlockNumbersForRange(startBlock, endBlock *big.Int) []*big.Int { - if startBlock.Cmp(endBlock) > 0 { - return []*big.Int{} - } - - // Pre-calculate capacity to avoid slice growth - length := new(big.Int).Sub(endBlock, startBlock) - length.Add(length, big.NewInt(1)) - - blockNumbers := make([]*big.Int, 0, length.Int64()) - for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { - blockNumbers = append(blockNumbers, new(big.Int).Set(i)) - } - return blockNumbers -} diff --git a/cmd/orchestrator.go b/cmd/orchestrator.go deleted file mode 100644 index 6d8a3576..00000000 --- a/cmd/orchestrator.go +++ /dev/null @@ -1,50 +0,0 @@ -package cmd - -import ( - "net/http" - - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - "github.com/thirdweb-dev/indexer/internal/orchestrator" - "github.com/thirdweb-dev/indexer/internal/rpc" -) - -var ( - orchestratorCmd = &cobra.Command{ - Use: "orchestrator", - Short: "TBD", - Long: "TBD", - Run: func(cmd *cobra.Command, args []string) { - RunOrchestrator(cmd, args) - }, - } -) - -func RunOrchestrator(cmd *cobra.Command, args []string) { - log.Info().Msg("Starting indexer") - rpc, err := rpc.Initialize() - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize RPC") - } - - orchestrator, err := orchestrator.NewOrchestrator(rpc) - if err != nil { - log.Fatal().Err(err).Msg("Failed to create orchestrator") - } - - // Start Prometheus metrics server - log.Info().Msg("Starting Metrics Server on port 2112") - go func() { - http.Handle("/metrics", promhttp.Handler()) - if err := http.ListenAndServe(":2112", nil); err != nil { - log.Error().Err(err).Msg("Metrics server error") - } - }() - - // Start orchestrator (blocks until shutdown) - // The orchestrator handles signals internally and coordinates shutdown - orchestrator.Start() - - log.Info().Msg("Shutdown complete") -} diff --git a/cmd/root.go b/cmd/root.go index 8d1cc0a2..5aea3b57 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -4,7 +4,6 @@ import ( "os" "github.com/spf13/cobra" - "github.com/spf13/viper" configs "github.com/thirdweb-dev/indexer/configs" customLogger "github.com/thirdweb-dev/indexer/internal/log" ) @@ -18,10 +17,6 @@ var ( Short: "TBD", Long: "TBD", Run: func(cmd *cobra.Command, args []string) { - go func() { - RunOrchestrator(cmd, args) - }() - RunApi(cmd, args) }, } ) @@ -36,404 +31,8 @@ func init() { cobra.OnInitialize(initConfig) rootCmd.PersistentFlags().StringVar(&cfgFile, "config", "", "config file (default is ./configs/config.yml)") - rootCmd.PersistentFlags().String("rpc-url", "", "RPC Url to use for the indexer") - rootCmd.PersistentFlags().Int("rpc-blocks-blocksPerRequest", 0, "How many blocks to fetch per request") - rootCmd.PersistentFlags().Int("rpc-blocks-batchDelay", 0, "Milliseconds to wait between batches of blocks when fetching from the RPC") - 
rootCmd.PersistentFlags().Int("rpc-logs-blocksPerRequest", 0, "How many blocks to fetch logs per request") - rootCmd.PersistentFlags().Int("rpc-logs-batchDelay", 0, "Milliseconds to wait between batches of logs when fetching from the RPC") - rootCmd.PersistentFlags().Bool("rpc-blockReceipts-enabled", false, "Whether to enable fetching block receipts from the RPC") - rootCmd.PersistentFlags().Int("rpc-blockReceipts-blocksPerRequest", 0, "How many blocks to fetch receipts for per request") - rootCmd.PersistentFlags().Int("rpc-blockReceipts-batchDelay", 0, "Milliseconds to wait between batches of receipts when fetching from the RPC") - rootCmd.PersistentFlags().Bool("rpc-traces-enabled", true, "Whether to enable fetching traces from the RPC") - rootCmd.PersistentFlags().Int("rpc-traces-blocksPerRequest", 0, "How many blocks to fetch traces per request") - rootCmd.PersistentFlags().Int("rpc-traces-batchDelay", 0, "Milliseconds to wait between batches of traces when fetching from the RPC") - rootCmd.PersistentFlags().String("log-level", "", "Log level to use for the application") - rootCmd.PersistentFlags().Bool("log-prettify", false, "Whether to prettify the log output") - rootCmd.PersistentFlags().Int("poller-parallel-pollers", 5, "Maximum number of parallel pollers") - rootCmd.PersistentFlags().String("poller-s3-bucket", "", "S3 bucket for oller archive source") - rootCmd.PersistentFlags().String("poller-s3-region", "", "S3 region for poller archive source") - rootCmd.PersistentFlags().String("poller-s3-prefix", "", "S3 prefix for poller archive source") - rootCmd.PersistentFlags().String("poller-s3-accessKeyId", "", "S3 access key ID for poller archive source") - rootCmd.PersistentFlags().String("poller-s3-secretAccessKey", "", "S3 secret access key for poller archive source") - rootCmd.PersistentFlags().String("poller-s3-endpoint", "", "S3 endpoint for poller archive source (for S3-compatible services)") - rootCmd.PersistentFlags().String("poller-s3-format", "parquet", "S3 storage format for poller archive source") - rootCmd.PersistentFlags().String("poller-s3-cacheDir", "/tmp/insight-archive", "Local cache directory for poller archive source") - rootCmd.PersistentFlags().Int("poller-s3-metadataTTL", 0, "Metadata cache TTL in seconds for poller archive source") - rootCmd.PersistentFlags().Int("poller-s3-fileCacheTTL", 0, "File cache TTL in seconds for poller archive source") - rootCmd.PersistentFlags().Int64("poller-s3-maxCacheSize", 0, "Max cache size in bytes for poller archive source (default 5GB)") - rootCmd.PersistentFlags().Int("poller-s3-cleanupInterval", 0, "Cache cleanup interval in seconds for poller archive source") - rootCmd.PersistentFlags().Int("poller-s3-maxConcurrentDownloads", 3, "Max concurrent downloads for poller archive source") - rootCmd.PersistentFlags().Bool("committer-enabled", true, "Toggle committer") - rootCmd.PersistentFlags().Int("committer-blocks-per-commit", 10, "How many blocks to commit each interval") - rootCmd.PersistentFlags().Int("committer-from-block", 0, "From which block to start committing") - rootCmd.PersistentFlags().Int("committer-to-block", 0, "To which block to commit") - rootCmd.PersistentFlags().Bool("reorgHandler-enabled", true, "Toggle reorg handler") - rootCmd.PersistentFlags().Int("reorgHandler-interval", 1000, "How often to run reorg handler in milliseconds") - rootCmd.PersistentFlags().Int("reorgHandler-blocks-per-scan", 100, "How many blocks to scan for reorgs") - rootCmd.PersistentFlags().Int("reorgHandler-from-block", 0, "From which 
block to start scanning for reorgs") - rootCmd.PersistentFlags().String("storage-staging-clickhouse-database", "", "Clickhouse database for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-clickhouse-port", 0, "Clickhouse port for staging storage") - rootCmd.PersistentFlags().String("storage-main-clickhouse-database", "", "Clickhouse database for main storage") - rootCmd.PersistentFlags().Int("storage-main-clickhouse-port", 0, "Clickhouse port for main storage") - rootCmd.PersistentFlags().String("storage-orchestrator-clickhouse-database", "", "Clickhouse database for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-port", 0, "Clickhouse port for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-clickhouse-host", "", "Clickhouse host for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-clickhouse-username", "", "Clickhouse username for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-clickhouse-password", "", "Clickhouse password for orchestrator storage") - rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-asyncInsert", true, "Clickhouse async insert for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxRowsPerInsert", 100000, "Clickhouse max rows per insert for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxOpenConns", 30, "Clickhouse max open connections for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for orchestrator storage") - rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-disableTLS", false, "Clickhouse disableTLS for orchestrator storage") - rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for orchestrator storage") - rootCmd.PersistentFlags().Bool("storage-orchestrator-clickhouse-enableCompression", true, "Clickhouse enableCompression for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxQueryTime", 60, "Clickhouse max query time for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-postgres-host", "", "PostgreSQL host for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-postgres-port", 5432, "PostgreSQL port for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-postgres-username", "", "PostgreSQL username for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-postgres-password", "", "PostgreSQL password for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-postgres-database", "", "PostgreSQL database for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-postgres-sslMode", "require", "PostgreSQL SSL mode for orchestrator storage (disable, require, verify-ca, verify-full)") - rootCmd.PersistentFlags().Int("storage-orchestrator-postgres-maxOpenConns", 50, "PostgreSQL max open connections for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for orchestrator storage") - 
rootCmd.PersistentFlags().Int("storage-orchestrator-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for orchestrator storage") - rootCmd.PersistentFlags().Int("storage-orchestrator-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for orchestrator storage") - rootCmd.PersistentFlags().String("storage-staging-clickhouse-host", "", "Clickhouse host for staging storage") - rootCmd.PersistentFlags().String("storage-main-clickhouse-host", "", "Clickhouse host for main storage") - rootCmd.PersistentFlags().String("storage-main-clickhouse-username", "", "Clickhouse username for main storage") - rootCmd.PersistentFlags().String("storage-main-clickhouse-password", "", "Clickhouse password for main storage") - rootCmd.PersistentFlags().Bool("storage-main-clickhouse-asyncInsert", true, "Clickhouse async insert for main storage") - rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxRowsPerInsert", 100000, "Clickhouse max rows per insert for main storage") - rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxOpenConns", 30, "Clickhouse max open connections for main storage") - rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for main storage") - rootCmd.PersistentFlags().Bool("storage-main-clickhouse-disableTLS", false, "Clickhouse disableTLS for main storage") - rootCmd.PersistentFlags().Bool("storage-main-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for main storage") - rootCmd.PersistentFlags().Bool("storage-main-clickhouse-enableCompression", true, "Clickhouse enableCompression for main storage") - rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxQueryTime", 60, "Clickhouse max query time for main storage") - rootCmd.PersistentFlags().Int("storage-main-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for main storage") - rootCmd.PersistentFlags().String("storage-staging-clickhouse-username", "", "Clickhouse username for staging storage") - rootCmd.PersistentFlags().String("storage-staging-clickhouse-password", "", "Clickhouse password for staging storage") - rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-asyncInsert", true, "Clickhouse async insert for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxRowsPerInsert", 100000, "Clickhouse max rows per insert for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxOpenConns", 30, "Clickhouse max open connections for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for staging storage") - rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-disableTLS", false, "Clickhouse disableTLS for staging storage") - rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-enableParallelViewProcessing", false, "Clickhouse enableParallelViewProcessing for staging storage") - rootCmd.PersistentFlags().Bool("storage-staging-clickhouse-enableCompression", true, "Clickhouse enableCompression for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxQueryTime", 60, "Clickhouse max query time for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-clickhouse-maxMemoryUsage", 1000000000, "Clickhouse max memory usage in bytes for staging storage") - rootCmd.PersistentFlags().String("storage-staging-postgres-host", "", "PostgreSQL host for staging storage") - 
rootCmd.PersistentFlags().Int("storage-staging-postgres-port", 5432, "PostgreSQL port for staging storage") - rootCmd.PersistentFlags().String("storage-staging-postgres-username", "", "PostgreSQL username for staging storage") - rootCmd.PersistentFlags().String("storage-staging-postgres-password", "", "PostgreSQL password for staging storage") - rootCmd.PersistentFlags().String("storage-staging-postgres-database", "", "PostgreSQL database for staging storage") - rootCmd.PersistentFlags().String("storage-staging-postgres-sslMode", "require", "PostgreSQL SSL mode for staging storage (disable, require, verify-ca, verify-full)") - rootCmd.PersistentFlags().Int("storage-staging-postgres-maxOpenConns", 50, "PostgreSQL max open connections for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for staging storage") - rootCmd.PersistentFlags().Int("storage-staging-postgres-connectTimeout", 10, "PostgreSQL connection timeout in seconds for staging storage") - rootCmd.PersistentFlags().String("storage-main-kafka-brokers", "", "Kafka brokers for main storage") - rootCmd.PersistentFlags().String("storage-main-kafka-username", "", "Kafka username for main storage") - rootCmd.PersistentFlags().String("storage-main-kafka-password", "", "Kafka password for main storage") - rootCmd.PersistentFlags().Bool("storage-main-kafka-enable-tls", true, "Enable TLS for Kafka connection in main storage") - rootCmd.PersistentFlags().String("storage-orchestrator-redis-host", "", "Redis host for orchestrator storage metadata") - rootCmd.PersistentFlags().Int("storage-orchestrator-redis-port", 6379, "Redis port for orchestrator storage metadata") - rootCmd.PersistentFlags().String("storage-orchestrator-redis-password", "", "Redis password for orchestator storage metadata") - rootCmd.PersistentFlags().Int("storage-orchestrator-redis-db", 0, "Redis database number for orchestrator storage metadata") - rootCmd.PersistentFlags().Bool("storage-orchestrator-redis-enableTLS", true, "Enable TLS for Redis connection in orchestrator storage metadata") - rootCmd.PersistentFlags().String("storage-staging-type", "auto", "Storage type for staging (auto, clickhouse, postgres, kafka, badger, s3)") - rootCmd.PersistentFlags().String("storage-main-type", "auto", "Storage type for main (auto, clickhouse, postgres, kafka, badger, s3)") - rootCmd.PersistentFlags().String("storage-orchestrator-type", "auto", "Storage type for orchestrator (auto, clickhouse, postgres, badger)") - rootCmd.PersistentFlags().String("storage-staging-badger-path", "", "BadgerDB path for staging storage") - rootCmd.PersistentFlags().String("storage-orchestrator-badger-path", "", "BadgerDB path for orchestrator storage") - rootCmd.PersistentFlags().String("storage-orchestrator-pebble-path", "", "PebbleDB path for orchestrator storage") - rootCmd.PersistentFlags().String("storage-staging-pebble-path", "", "PebbleDB path for staging storage") - rootCmd.PersistentFlags().String("storage-main-s3-bucket", "", "S3 bucket for main storage") - rootCmd.PersistentFlags().String("storage-main-s3-region", "", "S3 region for main storage") - rootCmd.PersistentFlags().String("storage-main-s3-prefix", "", "S3 key prefix for main storage") - rootCmd.PersistentFlags().String("storage-main-s3-accessKeyId", "", "S3 access key ID for main storage") - 
rootCmd.PersistentFlags().String("storage-main-s3-secretAccessKey", "", "S3 secret access key for main storage") - rootCmd.PersistentFlags().String("storage-main-s3-endpoint", "", "S3 endpoint URL for main storage (for S3-compatible services)") - rootCmd.PersistentFlags().String("storage-main-s3-format", "parquet", "S3 storage format for main storage (parquet or json)") - rootCmd.PersistentFlags().Int64("storage-main-s3-bufferSizeMB", 512, "S3 buffer size in MB before flush for main storage") - rootCmd.PersistentFlags().Int("storage-main-s3-bufferTimeoutSeconds", 300, "S3 buffer timeout in seconds before flush for main storage") - rootCmd.PersistentFlags().Int("storage-main-s3-maxBlocksPerFile", 0, "S3 max blocks per file for main storage (0 = no limit)") - rootCmd.PersistentFlags().String("storage-main-s3-parquet-compression", "snappy", "Parquet compression type for S3 main storage") - rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-rowGroupSize", 256, "Parquet row group size in MB for S3 main storage") - rootCmd.PersistentFlags().Int64("storage-main-s3-parquet-pageSize", 8192, "Parquet page size in KB for S3 main storage") - rootCmd.PersistentFlags().String("api-host", "localhost:3000", "API host") - rootCmd.PersistentFlags().String("api-basicAuth-username", "", "API basic auth username") - rootCmd.PersistentFlags().String("api-basicAuth-password", "", "API basic auth password") - rootCmd.PersistentFlags().String("api-thirdweb-clientId", "", "Thirdweb client id") - rootCmd.PersistentFlags().Int("api-contractApiRequest-maxIdleConns", 100, "Max idle connections for contract API request") - rootCmd.PersistentFlags().Int("api-contractApiRequest-maxIdleConnsPerHost", 100, "Max idle connections per host for contract API request") - rootCmd.PersistentFlags().Int("api-contractApiRequest-maxConnsPerHost", 100, "Max connections per host for contract API request") - rootCmd.PersistentFlags().Int("api-contractApiRequest-idleConnTimeout", 90, "Idle connection timeout for contract API request in seconds") - rootCmd.PersistentFlags().Bool("api-contractApiRequest-disableCompression", false, "Disable compression for contract API request") - rootCmd.PersistentFlags().Int("api-contractApiRequest-timeout", 10, "Timeout in seconds for contract API request") - rootCmd.PersistentFlags().Bool("publisher-enabled", false, "Toggle publisher") - rootCmd.PersistentFlags().String("publisher-mode", "default", "Publisher mode: default or parallel") - rootCmd.PersistentFlags().String("publisher-brokers", "", "Kafka brokers") - rootCmd.PersistentFlags().String("publisher-username", "", "Kafka username for publisher") - rootCmd.PersistentFlags().String("publisher-password", "", "Kafka password for publisher") - rootCmd.PersistentFlags().Bool("publisher-enable-tls", true, "Enable TLS for Kafka connection in publisher") - rootCmd.PersistentFlags().Bool("publisher-blocks-enabled", false, "Toggle block publisher") - rootCmd.PersistentFlags().String("publisher-blocks-topicName", "", "Kafka topic name for blocks") - rootCmd.PersistentFlags().Bool("publisher-transactions-enabled", false, "Toggle transaction publisher") - rootCmd.PersistentFlags().String("publisher-transactions-topicName", "", "Kafka topic name for transactions") - rootCmd.PersistentFlags().String("publisher-transactions-toFilter", "", "Filter transactions by to address") - rootCmd.PersistentFlags().String("publisher-transactions-fromFilter", "", "Filter transactions by from address") - rootCmd.PersistentFlags().Bool("publisher-traces-enabled", false, 
"Toggle trace publisher") - rootCmd.PersistentFlags().String("publisher-traces-topicName", "", "Kafka topic name for traces") - rootCmd.PersistentFlags().Bool("publisher-events-enabled", false, "Toggle event publisher") - rootCmd.PersistentFlags().String("publisher-events-topicName", "", "Kafka topic name for events") - rootCmd.PersistentFlags().String("publisher-events-addressFilter", "", "Filter events by address") - rootCmd.PersistentFlags().String("publisher-events-topic0Filter", "", "Filter events by topic0") - rootCmd.PersistentFlags().String("validation-mode", "strict", "Validation mode. Strict will validate logsBloom and transactionsRoot. Minimal will validate transaction count and logs existence.") - rootCmd.PersistentFlags().String("migrator-destination-type", "auto", "Storage type for migrator destination (auto, clickhouse, postgres, kafka, badger, pebble, s3)") - rootCmd.PersistentFlags().String("migrator-destination-clickhouse-host", "", "Clickhouse host for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-port", 0, "Clickhouse port for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-clickhouse-username", "", "Clickhouse username for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-clickhouse-password", "", "Clickhouse password for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-clickhouse-database", "", "Clickhouse database for migrator destination") - rootCmd.PersistentFlags().Bool("migrator-destination-clickhouse-disableTLS", false, "Clickhouse disableTLS for migrator destination") - rootCmd.PersistentFlags().Bool("migrator-destination-clickhouse-asyncInsert", true, "Clickhouse async insert for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxRowsPerInsert", 100000, "Clickhouse max rows per insert for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxOpenConns", 30, "Clickhouse max open connections for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-clickhouse-maxIdleConns", 30, "Clickhouse max idle connections for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-postgres-host", "", "PostgreSQL host for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-postgres-port", 5432, "PostgreSQL port for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-postgres-username", "", "PostgreSQL username for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-postgres-password", "", "PostgreSQL password for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-postgres-database", "", "PostgreSQL database for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-postgres-sslMode", "require", "PostgreSQL SSL mode for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxOpenConns", 50, "PostgreSQL max open connections for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxIdleConns", 25, "PostgreSQL max idle connections for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-postgres-maxConnLifetime", 300, "PostgreSQL max connection lifetime in seconds for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-postgres-connectTimeout", 10, "PostgreSQL connection 
timeout in seconds for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-kafka-brokers", "", "Kafka brokers for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-kafka-username", "", "Kafka username for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-kafka-password", "", "Kafka password for migrator destination") - rootCmd.PersistentFlags().Bool("migrator-destination-kafka-enableTLS", true, "Enable TLS for Kafka connection in migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-s3-bucket", "", "S3 bucket for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-s3-region", "", "S3 region for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-s3-prefix", "", "S3 key prefix for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-s3-accessKeyId", "", "S3 access key ID for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-s3-secretAccessKey", "", "S3 secret access key for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-s3-endpoint", "", "S3 endpoint URL for migrator destination") - rootCmd.PersistentFlags().String("migrator-destination-s3-format", "parquet", "S3 storage format for migrator destination") - rootCmd.PersistentFlags().Int64("migrator-destination-s3-bufferSizeMB", 0, "S3 buffer size in MB before flush for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-s3-bufferTimeoutSeconds", 0, "S3 buffer timeout in seconds before flush for migrator destination") - rootCmd.PersistentFlags().Int("migrator-destination-s3-maxBlocksPerFile", 0, "S3 max blocks per file for migrator destination") - rootCmd.PersistentFlags().Uint("migrator-batchSize", 0, "Batch size for storage operations in migrator") - rootCmd.PersistentFlags().Uint("migrator-startBlock", 0, "Start block for migration") - rootCmd.PersistentFlags().Uint("migrator-endBlock", 0, "End block for migration") - rootCmd.PersistentFlags().Uint("migrator-workerCount", 0, "Worker count for migration") - - viper.BindPFlag("rpc.url", rootCmd.PersistentFlags().Lookup("rpc-url")) - viper.BindPFlag("rpc.blocks.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-blocks-blocksPerRequest")) - viper.BindPFlag("rpc.blocks.batchDelay", rootCmd.PersistentFlags().Lookup("rpc-blocks-batchDelay")) - viper.BindPFlag("rpc.logs.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-logs-blocksPerRequest")) - viper.BindPFlag("rpc.logs.batchDelay", rootCmd.PersistentFlags().Lookup("rpc-logs-batchDelay")) - viper.BindPFlag("rpc.blockReceipts.enabled", rootCmd.PersistentFlags().Lookup("rpc-blockReceipts-enabled")) - viper.BindPFlag("rpc.blockReceipts.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-blockReceipts-blocksPerRequest")) - viper.BindPFlag("rpc.blockReceipts.batchDelay", rootCmd.PersistentFlags().Lookup("rpc-blockReceipts-batchDelay")) - viper.BindPFlag("rpc.traces.enabled", rootCmd.PersistentFlags().Lookup("rpc-traces-enabled")) - viper.BindPFlag("rpc.traces.blocksPerRequest", rootCmd.PersistentFlags().Lookup("rpc-traces-blocksPerRequest")) - viper.BindPFlag("rpc.traces.batchDelay", rootCmd.PersistentFlags().Lookup("rpc-traces-batchDelay")) - viper.BindPFlag("log.level", rootCmd.PersistentFlags().Lookup("log-level")) - viper.BindPFlag("log.prettify", rootCmd.PersistentFlags().Lookup("log-prettify")) - viper.BindPFlag("poller.enabled", 
rootCmd.PersistentFlags().Lookup("poller-enabled")) - viper.BindPFlag("poller.parallelPollers", rootCmd.PersistentFlags().Lookup("poller-parallel-pollers")) - viper.BindPFlag("poller.s3.endpoint", rootCmd.PersistentFlags().Lookup("poller-s3-endpoint")) - viper.BindPFlag("poller.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("poller-s3-accessKeyId")) - viper.BindPFlag("poller.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("poller-s3-secretAccessKey")) - viper.BindPFlag("poller.s3.bucket", rootCmd.PersistentFlags().Lookup("poller-s3-bucket")) - viper.BindPFlag("poller.s3.region", rootCmd.PersistentFlags().Lookup("poller-s3-region")) - viper.BindPFlag("poller.s3.prefix", rootCmd.PersistentFlags().Lookup("poller-s3-prefix")) - viper.BindPFlag("poller.s3.cacheDir", rootCmd.PersistentFlags().Lookup("poller-s3-cacheDir")) - viper.BindPFlag("poller.s3.metadataTTL", rootCmd.PersistentFlags().Lookup("poller-s3-metadataTTL")) - viper.BindPFlag("poller.s3.fileCacheTTL", rootCmd.PersistentFlags().Lookup("poller-s3-fileCacheTTL")) - viper.BindPFlag("poller.s3.maxCacheSize", rootCmd.PersistentFlags().Lookup("poller-s3-maxCacheSize")) - viper.BindPFlag("poller.s3.cleanupInterval", rootCmd.PersistentFlags().Lookup("poller-s3-cleanupInterval")) - viper.BindPFlag("poller.s3.maxConcurrentDownloads", rootCmd.PersistentFlags().Lookup("poller-s3-maxConcurrentDownloads")) - viper.BindPFlag("committer.enabled", rootCmd.PersistentFlags().Lookup("committer-enabled")) - viper.BindPFlag("committer.blocksPerCommit", rootCmd.PersistentFlags().Lookup("committer-blocks-per-commit")) - viper.BindPFlag("committer.fromBlock", rootCmd.PersistentFlags().Lookup("committer-from-block")) - viper.BindPFlag("committer.toBlock", rootCmd.PersistentFlags().Lookup("committer-to-block")) - viper.BindPFlag("reorgHandler.enabled", rootCmd.PersistentFlags().Lookup("reorgHandler-enabled")) - viper.BindPFlag("reorgHandler.interval", rootCmd.PersistentFlags().Lookup("reorgHandler-interval")) - viper.BindPFlag("reorgHandler.blocksPerScan", rootCmd.PersistentFlags().Lookup("reorgHandler-blocks-per-scan")) - viper.BindPFlag("reorgHandler.fromBlock", rootCmd.PersistentFlags().Lookup("reorgHandler-from-block")) - viper.BindPFlag("storage.staging.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-database")) - viper.BindPFlag("storage.staging.clickhouse.host", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-host")) - viper.BindPFlag("storage.staging.clickhouse.port", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-port")) - viper.BindPFlag("storage.staging.clickhouse.username", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-username")) - viper.BindPFlag("storage.staging.clickhouse.password", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-password")) - viper.BindPFlag("storage.staging.clickhouse.asyncInsert", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-asyncInsert")) - viper.BindPFlag("storage.staging.clickhouse.maxRowsPerInsert", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxRowsPerInsert")) - viper.BindPFlag("storage.staging.clickhouse.maxOpenConns", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxOpenConns")) - viper.BindPFlag("storage.staging.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxIdleConns")) - viper.BindPFlag("storage.staging.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-disableTLS")) - 
viper.BindPFlag("storage.staging.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-enableParallelViewProcessing")) - viper.BindPFlag("storage.staging.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-enableCompression")) - viper.BindPFlag("storage.staging.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxQueryTime")) - viper.BindPFlag("storage.staging.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-staging-clickhouse-maxMemoryUsage")) - viper.BindPFlag("storage.main.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-database")) - viper.BindPFlag("storage.main.clickhouse.host", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-host")) - viper.BindPFlag("storage.main.clickhouse.port", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-port")) - viper.BindPFlag("storage.main.clickhouse.username", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-username")) - viper.BindPFlag("storage.main.clickhouse.password", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-password")) - viper.BindPFlag("storage.main.clickhouse.asyncInsert", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-asyncInsert")) - viper.BindPFlag("storage.main.clickhouse.maxRowsPerInsert", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxRowsPerInsert")) - viper.BindPFlag("storage.main.clickhouse.maxOpenConns", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxOpenConns")) - viper.BindPFlag("storage.main.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxIdleConns")) - viper.BindPFlag("storage.main.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-disableTLS")) - viper.BindPFlag("storage.main.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-enableParallelViewProcessing")) - viper.BindPFlag("storage.main.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-enableCompression")) - viper.BindPFlag("storage.main.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxQueryTime")) - viper.BindPFlag("storage.main.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-main-clickhouse-maxMemoryUsage")) - viper.BindPFlag("storage.orchestrator.clickhouse.database", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-database")) - viper.BindPFlag("storage.orchestrator.clickhouse.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-host")) - viper.BindPFlag("storage.orchestrator.clickhouse.port", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-port")) - viper.BindPFlag("storage.orchestrator.clickhouse.username", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-username")) - viper.BindPFlag("storage.orchestrator.clickhouse.password", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-password")) - viper.BindPFlag("storage.orchestrator.clickhouse.asyncInsert", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-asyncInsert")) - viper.BindPFlag("storage.orchestrator.clickhouse.maxRowsPerInsert", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxRowsPerInsert")) - viper.BindPFlag("storage.orchestrator.clickhouse.maxOpenConns", 
rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxOpenConns")) - viper.BindPFlag("storage.orchestrator.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxIdleConns")) - viper.BindPFlag("storage.orchestrator.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-disableTLS")) - viper.BindPFlag("storage.orchestrator.clickhouse.enableParallelViewProcessing", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-enableParallelViewProcessing")) - viper.BindPFlag("storage.orchestrator.clickhouse.enableCompression", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-enableCompression")) - viper.BindPFlag("storage.orchestrator.clickhouse.maxQueryTime", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxQueryTime")) - viper.BindPFlag("storage.orchestrator.clickhouse.maxMemoryUsage", rootCmd.PersistentFlags().Lookup("storage-orchestrator-clickhouse-maxMemoryUsage")) - viper.BindPFlag("storage.orchestrator.postgres.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-host")) - viper.BindPFlag("storage.orchestrator.postgres.port", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-port")) - viper.BindPFlag("storage.orchestrator.postgres.username", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-username")) - viper.BindPFlag("storage.orchestrator.postgres.password", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-password")) - viper.BindPFlag("storage.orchestrator.postgres.database", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-database")) - viper.BindPFlag("storage.orchestrator.postgres.sslMode", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-sslMode")) - viper.BindPFlag("storage.orchestrator.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxOpenConns")) - viper.BindPFlag("storage.orchestrator.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxIdleConns")) - viper.BindPFlag("storage.orchestrator.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-maxConnLifetime")) - viper.BindPFlag("storage.orchestrator.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-orchestrator-postgres-connectTimeout")) - viper.BindPFlag("storage.orchestrator.redis.host", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-host")) - viper.BindPFlag("storage.orchestrator.redis.port", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-port")) - viper.BindPFlag("storage.orchestrator.redis.password", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-password")) - viper.BindPFlag("storage.orchestrator.redis.db", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-db")) - viper.BindPFlag("storage.orchestrator.redis.enableTLS", rootCmd.PersistentFlags().Lookup("storage-orchestrator-redis-enableTLS")) - viper.BindPFlag("storage.orchestrator.badger.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-badger-path")) - viper.BindPFlag("storage.orchestrator.pebble.path", rootCmd.PersistentFlags().Lookup("storage-orchestrator-pebble-path")) - viper.BindPFlag("storage.orchestrator.type", rootCmd.PersistentFlags().Lookup("storage-orchestrator-type")) - viper.BindPFlag("storage.staging.postgres.host", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-host")) - viper.BindPFlag("storage.staging.postgres.port", 
rootCmd.PersistentFlags().Lookup("storage-staging-postgres-port")) - viper.BindPFlag("storage.staging.postgres.username", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-username")) - viper.BindPFlag("storage.staging.postgres.password", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-password")) - viper.BindPFlag("storage.staging.postgres.database", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-database")) - viper.BindPFlag("storage.staging.postgres.sslMode", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-sslMode")) - viper.BindPFlag("storage.staging.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxOpenConns")) - viper.BindPFlag("storage.staging.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxIdleConns")) - viper.BindPFlag("storage.staging.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-maxConnLifetime")) - viper.BindPFlag("storage.staging.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("storage-staging-postgres-connectTimeout")) - viper.BindPFlag("storage.staging.badger.path", rootCmd.PersistentFlags().Lookup("storage-staging-badger-path")) - viper.BindPFlag("storage.staging.pebble.path", rootCmd.PersistentFlags().Lookup("storage-staging-pebble-path")) - viper.BindPFlag("storage.staging.type", rootCmd.PersistentFlags().Lookup("storage-staging-type")) - viper.BindPFlag("storage.main.kafka.brokers", rootCmd.PersistentFlags().Lookup("storage-main-kafka-brokers")) - viper.BindPFlag("storage.main.kafka.username", rootCmd.PersistentFlags().Lookup("storage-main-kafka-username")) - viper.BindPFlag("storage.main.kafka.password", rootCmd.PersistentFlags().Lookup("storage-main-kafka-password")) - viper.BindPFlag("storage.main.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("storage-main-kafka-enable-tls")) - viper.BindPFlag("storage.main.type", rootCmd.PersistentFlags().Lookup("storage-main-type")) - viper.BindPFlag("storage.main.s3.bucket", rootCmd.PersistentFlags().Lookup("storage-main-s3-bucket")) - viper.BindPFlag("storage.main.s3.region", rootCmd.PersistentFlags().Lookup("storage-main-s3-region")) - viper.BindPFlag("storage.main.s3.prefix", rootCmd.PersistentFlags().Lookup("storage-main-s3-prefix")) - viper.BindPFlag("storage.main.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("storage-main-s3-accessKeyId")) - viper.BindPFlag("storage.main.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("storage-main-s3-secretAccessKey")) - viper.BindPFlag("storage.main.s3.endpoint", rootCmd.PersistentFlags().Lookup("storage-main-s3-endpoint")) - viper.BindPFlag("storage.main.s3.format", rootCmd.PersistentFlags().Lookup("storage-main-s3-format")) - viper.BindPFlag("storage.main.s3.bufferSizeMB", rootCmd.PersistentFlags().Lookup("storage-main-s3-bufferSizeMB")) - viper.BindPFlag("storage.main.s3.bufferTimeoutSeconds", rootCmd.PersistentFlags().Lookup("storage-main-s3-bufferTimeoutSeconds")) - viper.BindPFlag("storage.main.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("storage-main-s3-maxBlocksPerFile")) - viper.BindPFlag("storage.main.s3.parquet.compression", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-compression")) - viper.BindPFlag("storage.main.s3.parquet.rowGroupSize", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-rowGroupSize")) - viper.BindPFlag("storage.main.s3.parquet.pageSize", rootCmd.PersistentFlags().Lookup("storage-main-s3-parquet-pageSize")) - viper.BindPFlag("api.host", 
rootCmd.PersistentFlags().Lookup("api-host")) - viper.BindPFlag("api.basicAuth.username", rootCmd.PersistentFlags().Lookup("api-basicAuth-username")) - viper.BindPFlag("api.basicAuth.password", rootCmd.PersistentFlags().Lookup("api-basicAuth-password")) - viper.BindPFlag("api.thirdweb.clientId", rootCmd.PersistentFlags().Lookup("api-thirdweb-clientId")) - viper.BindPFlag("api.contractApiRequest.maxIdleConns", rootCmd.PersistentFlags().Lookup("api-contractApiRequest-maxIdleConns")) - viper.BindPFlag("api.contractApiRequest.maxIdleConnsPerHost", rootCmd.PersistentFlags().Lookup("api-contractApiRequest-maxIdleConnsPerHost")) - viper.BindPFlag("api.contractApiRequest.maxConnsPerHost", rootCmd.PersistentFlags().Lookup("api-contractApiRequest-maxConnsPerHost")) - viper.BindPFlag("api.contractApiRequest.idleConnTimeout", rootCmd.PersistentFlags().Lookup("api-contractApiRequest-idleConnTimeout")) - viper.BindPFlag("api.contractApiRequest.disableCompression", rootCmd.PersistentFlags().Lookup("api-contractApiRequest-disableCompression")) - viper.BindPFlag("api.contractApiRequest.timeout", rootCmd.PersistentFlags().Lookup("api-contractApiRequest-timeout")) - viper.BindPFlag("publisher.enabled", rootCmd.PersistentFlags().Lookup("publisher-enabled")) - viper.BindPFlag("publisher.mode", rootCmd.PersistentFlags().Lookup("publisher-mode")) - viper.BindPFlag("publisher.brokers", rootCmd.PersistentFlags().Lookup("publisher-brokers")) - viper.BindPFlag("publisher.username", rootCmd.PersistentFlags().Lookup("publisher-username")) - viper.BindPFlag("publisher.password", rootCmd.PersistentFlags().Lookup("publisher-password")) - viper.BindPFlag("publisher.enableTLS", rootCmd.PersistentFlags().Lookup("publisher-enable-tls")) - viper.BindPFlag("publisher.blocks.enabled", rootCmd.PersistentFlags().Lookup("publisher-blocks-enabled")) - viper.BindPFlag("publisher.blocks.topicName", rootCmd.PersistentFlags().Lookup("publisher-blocks-topicName")) - viper.BindPFlag("publisher.transactions.enabled", rootCmd.PersistentFlags().Lookup("publisher-transactions-enabled")) - viper.BindPFlag("publisher.transactions.topicName", rootCmd.PersistentFlags().Lookup("publisher-transactions-topicName")) - viper.BindPFlag("publisher.transactions.toFilter", rootCmd.PersistentFlags().Lookup("publisher-transactions-toFilter")) - viper.BindPFlag("publisher.transactions.fromFilter", rootCmd.PersistentFlags().Lookup("publisher-transactions-fromFilter")) - viper.BindPFlag("publisher.traces.enabled", rootCmd.PersistentFlags().Lookup("publisher-traces-enabled")) - viper.BindPFlag("publisher.traces.topicName", rootCmd.PersistentFlags().Lookup("publisher-traces-topicName")) - viper.BindPFlag("publisher.events.enabled", rootCmd.PersistentFlags().Lookup("publisher-events-enabled")) - viper.BindPFlag("publisher.events.topicName", rootCmd.PersistentFlags().Lookup("publisher-events-topicName")) - viper.BindPFlag("publisher.events.addressFilter", rootCmd.PersistentFlags().Lookup("publisher-events-addressFilter")) - viper.BindPFlag("publisher.events.topic0Filter", rootCmd.PersistentFlags().Lookup("publisher-events-topic0Filter")) - viper.BindPFlag("validation.mode", rootCmd.PersistentFlags().Lookup("validation-mode")) - // Migrator viper bindings - viper.BindPFlag("migrator.destination.type", rootCmd.PersistentFlags().Lookup("migrator-destination-type")) - viper.BindPFlag("migrator.destination.clickhouse.host", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-host")) - viper.BindPFlag("migrator.destination.clickhouse.port", 
rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-port")) - viper.BindPFlag("migrator.destination.clickhouse.username", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-username")) - viper.BindPFlag("migrator.destination.clickhouse.password", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-password")) - viper.BindPFlag("migrator.destination.clickhouse.database", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-database")) - viper.BindPFlag("migrator.destination.clickhouse.disableTLS", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-disableTLS")) - viper.BindPFlag("migrator.destination.clickhouse.asyncInsert", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-asyncInsert")) - viper.BindPFlag("migrator.destination.clickhouse.maxRowsPerInsert", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxRowsPerInsert")) - viper.BindPFlag("migrator.destination.clickhouse.maxOpenConns", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxOpenConns")) - viper.BindPFlag("migrator.destination.clickhouse.maxIdleConns", rootCmd.PersistentFlags().Lookup("migrator-destination-clickhouse-maxIdleConns")) - viper.BindPFlag("migrator.destination.postgres.host", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-host")) - viper.BindPFlag("migrator.destination.postgres.port", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-port")) - viper.BindPFlag("migrator.destination.postgres.username", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-username")) - viper.BindPFlag("migrator.destination.postgres.password", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-password")) - viper.BindPFlag("migrator.destination.postgres.database", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-database")) - viper.BindPFlag("migrator.destination.postgres.sslMode", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-sslMode")) - viper.BindPFlag("migrator.destination.postgres.maxOpenConns", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxOpenConns")) - viper.BindPFlag("migrator.destination.postgres.maxIdleConns", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxIdleConns")) - viper.BindPFlag("migrator.destination.postgres.maxConnLifetime", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-maxConnLifetime")) - viper.BindPFlag("migrator.destination.postgres.connectTimeout", rootCmd.PersistentFlags().Lookup("migrator-destination-postgres-connectTimeout")) - viper.BindPFlag("migrator.destination.kafka.brokers", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-brokers")) - viper.BindPFlag("migrator.destination.kafka.username", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-username")) - viper.BindPFlag("migrator.destination.kafka.password", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-password")) - viper.BindPFlag("migrator.destination.kafka.enableTLS", rootCmd.PersistentFlags().Lookup("migrator-destination-kafka-enableTLS")) - viper.BindPFlag("migrator.destination.s3.bucket", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bucket")) - viper.BindPFlag("migrator.destination.s3.region", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-region")) - viper.BindPFlag("migrator.destination.s3.prefix", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-prefix")) - 
viper.BindPFlag("migrator.destination.s3.accessKeyId", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-accessKeyId")) - viper.BindPFlag("migrator.destination.s3.secretAccessKey", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-secretAccessKey")) - viper.BindPFlag("migrator.destination.s3.endpoint", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-endpoint")) - viper.BindPFlag("migrator.destination.s3.format", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-format")) - viper.BindPFlag("migrator.destination.s3.bufferSizeMB", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bufferSizeMB")) - viper.BindPFlag("migrator.destination.s3.bufferTimeoutSeconds", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-bufferTimeoutSeconds")) - viper.BindPFlag("migrator.destination.s3.maxBlocksPerFile", rootCmd.PersistentFlags().Lookup("migrator-destination-s3-maxBlocksPerFile")) - viper.BindPFlag("migrator.startBlock", rootCmd.PersistentFlags().Lookup("migrator-startBlock")) - viper.BindPFlag("migrator.endBlock", rootCmd.PersistentFlags().Lookup("migrator-endBlock")) - viper.BindPFlag("migrator.batchSize", rootCmd.PersistentFlags().Lookup("migrator-batchSize")) - viper.BindPFlag("migrator.workerCount", rootCmd.PersistentFlags().Lookup("migrator-workerCount")) - - rootCmd.AddCommand(orchestratorCmd) - rootCmd.AddCommand(apiCmd) rootCmd.AddCommand(committerCmd) rootCmd.AddCommand(backfillCmd) - rootCmd.AddCommand(validateAndFixCmd) - rootCmd.AddCommand(validateCmd) - rootCmd.AddCommand(migrateValidationCmd) } func initConfig() { diff --git a/cmd/validate.go b/cmd/validate.go deleted file mode 100644 index 9190cf21..00000000 --- a/cmd/validate.go +++ /dev/null @@ -1,77 +0,0 @@ -package cmd - -import ( - "math/big" - - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/orchestrator" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/internal/worker" -) - -var ( - validateCmd = &cobra.Command{ - Use: "validate", - Short: "Validate blockchain data integrity", - Long: "Validate a range of blocks for data integrity issues including transaction roots and logs bloom verification", - Run: func(cmd *cobra.Command, args []string) { - RunValidate(cmd, args) - }, - } -) - -/** - * Validates a range of blocks (end and start are inclusive) for a given chain - * First argument is the start block number - * Second argument (optional) is the end block number - */ -func RunValidate(cmd *cobra.Command, args []string) { - if len(args) < 1 { - log.Fatal().Msg("Start block number is required") - } - startBlock, success := new(big.Int).SetString(args[0], 10) - if !success { - log.Fatal().Msg("Failed to parse start block number") - } - - var endBlock *big.Int - if len(args) > 1 { - endBlock, success = new(big.Int).SetString(args[1], 10) - if !success { - log.Fatal().Msg("Failed to parse end block number") - } - } - if endBlock == nil { - endBlock = startBlock - } - - rpcClient, err := rpc.Initialize() - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize RPC") - } - log.Info().Msgf("Running validation for chain %d", rpcClient.GetChainID()) - - s, err := storage.NewStorageConnector(&config.Cfg.Storage) - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize storage") - } - - validator := orchestrator.NewValidator(rpcClient, s, worker.NewWorker(rpcClient)) - - _, 
invalidBlocks, err := validator.ValidateBlockRange(startBlock, endBlock) - if err != nil { - log.Fatal().Err(err).Msg("Failed to validate blocks") - } - - if len(invalidBlocks) > 0 { - log.Info().Msgf("Found %d invalid blocks", len(invalidBlocks)) - for _, block := range invalidBlocks { - log.Info().Msgf("Invalid block: %s", block.Block.Number) - } - } else { - log.Info().Msg("No invalid blocks found") - } -} diff --git a/cmd/validate_and_fix.go b/cmd/validate_and_fix.go deleted file mode 100644 index 60e02fbc..00000000 --- a/cmd/validate_and_fix.go +++ /dev/null @@ -1,152 +0,0 @@ -package cmd - -import ( - "crypto/tls" - "fmt" - "math/big" - "strconv" - - "github.com/ClickHouse/clickhouse-go/v2" - "github.com/rs/zerolog/log" - "github.com/spf13/cobra" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/orchestrator" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/internal/validation" - "github.com/thirdweb-dev/indexer/internal/worker" -) - -var ( - validateAndFixCmd = &cobra.Command{ - Use: "validateAndFix", - Short: "Validate and fix blockchain data", - Long: "Validate blockchain data in batches and automatically fix any issues found including duplicates, gaps, and invalid blocks", - Run: func(cmd *cobra.Command, args []string) { - RunValidateAndFix(cmd, args) - }, - } -) - -func RunValidateAndFix(cmd *cobra.Command, args []string) { - batchSize := big.NewInt(1000) - fixBatchSize := 0 // default is no batch size - if len(args) > 0 { - batchSizeFromArgs, err := strconv.Atoi(args[0]) - if err != nil { - log.Fatal().Err(err).Msg("Failed to parse batch size") - } - if batchSizeFromArgs < 1 { - batchSizeFromArgs = 1 - } - batchSize = big.NewInt(int64(batchSizeFromArgs)) - log.Info().Msgf("Using batch size %d from args", batchSize) - } - if len(args) > 1 { - fixBatchSizeFromArgs, err := strconv.Atoi(args[1]) - if err != nil { - log.Fatal().Err(err).Msg("Failed to parse fix batch size") - } - fixBatchSize = fixBatchSizeFromArgs - } - log.Debug().Msgf("Batch size: %d, fix batch size: %d", batchSize, fixBatchSize) - batchSize = new(big.Int).Sub(batchSize, big.NewInt(1)) // -1 because range ends are inclusive - - rpcClient, err := rpc.Initialize() - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize RPC") - } - log.Info().Msgf("Running validationAndFix for chain %d", rpcClient.GetChainID()) - - s, err := storage.NewStorageConnector(&config.Cfg.Storage) - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize storage") - } - cursor, err := validation.InitCursor(rpcClient.GetChainID(), s) - if err != nil { - log.Fatal().Err(err).Msg("Failed to initialize cursor") - } - log.Debug().Msgf("Cursor initialized for chain %d, starting from block %d", rpcClient.GetChainID(), cursor.LastScannedBlockNumber) - - conn, err := clickhouse.Open(&clickhouse.Options{ - Addr: []string{fmt.Sprintf("%s:%d", config.Cfg.Storage.Main.Clickhouse.Host, config.Cfg.Storage.Main.Clickhouse.Port)}, - Protocol: clickhouse.Native, - TLS: &tls.Config{ - MinVersion: tls.VersionTLS12, - }, - Auth: clickhouse.Auth{ - Username: config.Cfg.Storage.Main.Clickhouse.Username, - Password: config.Cfg.Storage.Main.Clickhouse.Password, - }, - Settings: func() clickhouse.Settings { - settings := clickhouse.Settings{ - "do_not_merge_across_partitions_select_final": "1", - "use_skip_indexes_if_final": "1", - "optimize_move_to_prewhere_if_final": "1", - "async_insert": "1", - 
"wait_for_async_insert": "1", - } - return settings - }(), - }) - if err != nil { - log.Fatal().Err(err).Msg("Failed to connect to ClickHouse") - } - defer conn.Close() - - startBlock := new(big.Int).Add(cursor.LastScannedBlockNumber, big.NewInt(1)) - - for startBlock.Cmp(cursor.MaxBlockNumber) <= 0 { - batchEndBlock := new(big.Int).Add(startBlock, batchSize) - if batchEndBlock.Cmp(cursor.MaxBlockNumber) > 0 { - batchEndBlock = new(big.Int).Set(cursor.MaxBlockNumber) - } - - log.Info().Msgf("Validating batch of blocks from %s to %s", startBlock.String(), batchEndBlock.String()) - err := validateAndFixRange(rpcClient, s, conn, startBlock, batchEndBlock, fixBatchSize) - if err != nil { - log.Fatal().Err(err).Msgf("failed to validate and fix range %v-%v", startBlock, batchEndBlock) - } - - startBlock = new(big.Int).Add(batchEndBlock, big.NewInt(1)) - cursor.Update(batchEndBlock) - } -} - -/** - * Validates a range of blocks (end and start are inclusive) for a given chain and fixes any problems it finds - */ -func validateAndFixRange(rpcClient rpc.IRPCClient, s storage.IStorage, conn clickhouse.Conn, startBlock *big.Int, endBlock *big.Int, fixBatchSize int) error { - validator := orchestrator.NewValidator(rpcClient, s, worker.NewWorker(rpcClient)) - - chainId := rpcClient.GetChainID() - err := validation.FindAndRemoveDuplicates(conn, chainId, startBlock, endBlock) - if err != nil { - return fmt.Errorf("failed to find and fix duplicates: %w", err) - } - - err = validator.FindAndFixGaps(startBlock, endBlock) - if err != nil { - return fmt.Errorf("failed to find and fix gaps: %w", err) - } - - _, invalidBlocks, err := validator.ValidateBlockRange(startBlock, endBlock) - if err != nil { - return fmt.Errorf("failed to validate and fix blocks: %w", err) - } - - invalidBlockNumbers := make([]*big.Int, 0) - for _, blockData := range invalidBlocks { - invalidBlockNumbers = append(invalidBlockNumbers, blockData.Block.Number) - } - - if len(invalidBlocks) > 0 { - err = validator.FixBlocks(invalidBlockNumbers, fixBatchSize) - if err != nil { - return fmt.Errorf("failed to fix blocks: %w", err) - } - } - - log.Debug().Msgf("ValidationAndFix complete for range %v-%v", startBlock, endBlock) - return nil -} diff --git a/configs/config.go b/configs/config.go index 70ac4cf0..c94cc871 100644 --- a/configs/config.go +++ b/configs/config.go @@ -1,11 +1,8 @@ package config import ( - "encoding/json" "fmt" - "os" "strings" - "time" "github.com/caarlos0/env" "github.com/joho/godotenv" @@ -18,138 +15,6 @@ type LogConfig struct { Prettify bool `mapstructure:"prettify"` } -type PollerConfig struct { - Enabled bool `mapstructure:"enabled"` - ParallelPollers int `mapstructure:"parallelPollers"` - S3 *S3SourceConfig `mapstructure:"s3"` -} - -type CommitterConfig struct { - Enabled bool `mapstructure:"enabled"` - BlocksPerCommit int `mapstructure:"blocksPerCommit"` - FromBlock int `mapstructure:"fromBlock"` - ToBlock int `mapstructure:"toBlock"` -} - -type ReorgHandlerConfig struct { - Enabled bool `mapstructure:"enabled"` - Interval int `mapstructure:"interval"` - BlocksPerScan int `mapstructure:"blocksPerScan"` - FromBlock int `mapstructure:"fromBlock"` - ForceFromBlock bool `mapstructure:"forceFromBlock"` -} - -type StorageConfig struct { - Orchestrator StorageOrchestratorConfig `mapstructure:"orchestrator"` - Staging StorageStagingConfig `mapstructure:"staging"` - Main StorageMainConfig `mapstructure:"main"` -} - -type StorageOrchestratorConfig struct { - Type string `mapstructure:"type"` - Clickhouse *ClickhouseConfig 
`mapstructure:"clickhouse"` - Postgres *PostgresConfig `mapstructure:"postgres"` - Redis *RedisConfig `mapstructure:"redis"` - Badger *BadgerConfig `mapstructure:"badger"` - Pebble *PebbleConfig `mapstructure:"pebble"` -} - -type StorageStagingConfig struct { - Type string `mapstructure:"type"` - Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` - Postgres *PostgresConfig `mapstructure:"postgres"` - Badger *BadgerConfig `mapstructure:"badger"` - Pebble *PebbleConfig `mapstructure:"pebble"` -} - -type StorageMainConfig struct { - Type string `mapstructure:"type"` - Clickhouse *ClickhouseConfig `mapstructure:"clickhouse"` - Postgres *PostgresConfig `mapstructure:"postgres"` - Kafka *KafkaConfig `mapstructure:"kafka"` - S3 *S3StorageConfig `mapstructure:"s3"` -} - -type BadgerConfig struct { - Path string `mapstructure:"path"` -} - -type PebbleConfig struct { - Path string `mapstructure:"path"` -} - -type S3Config struct { - Bucket string `mapstructure:"bucket"` - Region string `mapstructure:"region"` - Prefix string `mapstructure:"prefix"` - AccessKeyID string `mapstructure:"accessKeyId"` - SecretAccessKey string `mapstructure:"secretAccessKey"` - Endpoint string `mapstructure:"endpoint"` -} - -type S3StorageConfig struct { - S3Config `mapstructure:",squash"` - Format string `mapstructure:"format"` - Parquet *ParquetConfig `mapstructure:"parquet"` - // Buffering configuration - BufferSize int64 `mapstructure:"bufferSizeMB"` // Target buffer size in MB before flush - BufferTimeout int `mapstructure:"bufferTimeoutSeconds"` // Max time in seconds before flush - MaxBlocksPerFile int `mapstructure:"maxBlocksPerFile"` // Max blocks per parquet file (0 = no limit, only size/timeout triggers) - FlushTimeout int `mapstructure:"flushTimeoutSeconds"` // Timeout in seconds for flush operations (default: 60) -} - -type ParquetConfig struct { - Compression string `mapstructure:"compression"` - RowGroupSize int64 `mapstructure:"rowGroupSize"` - PageSize int64 `mapstructure:"pageSize"` -} - -type TableConfig struct { - DefaultSelectFields []string `mapstructure:"defaultSelectFields"` - TableName string `mapstructure:"tableName"` -} - -type TableOverrideConfig map[string]TableConfig - -type ClickhouseConfig struct { - Host string `mapstructure:"host"` - Port int `mapstructure:"port"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - Database string `mapstructure:"database"` - DisableTLS bool `mapstructure:"disableTLS"` - AsyncInsert bool `mapstructure:"asyncInsert"` - MaxRowsPerInsert int `mapstructure:"maxRowsPerInsert"` - MaxOpenConns int `mapstructure:"maxOpenConns"` - MaxIdleConns int `mapstructure:"maxIdleConns"` - ChainBasedConfig map[string]TableOverrideConfig `mapstructure:"chainBasedConfig"` - EnableParallelViewProcessing bool `mapstructure:"enableParallelViewProcessing"` - MaxQueryTime int `mapstructure:"maxQueryTime"` - MaxMemoryUsage int `mapstructure:"maxMemoryUsage"` - EnableCompression bool `mapstructure:"enableCompression"` -} - -type PostgresConfig struct { - Host string `mapstructure:"host"` - Port int `mapstructure:"port"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - Database string `mapstructure:"database"` - SSLMode string `mapstructure:"sslMode"` - MaxOpenConns int `mapstructure:"maxOpenConns"` - MaxIdleConns int `mapstructure:"maxIdleConns"` - MaxConnLifetime int `mapstructure:"maxConnLifetime"` - ConnectTimeout int `mapstructure:"connectTimeout"` -} - -type RedisConfig struct { - Host string 
`mapstructure:"host"` - Port int `mapstructure:"port"` - Password string `mapstructure:"password"` - DB int `mapstructure:"db"` - EnableTLS bool `mapstructure:"enableTLS"` -} - type KafkaConfig struct { Brokers string `mapstructure:"brokers"` Username string `mapstructure:"username"` @@ -176,137 +41,43 @@ type RPCConfig struct { ChainID string `mapstructure:"chainId"` } -type BasicAuthConfig struct { - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` -} - -type ThirdwebConfig struct { - ClientId string `mapstructure:"clientId"` -} - -type ContractApiRequestConfig struct { - MaxIdleConns int `mapstructure:"maxIdleConns"` - MaxIdleConnsPerHost int `mapstructure:"maxIdleConnsPerHost"` - MaxConnsPerHost int `mapstructure:"maxConnsPerHost"` - IdleConnTimeout int `mapstructure:"idleConnTimeout"` - DisableCompression bool `mapstructure:"disableCompression"` - Timeout int `mapstructure:"timeout"` -} - -type APIConfig struct { - Host string `mapstructure:"host"` - BasicAuth BasicAuthConfig `mapstructure:"basicAuth"` - ThirdwebContractApi string `mapstructure:"thirdwebContractApi"` - ContractApiRequest ContractApiRequestConfig `mapstructure:"contractApiRequest"` - AbiDecodingEnabled bool `mapstructure:"abiDecodingEnabled"` - Thirdweb ThirdwebConfig `mapstructure:"thirdweb"` -} - -type BlockPublisherConfig struct { - Enabled bool `mapstructure:"enabled"` - TopicName string `mapstructure:"topicName"` -} - -type TransactionPublisherConfig struct { - Enabled bool `mapstructure:"enabled"` - TopicName string `mapstructure:"topicName"` - ToFilter []string `mapstructure:"toFilter"` - FromFilter []string `mapstructure:"fromFilter"` -} - -type TracePublisherConfig struct { - Enabled bool `mapstructure:"enabled"` - TopicName string `mapstructure:"topicName"` -} - -type EventPublisherConfig struct { - Enabled bool `mapstructure:"enabled"` - TopicName string `mapstructure:"topicName"` - AddressFilter []string `mapstructure:"addressFilter"` - Topic0Filter []string `mapstructure:"topic0Filter"` -} - -type PublisherConfig struct { - Enabled bool `mapstructure:"enabled"` - Mode string `mapstructure:"mode"` - Brokers string `mapstructure:"brokers"` - Username string `mapstructure:"username"` - Password string `mapstructure:"password"` - EnableTLS bool `mapstructure:"enableTLS"` - Blocks BlockPublisherConfig `mapstructure:"blocks"` - Transactions TransactionPublisherConfig `mapstructure:"transactions"` - Traces TracePublisherConfig `mapstructure:"traces"` - Events EventPublisherConfig `mapstructure:"events"` -} - -type S3SourceConfig struct { - S3Config `mapstructure:",squash"` - CacheDir string `mapstructure:"cacheDir"` - MetadataTTL time.Duration `mapstructure:"metadataTTL"` - FileCacheTTL time.Duration `mapstructure:"fileCacheTTL"` - MaxCacheSize int64 `mapstructure:"maxCacheSize"` - CleanupInterval time.Duration `mapstructure:"cleanupInterval"` - MaxConcurrentDownloads int `mapstructure:"maxConcurrentDownloads"` -} - -type ValidationConfig struct { - Mode string `mapstructure:"mode"` // "disabled", "minimal", "strict" -} - -type MigratorConfig struct { - Destination StorageMainConfig `mapstructure:"destination"` - StartBlock uint `mapstructure:"startBlock"` - EndBlock uint `mapstructure:"endBlock"` - BatchSize uint `mapstructure:"batchSize"` - WorkerCount uint `mapstructure:"workerCount"` -} - type Config struct { - RPC RPCConfig `mapstructure:"rpc"` - Log LogConfig `mapstructure:"log"` - Poller PollerConfig `mapstructure:"poller"` - Committer CommitterConfig 
`mapstructure:"committer"` - ReorgHandler ReorgHandlerConfig `mapstructure:"reorgHandler"` - Storage StorageConfig `mapstructure:"storage"` - API APIConfig `mapstructure:"api"` - Publisher PublisherConfig `mapstructure:"publisher"` - Validation ValidationConfig `mapstructure:"validation"` - Migrator MigratorConfig `mapstructure:"migrator"` - - ZeetProjectName string `env:"ZEET_PROJECT_NAME" envDefault:"insight-indexer"` - ZeetDeploymentId string `env:"ZEET_DEPLOYMENT_ID"` - ZeetClusterId string `env:"ZEET_CLUSTER_ID"` - CommitterClickhouseDatabase string `env:"COMMITTER_CLICKHOUSE_DATABASE"` - CommitterClickhouseHost string `env:"COMMITTER_CLICKHOUSE_HOST"` - CommitterClickhousePort int `env:"COMMITTER_CLICKHOUSE_PORT"` - CommitterClickhouseUsername string `env:"COMMITTER_CLICKHOUSE_USERNAME"` - CommitterClickhousePassword string `env:"COMMITTER_CLICKHOUSE_PASSWORD"` - CommitterClickhouseEnableTLS bool `env:"COMMITTER_CLICKHOUSE_ENABLE_TLS" envDefault:"true"` - CommitterKafkaBrokers string `env:"COMMITTER_KAFKA_BROKERS"` - CommitterKafkaUsername string `env:"COMMITTER_KAFKA_USERNAME"` - CommitterKafkaPassword string `env:"COMMITTER_KAFKA_PASSWORD"` - CommitterKafkaEnableTLS bool `env:"COMMITTER_KAFKA_ENABLE_TLS" envDefault:"true"` - CommitterMaxMemoryMB int `env:"COMMITTER_MAX_MEMORY_MB" envDefault:"512"` - CommitterCompressionThresholdMB int `env:"COMMITTER_COMPRESSION_THRESHOLD_MB" envDefault:"50"` - CommitterKafkaBatchSize int `env:"COMMITTER_KAFKA_BATCH_SIZE" envDefault:"500"` - StagingS3Bucket string `env:"STAGING_S3_BUCKET" envDefault:"thirdweb-insight-production"` - StagingS3Region string `env:"STAGING_S3_REGION" envDefault:"us-west-2"` - StagingS3AccessKeyID string `env:"STAGING_S3_ACCESS_KEY_ID"` - StagingS3SecretAccessKey string `env:"STAGING_S3_SECRET_ACCESS_KEY"` - StagingS3MaxParallelFileDownload int `env:"STAGING_S3_MAX_PARALLEL_FILE_DOWNLOAD" envDefault:"2"` - BackfillStartBlock uint64 `env:"BACKFILL_START_BLOCK"` - BackfillEndBlock uint64 `env:"BACKFILL_END_BLOCK"` - RPCNumParallelCalls uint64 `env:"RPC_NUM_PARALLEL_CALLS" envDefault:"20"` - RPCBatchSize uint64 `env:"RPC_BATCH_SIZE" envDefault:"10"` - RPCBatchMaxMemoryUsageMB uint64 `env:"RPC_BATCH_MAX_MEMORY_USAGE_MB" envDefault:"32"` - ParquetMaxFileSizeMB int64 `env:"PARQUET_MAX_FILE_SIZE_MB" envDefault:"512"` - InsightServiceUrl string `env:"INSIGHT_SERVICE_URL" envDefault:"https://insight.thirdweb.com"` - InsightServiceApiKey string `env:"INSIGHT_SERVICE_API_KEY"` - RedisAddr string `env:"REDIS_ADDR" envDefault:"localhost:6379"` - RedisUsername string `env:"REDIS_USERNAME"` - RedisPassword string `env:"REDIS_PASSWORD"` - RedisDB int `env:"REDIS_DB" envDefault:"0"` + RPC RPCConfig `mapstructure:"rpc"` + Log LogConfig `mapstructure:"log"` + ZeetProjectName string `env:"ZEET_PROJECT_NAME" envDefault:"insight-indexer"` + ZeetDeploymentId string `env:"ZEET_DEPLOYMENT_ID"` + ZeetClusterId string `env:"ZEET_CLUSTER_ID"` + CommitterClickhouseDatabase string `env:"COMMITTER_CLICKHOUSE_DATABASE"` + CommitterClickhouseHost string `env:"COMMITTER_CLICKHOUSE_HOST"` + CommitterClickhousePort int `env:"COMMITTER_CLICKHOUSE_PORT"` + CommitterClickhouseUsername string `env:"COMMITTER_CLICKHOUSE_USERNAME"` + CommitterClickhousePassword string `env:"COMMITTER_CLICKHOUSE_PASSWORD"` + CommitterClickhouseEnableTLS bool `env:"COMMITTER_CLICKHOUSE_ENABLE_TLS" envDefault:"true"` + CommitterKafkaBrokers string `env:"COMMITTER_KAFKA_BROKERS"` + CommitterKafkaUsername string `env:"COMMITTER_KAFKA_USERNAME"` + CommitterKafkaPassword string 
`env:"COMMITTER_KAFKA_PASSWORD"` + CommitterKafkaEnableTLS bool `env:"COMMITTER_KAFKA_ENABLE_TLS" envDefault:"true"` + CommitterMaxMemoryMB int `env:"COMMITTER_MAX_MEMORY_MB" envDefault:"512"` + CommitterCompressionThresholdMB int `env:"COMMITTER_COMPRESSION_THRESHOLD_MB" envDefault:"50"` + CommitterKafkaBatchSize int `env:"COMMITTER_KAFKA_BATCH_SIZE" envDefault:"500"` + StagingS3Bucket string `env:"STAGING_S3_BUCKET" envDefault:"thirdweb-insight-production"` + StagingS3Region string `env:"STAGING_S3_REGION" envDefault:"us-west-2"` + StagingS3AccessKeyID string `env:"STAGING_S3_ACCESS_KEY_ID"` + StagingS3SecretAccessKey string `env:"STAGING_S3_SECRET_ACCESS_KEY"` + StagingS3MaxParallelFileDownload int `env:"STAGING_S3_MAX_PARALLEL_FILE_DOWNLOAD" envDefault:"2"` + BackfillStartBlock uint64 `env:"BACKFILL_START_BLOCK"` + BackfillEndBlock uint64 `env:"BACKFILL_END_BLOCK"` + RPCNumParallelCalls uint64 `env:"RPC_NUM_PARALLEL_CALLS" envDefault:"20"` + RPCBatchSize uint64 `env:"RPC_BATCH_SIZE" envDefault:"10"` + RPCBatchMaxMemoryUsageMB uint64 `env:"RPC_BATCH_MAX_MEMORY_USAGE_MB" envDefault:"32"` + ParquetMaxFileSizeMB int64 `env:"PARQUET_MAX_FILE_SIZE_MB" envDefault:"512"` + InsightServiceUrl string `env:"INSIGHT_SERVICE_URL" envDefault:"https://insight.thirdweb.com"` + InsightServiceApiKey string `env:"INSIGHT_SERVICE_API_KEY"` + RedisAddr string `env:"REDIS_ADDR" envDefault:"localhost:6379"` + RedisUsername string `env:"REDIS_USERNAME"` + RedisPassword string `env:"REDIS_PASSWORD"` + RedisDB int `env:"REDIS_DB" envDefault:"0"` + ValidationMode string `env:"VALIDATION_MODE" envDefault:"minimal"` } var Cfg Config @@ -352,48 +123,5 @@ func LoadConfig(cfgFile string) error { return fmt.Errorf("error unmarshalling config: %v", err) } - err = setCustomJSONConfigs() - if err != nil { - return fmt.Errorf("error setting custom JSON configs: %v", err) - } - - // Add debug logging - if clickhouse := Cfg.Storage.Main.Clickhouse; clickhouse != nil { - log.Debug(). - Interface("chainConfig", clickhouse.ChainBasedConfig). 
- Msgf("Loaded chain config %v", clickhouse.ChainBasedConfig) - } - - return nil -} - -func setCustomJSONConfigs() error { - if chainConfigJSON := os.Getenv("STORAGE_MAIN_CLICKHOUSE_CHAINBASEDCONFIG"); chainConfigJSON != "" { - var mainChainConfig map[string]TableOverrideConfig - if err := json.Unmarshal([]byte(chainConfigJSON), &mainChainConfig); err != nil { - return fmt.Errorf("error parsing main chainBasedConfig JSON: %v", err) - } - if Cfg.Storage.Main.Clickhouse != nil { - Cfg.Storage.Main.Clickhouse.ChainBasedConfig = mainChainConfig - } - } - if chainConfigJSON := os.Getenv("STORAGE_STAGING_CLICKHOUSE_CHAINBASEDCONFIG"); chainConfigJSON != "" { - var stagingChainConfig map[string]TableOverrideConfig - if err := json.Unmarshal([]byte(chainConfigJSON), &stagingChainConfig); err != nil { - return fmt.Errorf("error parsing staging chainBasedConfig JSON: %v", err) - } - if Cfg.Storage.Staging.Clickhouse != nil { - Cfg.Storage.Staging.Clickhouse.ChainBasedConfig = stagingChainConfig - } - } - if chainConfigJSON := os.Getenv("STORAGE_ORCHESTRATOR_CLICKHOUSE_CHAINBASEDCONFIG"); chainConfigJSON != "" { - var orchestratorChainConfig map[string]TableOverrideConfig - if err := json.Unmarshal([]byte(chainConfigJSON), &orchestratorChainConfig); err != nil { - return fmt.Errorf("error parsing orchestrator chainBasedConfig JSON: %v", err) - } - if Cfg.Storage.Main.Clickhouse != nil { - Cfg.Storage.Main.Clickhouse.ChainBasedConfig = orchestratorChainConfig - } - } return nil } diff --git a/configs/test_config.yml b/configs/test_config.yml deleted file mode 100644 index a817c6db..00000000 --- a/configs/test_config.yml +++ /dev/null @@ -1,78 +0,0 @@ -rpc: - url: https://1.rpc.thirdweb.com - chainId: "1" - -log: - level: debug - prettify: true - -poller: - enabled: true - interval: 2000 - blocksPerPoll: 100 - -committer: - enabled: true - interval: 2000 - blocksPerCommit: 100 - -storage: - main: - clickhouse: - host: localhost - port: 9440 - username: admin - password: password - database: default - disableTLS: true - asyncInsert: true - maxRowsPerInsert: 1000 - maxOpenConns: 50 - maxIdleConns: 10 - - staging: - postgres: - host: localhost - port: 5432 - username: admin - password: password - database: insight - sslMode: disable - maxOpenConns: 50 - maxIdleConns: 10 - maxConnLifetime: 300 - connectTimeout: 10 - - orchestrator: - postgres: - host: localhost - port: 5432 - username: admin - password: password - database: insight - sslMode: disable - maxOpenConns: 50 - maxIdleConns: 10 - maxConnLifetime: 300 - connectTimeout: 10 - -api: - host: localhost:3000 - basicAuth: - username: admin - password: admin - -publisher: - enabled: false - mode: default - -validation: - mode: minimal - -# Work mode configuration - Controls system behavior based on blockchain state -workMode: - # Interval in minutes to check if system should switch between live/historical mode - checkIntervalMinutes: 10 - # Block number threshold to determine if system is in "live mode" (near chain head) - # Setting this very high forces backfill mode for testing - liveModeThreshold: 1000000 \ No newline at end of file diff --git a/internal/common/abi.go b/internal/common/abi.go deleted file mode 100644 index f8b9e5ff..00000000 --- a/internal/common/abi.go +++ /dev/null @@ -1,271 +0,0 @@ -package common - -import ( - "fmt" - "net/http" - "regexp" - "strings" - "sync" - "time" - - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - - "github.com/ethereum/go-ethereum/accounts/abi" -) - -var ( - httpClient 
*http.Client - httpClientOnce sync.Once -) - -func getHTTPClient() *http.Client { - httpClientOnce.Do(func() { - httpClient = &http.Client{ - Transport: &http.Transport{ - MaxIdleConns: config.Cfg.API.ContractApiRequest.MaxIdleConns, - MaxIdleConnsPerHost: config.Cfg.API.ContractApiRequest.MaxIdleConnsPerHost, - MaxConnsPerHost: config.Cfg.API.ContractApiRequest.MaxConnsPerHost, - IdleConnTimeout: time.Duration(config.Cfg.API.ContractApiRequest.IdleConnTimeout) * time.Second, - DisableCompression: config.Cfg.API.ContractApiRequest.DisableCompression, - }, - Timeout: time.Duration(config.Cfg.API.ContractApiRequest.Timeout) * time.Second, - } - }) - return httpClient -} - -func GetABIForContractWithCache(chainId string, contract string, abiCache *sync.Map) *abi.ABI { - if abiValue, ok := abiCache.Load(contract); ok { - if abi, ok := abiValue.(*abi.ABI); ok { - return abi - } - } - - abiResult, err := GetABIForContract(chainId, contract) - if err != nil { - abiCache.Store(contract, nil) - return nil - } - abiCache.Store(contract, abiResult) - return abiResult -} - -func GetABIForContract(chainId string, contract string) (*abi.ABI, error) { - url := fmt.Sprintf("%s/abi/%s/%s", config.Cfg.API.ThirdwebContractApi, chainId, contract) - - resp, err := getHTTPClient().Get(url) - if err != nil { - return nil, fmt.Errorf("failed to get contract abi: %v", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("failed to get contract abi: unexpected status code %d", resp.StatusCode) - } - - abi, err := abi.JSON(resp.Body) - if err != nil { - log.Warn().Err(err).Str("contract", contract).Str("chainId", chainId).Msg("Failed to parse contract ABI") - return nil, fmt.Errorf("failed to load contract abi: %v", err) - } - return &abi, nil -} - -func ConstructEventABI(signature string) (*abi.Event, error) { - // Regex to extract the event name and parameters - regex := regexp.MustCompile(`^(\w+)\s*\((.*)\)$`) - matches := regex.FindStringSubmatch(strings.TrimSpace(signature)) - if len(matches) != 3 { - return nil, fmt.Errorf("invalid event signature format") - } - - eventName := matches[1] - parameters := matches[2] - - inputs, err := parseParamsToAbiArguments(parameters) - if err != nil { - return nil, fmt.Errorf("failed to parse params to abi arguments '%s': %v", parameters, err) - } - - event := abi.NewEvent(eventName, eventName, false, inputs) - - return &event, nil -} - -func ConstructFunctionABI(signature string) (*abi.Method, error) { - regex := regexp.MustCompile(`^(\w+)\s*\((.*)\)$`) - matches := regex.FindStringSubmatch(strings.TrimSpace(signature)) - if len(matches) != 3 { - return nil, fmt.Errorf("invalid function signature format") - } - - functionName := matches[1] - params := matches[2] - - inputs, err := parseParamsToAbiArguments(params) - if err != nil { - return nil, fmt.Errorf("failed to parse params to abi arguments '%s': %v", params, err) - } - - function := abi.NewMethod(functionName, functionName, abi.Function, "", false, false, inputs, nil) - - return &function, nil -} - -func parseParamsToAbiArguments(params string) (abi.Arguments, error) { - paramList := splitParams(strings.TrimSpace(params)) - var inputs abi.Arguments - for idx, param := range paramList { - arg, err := parseParamToAbiArgument(param, fmt.Sprintf("%d", idx)) - if err != nil { - return nil, fmt.Errorf("failed to parse param to arg '%s': %v", param, err) - } - inputs = append(inputs, *arg) - } - return inputs, nil -} - -/** - * Splits a string of parameters into a list of 
parameters - */ -func splitParams(params string) []string { - var result []string - depth := 0 - current := "" - for _, r := range params { - switch r { - case ',': - if depth == 0 { - result = append(result, strings.TrimSpace(current)) - current = "" - continue - } - case '(': - depth++ - case ')': - depth-- - } - current += string(r) - } - if strings.TrimSpace(current) != "" { - result = append(result, strings.TrimSpace(current)) - } - return result -} - -func parseParamToAbiArgument(param string, fallbackName string) (*abi.Argument, error) { - argName, paramType, indexed, err := getArgNameAndType(param, fallbackName) - if err != nil { - return nil, fmt.Errorf("failed to get arg name and type '%s': %v", param, err) - } - if isTuple(paramType) { - argType, err := marshalTupleParamToArgumentType(paramType) - if err != nil { - return nil, fmt.Errorf("failed to marshal tuple: %v", err) - } - return &abi.Argument{ - Name: argName, - Type: argType, - Indexed: indexed, - }, nil - } else { - argType, err := abi.NewType(paramType, paramType, nil) - if err != nil { - return nil, fmt.Errorf("failed to parse type '%s': %v", paramType, err) - } - return &abi.Argument{ - Name: argName, - Type: argType, - Indexed: indexed, - }, nil - } -} - -func getArgNameAndType(param string, fallbackName string) (name string, paramType string, indexed bool, err error) { - param, indexed = checkIfParamIsIndexed(param) - if isTuple(param) { - lastParenIndex := strings.LastIndex(param, ")") - if lastParenIndex == -1 { - return "", "", false, fmt.Errorf("invalid tuple format") - } - if len(param)-1 == lastParenIndex { - return fallbackName, param, indexed, nil - } - paramsEndIdx := lastParenIndex + 1 - if strings.HasPrefix(param[paramsEndIdx:], "[]") { - paramsEndIdx = lastParenIndex + 3 - } - return strings.TrimSpace(param[paramsEndIdx:]), param[:paramsEndIdx], indexed, nil - } else { - tokens := strings.Fields(param) - if len(tokens) == 1 { - return fallbackName, strings.TrimSpace(tokens[0]), indexed, nil - } - return strings.TrimSpace(tokens[len(tokens)-1]), strings.Join(tokens[:len(tokens)-1], " "), indexed, nil - } -} - -func checkIfParamIsIndexed(param string) (string, bool) { - tokens := strings.Fields(param) - indexed := false - for i, token := range tokens { - if token == "indexed" || strings.HasPrefix(token, "index_topic_") { - tokens = append(tokens[:i], tokens[i+1:]...) 
- indexed = true - break - } - } - param = strings.Join(tokens, " ") - return param, indexed -} - -func isTuple(param string) bool { - return strings.HasPrefix(param, "(") -} - -func marshalTupleParamToArgumentType(paramType string) (abi.Type, error) { - typ := "tuple" - isSlice := strings.HasSuffix(paramType, "[]") - strippedParamType := strings.TrimPrefix(paramType, "(") - if isSlice { - strippedParamType = strings.TrimSuffix(strippedParamType, "[]") - typ = "tuple[]" - } - strippedParamType = strings.TrimSuffix(strippedParamType, ")") - components, err := marshalParamArguments(strippedParamType) - if err != nil { - return abi.Type{}, fmt.Errorf("failed to marshal tuple: %v", err) - } - return abi.NewType(typ, typ, components) -} - -func marshalParamArguments(param string) ([]abi.ArgumentMarshaling, error) { - paramList := splitParams(param) - components := []abi.ArgumentMarshaling{} - for idx, param := range paramList { - argName, paramType, indexed, err := getArgNameAndType(param, fmt.Sprintf("field%d", idx)) - if err != nil { - return nil, fmt.Errorf("failed to get arg name and type '%s': %v", param, err) - } - if isTuple(paramType) { - subComponents, err := marshalParamArguments(paramType[1 : len(paramType)-1]) - if err != nil { - return nil, fmt.Errorf("failed to marshal tuple: %v", err) - } - components = append(components, abi.ArgumentMarshaling{ - Type: "tuple", - Name: argName, - Components: subComponents, - Indexed: indexed, - }) - } else { - components = append(components, abi.ArgumentMarshaling{ - Type: paramType, - Name: argName, - Indexed: indexed, - }) - } - } - return components, nil -} diff --git a/internal/common/balances.go b/internal/common/balances.go deleted file mode 100644 index e3089efd..00000000 --- a/internal/common/balances.go +++ /dev/null @@ -1,14 +0,0 @@ -package common - -import ( - "math/big" -) - -type TokenBalance struct { - ChainId *big.Int `json:"chain_id" ch:"chain_id"` - TokenType string `json:"token_type" ch:"token_type"` - TokenAddress string `json:"token_address" ch:"address"` - Owner string `json:"owner" ch:"owner"` - TokenId *big.Int `json:"token_id" ch:"token_id"` - Balance *big.Int `json:"balance" ch:"balance"` -} diff --git a/internal/common/block.go b/internal/common/block.go index f0cd0194..9d6b0212 100644 --- a/internal/common/block.go +++ b/internal/common/block.go @@ -32,32 +32,6 @@ type Block struct { InsertTimestamp time.Time `json:"insert_timestamp" ch:"insert_timestamp"` } -// BlockModel represents a simplified Block structure for Swagger documentation -type BlockModel struct { - ChainId string `json:"chain_id"` - BlockNumber uint64 `json:"block_number"` - BlockHash string `json:"block_hash"` - ParentHash string `json:"parent_hash"` - BlockTimestamp uint64 `json:"block_timestamp"` - Nonce string `json:"nonce"` - Sha3Uncles string `json:"sha3_uncles"` - MixHash string `json:"mix_hash"` - Miner string `json:"miner"` - StateRoot string `json:"state_root"` - TransactionsRoot string `json:"transactions_root"` - ReceiptsRoot string `json:"receipts_root"` - LogsBloom string `json:"logs_bloom"` - Size uint64 `json:"size"` - ExtraData string `json:"extra_data"` - Difficulty string `json:"difficulty"` - TotalDifficulty string `json:"total_difficulty"` - TransactionCount uint64 `json:"transaction_count"` - GasLimit string `json:"gas_limit"` - GasUsed string `json:"gas_used"` - WithdrawalsRoot string `json:"withdrawals_root"` - BaseFeePerGas uint64 `json:"base_fee_per_gas"` -} - type BlockData struct { Block Block `json:"block"` Transactions 
[]Transaction `json:"transactions"` @@ -65,56 +39,4 @@ type BlockData struct { Traces []Trace `json:"traces"` } -type BlockHeader struct { - Number *big.Int `json:"number"` - Hash string `json:"hash"` - ParentHash string `json:"parent_hash"` -} - type RawBlock = map[string]interface{} - -func (b *Block) Serialize() BlockModel { - return BlockModel{ - ChainId: b.ChainId.String(), - BlockNumber: b.Number.Uint64(), - BlockHash: b.Hash, - ParentHash: b.ParentHash, - BlockTimestamp: uint64(b.Timestamp.Unix()), - Nonce: b.Nonce, - Sha3Uncles: b.Sha3Uncles, - MixHash: b.MixHash, - Miner: b.Miner, - StateRoot: b.StateRoot, - TransactionsRoot: b.TransactionsRoot, - ReceiptsRoot: b.ReceiptsRoot, - LogsBloom: b.LogsBloom, - Size: b.Size, - ExtraData: b.ExtraData, - Difficulty: b.Difficulty.String(), - TotalDifficulty: b.TotalDifficulty.String(), - TransactionCount: b.TransactionCount, - GasLimit: b.GasLimit.String(), - GasUsed: b.GasUsed.String(), - WithdrawalsRoot: b.WithdrawalsRoot, - BaseFeePerGas: b.BaseFeePerGas, - } -} - -func (b *BlockData) Serialize() BlockData { - data := BlockData{ - Block: b.Block, - Transactions: b.Transactions, - Logs: b.Logs, - Traces: b.Traces, - } - if data.Transactions == nil { - data.Transactions = []Transaction{} - } - if data.Logs == nil { - data.Logs = []Log{} - } - if data.Traces == nil { - data.Traces = []Trace{} - } - return data -} diff --git a/internal/common/log.go b/internal/common/log.go index 58093bce..7b5dec65 100644 --- a/internal/common/log.go +++ b/internal/common/log.go @@ -1,15 +1,8 @@ package common import ( - "encoding/hex" - "fmt" "math/big" - "sync" "time" - - "github.com/ethereum/go-ethereum/accounts/abi" - gethCommon "github.com/ethereum/go-ethereum/common" - "github.com/rs/zerolog/log" ) type Log struct { @@ -30,247 +23,6 @@ type Log struct { InsertTimestamp time.Time `json:"insert_timestamp" ch:"insert_timestamp"` } -func (l *Log) GetTopic(index int) (string, error) { - if index == 0 { - return l.Topic0, nil - } else if index == 1 { - return l.Topic1, nil - } else if index == 2 { - return l.Topic2, nil - } else if index == 3 { - return l.Topic3, nil - } - return "", fmt.Errorf("invalid topic index: %d", index) -} - -// LogModel represents a simplified Log structure for Swagger documentation -type LogModel struct { - ChainId string `json:"chain_id"` - BlockNumber uint64 `json:"block_number"` - BlockHash string `json:"block_hash"` - BlockTimestamp uint64 `json:"block_timestamp"` - TransactionHash string `json:"transaction_hash"` - TransactionIndex uint64 `json:"transaction_index"` - LogIndex uint64 `json:"log_index"` - Address string `json:"address"` - Data string `json:"data"` - Topics []string `json:"topics" swaggertype:"array,string"` -} - -type DecodedLogDataModel struct { - Name string `json:"name"` - Signature string `json:"signature"` - IndexedParams map[string]interface{} `json:"indexed_params" swaggertype:"object"` - NonIndexedParams map[string]interface{} `json:"non_indexed_params" swaggertype:"object"` -} - -type DecodedLogModel struct { - LogModel - Decoded DecodedLogDataModel `json:"decoded"` -} - type RawLogs = []map[string]interface{} type RawReceipts = []RawReceipt type RawReceipt = map[string]interface{} - -type DecodedLogData struct { - Name string `json:"name"` - Signature string `json:"signature"` - IndexedParams map[string]interface{} `json:"indexed_params"` - NonIndexedParams map[string]interface{} `json:"non_indexed_params"` -} - -type DecodedLog struct { - Log - Decoded DecodedLogData `json:"decoded"` -} - -func 
DecodeLogs(chainId string, logs []Log) []*DecodedLog { - decodedLogs := make([]*DecodedLog, len(logs)) - abiCache := &sync.Map{} - - decodeLogFunc := func(eventLog *Log) *DecodedLog { - decodedLog := DecodedLog{Log: *eventLog} - abi := GetABIForContractWithCache(chainId, eventLog.Address, abiCache) - if abi == nil { - return &decodedLog - } - - event, err := abi.EventByID(gethCommon.HexToHash(eventLog.Topic0)) - if err != nil { - log.Debug().Msgf("failed to get method by id: %v", err) - return &decodedLog - } - if event == nil { - return &decodedLog - } - return eventLog.Decode(event) - } - - var wg sync.WaitGroup - for idx, eventLog := range logs { - wg.Add(1) - go func(idx int, eventLog Log) { - defer func() { - if err := recover(); err != nil { - log.Error(). - Any("chainId", chainId). - Str("txHash", eventLog.TransactionHash). - Uint64("logIndex", eventLog.LogIndex). - Str("logAddress", eventLog.Address). - Str("logTopic0", eventLog.Topic0). - Err(fmt.Errorf("%v", err)). - Msg("Caught panic in DecodeLogs, possibly in decodeLogFunc") - decodedLogs[idx] = &DecodedLog{Log: eventLog} - } - }() - defer wg.Done() - decodedLog := decodeLogFunc(&eventLog) - decodedLogs[idx] = decodedLog - }(idx, eventLog) - } - wg.Wait() - return decodedLogs -} - -func (l *Log) Decode(eventABI *abi.Event) *DecodedLog { - decodedIndexed := make(map[string]interface{}) - indexedArgs := abi.Arguments{} - for _, arg := range eventABI.Inputs { - if arg.Indexed { - indexedArgs = append(indexedArgs, arg) - } - } - // Decode indexed parameters - for i, arg := range indexedArgs { - topic, err := l.GetTopic(i + 1) - if err != nil { - log.Warn().Msgf("missing topic for indexed parameter: %s, signature: %s", arg.Name, eventABI.Sig) - return &DecodedLog{Log: *l} - } - decodedValue, err := decodeIndexedArgument(arg.Type, topic) - if err != nil { - log.Warn().Msgf("failed to decode indexed parameter %s: %v, signature: %s", arg.Name, err, eventABI.Sig) - return &DecodedLog{Log: *l} - } - decodedIndexed[arg.Name] = decodedValue - } - - // Decode non-indexed parameters - decodedNonIndexed := make(map[string]interface{}) - dataBytes := gethCommon.Hex2Bytes(l.Data[2:]) - err := eventABI.Inputs.UnpackIntoMap(decodedNonIndexed, dataBytes) - if err != nil { - log.Warn().Msgf("failed to decode non-indexed parameters: %v, signature: %s", err, eventABI.Sig) - return &DecodedLog{Log: *l} - } - - return &DecodedLog{ - Log: *l, - Decoded: DecodedLogData{ - Name: eventABI.Name, - Signature: eventABI.Sig, - IndexedParams: decodedIndexed, - NonIndexedParams: convertBytesAndNumericToHex(decodedNonIndexed).(map[string]interface{}), - }, - } -} - -func decodeIndexedArgument(argType abi.Type, topic string) (interface{}, error) { - if len(topic) < 3 { - return nil, fmt.Errorf("invalid topic %s", topic) - } - topicBytes := gethCommon.Hex2Bytes(topic[2:]) // Remove "0x" prefix - switch argType.T { - case abi.AddressTy: - return gethCommon.BytesToAddress(topicBytes), nil - case abi.UintTy, abi.IntTy: - return new(big.Int).SetBytes(topicBytes), nil - case abi.BoolTy: - return topicBytes[0] != 0, nil - case abi.BytesTy, abi.FixedBytesTy, abi.StringTy: - return "0x" + gethCommon.Bytes2Hex(topicBytes), nil - case abi.HashTy: - if len(topicBytes) != 32 { - return nil, fmt.Errorf("invalid hash length: expected 32, got %d", len(topicBytes)) - } - return gethCommon.BytesToHash(topicBytes), nil - case abi.FixedPointTy: - bi := new(big.Int).SetBytes(topicBytes) - bf := new(big.Float).SetInt(bi) - return bf, nil - default: - return topic, nil - } -} - -func 
convertBytesAndNumericToHex(data interface{}) interface{} { - switch v := data.(type) { - case map[string]interface{}: - for key, value := range v { - v[key] = convertBytesAndNumericToHex(value) - } - return v - case []interface{}: - for i, value := range v { - v[i] = convertBytesAndNumericToHex(value) - } - return v - case []byte: - return fmt.Sprintf("0x%s", hex.EncodeToString(v)) - case []uint: - hexStrings := make([]string, len(v)) - for i, num := range v { - hexStrings[i] = fmt.Sprintf("0x%x", num) - } - return hexStrings - case [32]uint8: - return fmt.Sprintf("0x%s", hex.EncodeToString(v[:])) - case [64]uint8: - return fmt.Sprintf("0x%s", hex.EncodeToString(v[:])) - case [128]uint8: - return fmt.Sprintf("0x%s", hex.EncodeToString(v[:])) - case [256]uint8: - return fmt.Sprintf("0x%s", hex.EncodeToString(v[:])) - default: - return v - } -} - -func (l *Log) Serialize() LogModel { - allTopics := []string{l.Topic0, l.Topic1, l.Topic2, l.Topic3} - topics := make([]string, 0, len(allTopics)) - for _, topic := range allTopics { - if topic != "" { - topics = append(topics, topic) - } - } - return LogModel{ - ChainId: l.ChainId.String(), - BlockNumber: l.BlockNumber.Uint64(), - BlockHash: l.BlockHash, - BlockTimestamp: uint64(l.BlockTimestamp.Unix()), - TransactionHash: l.TransactionHash, - TransactionIndex: l.TransactionIndex, - LogIndex: l.LogIndex, - Address: l.Address, - Data: l.Data, - Topics: topics, - } -} - -func (l *DecodedLog) Serialize() DecodedLogModel { - // Convert big numbers to strings in both indexed and non-indexed parameters - indexedParams := ConvertBigNumbersToString(l.Decoded.IndexedParams).(map[string]interface{}) - nonIndexedParams := ConvertBigNumbersToString(l.Decoded.NonIndexedParams).(map[string]interface{}) - - return DecodedLogModel{ - LogModel: l.Log.Serialize(), - Decoded: DecodedLogDataModel{ - Name: l.Decoded.Name, - Signature: l.Decoded.Signature, - IndexedParams: indexedParams, - NonIndexedParams: nonIndexedParams, - }, - } -} diff --git a/internal/common/log_test.go b/internal/common/log_test.go deleted file mode 100644 index 7738f87c..00000000 --- a/internal/common/log_test.go +++ /dev/null @@ -1,43 +0,0 @@ -package common - -import ( - "math/big" - "testing" - - gethCommon "github.com/ethereum/go-ethereum/common" - "github.com/stretchr/testify/assert" -) - -func TestDecodeLog(t *testing.T) { - event := Log{ - Data: "0x000000000000000000000000000000000000000000000000b2da0f6658944b0600000000000000000000000000000000000000000000000000000000000000003492dc030870ae719a0babc07807601edd3fc7e150a6b4878d1c5571bd9995c00000000000000000000000000000000000000000000000e076c8d70085af000000000000000000000000000000000000000000000000000000469c6478f693140000000000000000000000000000000000000000000000000000000000000000", - Topic0: "0x7be266734f0c132a415c32a35b76cbf3d8a02fa3d88628b286dcf713f53f1e2d", - Topic1: "0xc148159472ef0bbd3a304d3d3637b8deeda456572700669fda4f8d0fad814402", - Topic2: "0x000000000000000000000000ff0cb0351a356ad16987e5809a8daaaf34f5adbe", - } - - eventABI, err := ConstructEventABI("LogCanonicalOrderFilled(bytes32 indexed orderHash,address indexed orderMaker,uint256 fillAmount,uint256 triggerPrice,bytes32 orderFlags,(uint256 price,uint128 fee,bool isNegativeFee) fill)") - assert.NoError(t, err) - decodedEvent := event.Decode(eventABI) - - assert.Equal(t, "LogCanonicalOrderFilled", decodedEvent.Decoded.Name) - assert.Equal(t, "0xc148159472ef0bbd3a304d3d3637b8deeda456572700669fda4f8d0fad814402", decodedEvent.Decoded.IndexedParams["orderHash"]) - assert.Equal(t, 
gethCommon.HexToAddress("0xff0cb0351a356ad16987e5809a8daaaf34f5adbe"), decodedEvent.Decoded.IndexedParams["orderMaker"]) - - expectedFillAmountValue := big.NewInt(0) - expectedFillAmountValue.SetString("12887630215921289990", 10) - assert.Equal(t, expectedFillAmountValue, decodedEvent.Decoded.NonIndexedParams["fillAmount"]) - assert.Equal(t, "0x3492dc030870ae719a0babc07807601edd3fc7e150a6b4878d1c5571bd9995c0", decodedEvent.Decoded.NonIndexedParams["orderFlags"]) - expectedTriggerPriceValue := big.NewInt(0) - assert.Equal(t, expectedTriggerPriceValue.String(), decodedEvent.Decoded.NonIndexedParams["triggerPrice"].(*big.Int).String()) - - fillTuple := decodedEvent.Decoded.NonIndexedParams["fill"].(struct { - Price *big.Int `json:"price"` - Fee *big.Int `json:"fee"` - IsNegativeFee bool `json:"isNegativeFee"` - }) - - assert.Equal(t, "4140630000000000000000", fillTuple.Price.String()) - assert.Equal(t, "19875203709834004", fillTuple.Fee.String()) - assert.Equal(t, false, fillTuple.IsNegativeFee) -} diff --git a/internal/common/set.go b/internal/common/set.go deleted file mode 100644 index d9759826..00000000 --- a/internal/common/set.go +++ /dev/null @@ -1,42 +0,0 @@ -package common - -type Set[T comparable] struct { - elements map[T]struct{} -} - -// NewSet creates a new set -func NewSet[T comparable]() *Set[T] { - return &Set[T]{ - elements: make(map[T]struct{}), - } -} - -// Add inserts an element into the set -func (s *Set[T]) Add(value T) { - s.elements[value] = struct{}{} -} - -// Remove deletes an element from the set -func (s *Set[T]) Remove(value T) { - delete(s.elements, value) -} - -// Contains checks if an element is in the set -func (s *Set[T]) Contains(value T) bool { - _, found := s.elements[value] - return found -} - -// Size returns the number of elements in the set -func (s *Set[T]) Size() int { - return len(s.elements) -} - -// List returns all elements in the set as a slice -func (s *Set[T]) List() []T { - keys := make([]T, 0, len(s.elements)) - for key := range s.elements { - keys = append(keys, key) - } - return keys -} diff --git a/internal/common/trace.go b/internal/common/trace.go index 86791c91..b5a925e0 100644 --- a/internal/common/trace.go +++ b/internal/common/trace.go @@ -32,52 +32,3 @@ type Trace struct { } type RawTraces = []map[string]interface{} - -type TraceModel struct { - ChainId string `json:"chain_id"` - BlockNumber uint64 `json:"block_number"` - BlockHash string `json:"block_hash"` - BlockTimestamp uint64 `json:"block_timestamp"` - TransactionHash string `json:"transaction_hash"` - TransactionIndex uint64 `json:"transaction_index"` - Subtraces int64 `json:"subtraces"` - TraceAddress []int64 `json:"trace_address"` - TraceType string `json:"trace_type"` - CallType string `json:"call_type"` - Error string `json:"error"` - FromAddress string `json:"from_address"` - ToAddress string `json:"to_address"` - Gas uint64 `json:"gas"` - GasUsed uint64 `json:"gas_used"` - Input string `json:"input"` - Output string `json:"output"` - Value uint64 `json:"value"` - Author string `json:"author"` - RewardType string `json:"reward_type"` - RefundAddress string `json:"refund_address"` -} - -func (t *Trace) Serialize() TraceModel { - return TraceModel{ - ChainId: t.ChainID.String(), - BlockNumber: t.BlockNumber.Uint64(), - BlockHash: t.BlockHash, - TransactionHash: t.TransactionHash, - TransactionIndex: t.TransactionIndex, - Subtraces: t.Subtraces, - TraceAddress: t.TraceAddress, - TraceType: t.TraceType, - CallType: t.CallType, - Error: t.Error, - FromAddress: 
t.FromAddress, - ToAddress: t.ToAddress, - Gas: t.Gas, - GasUsed: t.GasUsed, - Input: t.Input, - Output: t.Output, - Value: t.Value.Uint64(), - Author: t.Author, - RewardType: t.RewardType, - RefundAddress: t.RefundAddress, - } -} diff --git a/internal/common/transaction.go b/internal/common/transaction.go index 011a9acd..f9564283 100644 --- a/internal/common/transaction.go +++ b/internal/common/transaction.go @@ -1,14 +1,8 @@ package common import ( - "encoding/hex" "math/big" - "strings" - "sync" "time" - - "github.com/ethereum/go-ethereum/accounts/abi" - "github.com/rs/zerolog/log" ) type RawTransaction = map[string]interface{} @@ -49,199 +43,3 @@ type Transaction struct { Sign int8 `json:"sign" ch:"sign"` InsertTimestamp time.Time `json:"insert_timestamp" ch:"insert_timestamp"` } - -type DecodedTransactionData struct { - Name string `json:"name"` - Signature string `json:"signature"` - Inputs map[string]interface{} `json:"inputs"` -} - -type DecodedTransaction struct { - Transaction - Decoded DecodedTransactionData `json:"decoded"` -} - -// TransactionModel represents a simplified Transaction structure for Swagger documentation -type TransactionModel struct { - ChainId string `json:"chain_id"` - Hash string `json:"hash"` - Nonce uint64 `json:"nonce"` - BlockHash string `json:"block_hash"` - BlockNumber uint64 `json:"block_number"` - BlockTimestamp uint64 `json:"block_timestamp"` - TransactionIndex uint64 `json:"transaction_index"` - FromAddress string `json:"from_address"` - ToAddress string `json:"to_address"` - Value string `json:"value"` - Gas uint64 `json:"gas"` - GasPrice string `json:"gas_price"` - Data string `json:"data"` - FunctionSelector string `json:"function_selector"` - MaxFeePerGas string `json:"max_fee_per_gas"` - MaxPriorityFeePerGas string `json:"max_priority_fee_per_gas"` - MaxFeePerBlobGas *string `json:"max_fee_per_blob_gas,omitempty"` - BlobVersionedHashes []string `json:"blob_versioned_hashes,omitempty"` - TransactionType uint8 `json:"transaction_type"` - R string `json:"r"` - S string `json:"s"` - V string `json:"v"` - AccessListJson *string `json:"access_list_json"` - AuthorizationListJson *string `json:"authorization_list_json"` - ContractAddress *string `json:"contract_address"` - GasUsed *uint64 `json:"gas_used"` - CumulativeGasUsed *uint64 `json:"cumulative_gas_used"` - EffectiveGasPrice *string `json:"effective_gas_price"` - BlobGasUsed *uint64 `json:"blob_gas_used"` - BlobGasPrice *string `json:"blob_gas_price"` - LogsBloom *string `json:"logs_bloom"` - Status *uint64 `json:"status"` -} - -type DecodedTransactionDataModel struct { - Name string `json:"name"` - Signature string `json:"signature"` - Inputs map[string]interface{} `json:"inputs"` -} - -type DecodedTransactionModel struct { - TransactionModel - Decoded DecodedTransactionDataModel `json:"decoded"` -} - -func DecodeTransactions(chainId string, txs []Transaction) []*DecodedTransaction { - decodedTxs := make([]*DecodedTransaction, len(txs)) - abiCache := &sync.Map{} - decodeTxFunc := func(transaction *Transaction) *DecodedTransaction { - decodedTransaction := DecodedTransaction{Transaction: *transaction} - abi := GetABIForContractWithCache(chainId, transaction.ToAddress, abiCache) - if abi == nil { - return &decodedTransaction - } - - decodedData, err := hex.DecodeString(strings.TrimPrefix(transaction.Data, "0x")) - if err != nil { - return &decodedTransaction - } - - if len(decodedData) < 4 { - return &decodedTransaction - } - methodID := decodedData[:4] - method, err := abi.MethodById(methodID) 
- if err != nil { - log.Debug().Msgf("failed to get method by id: %v", err) - return &decodedTransaction - } - if method == nil { - return &decodedTransaction - } - return transaction.Decode(method) - } - - var wg sync.WaitGroup - for idx, transaction := range txs { - wg.Add(1) - go func(idx int, transaction Transaction) { - defer wg.Done() - decodedTx := decodeTxFunc(&transaction) - decodedTxs[idx] = decodedTx - }(idx, transaction) - } - wg.Wait() - return decodedTxs -} - -func (t *Transaction) Decode(functionABI *abi.Method) *DecodedTransaction { - decodedData, err := hex.DecodeString(strings.TrimPrefix(t.Data, "0x")) - if err != nil { - log.Debug().Msgf("failed to decode transaction data: %v", err) - return &DecodedTransaction{Transaction: *t} - } - - if len(decodedData) < 4 { - log.Debug().Msg("Data too short to contain function selector") - return &DecodedTransaction{Transaction: *t} - } - inputData := decodedData[4:] - decodedInputs := make(map[string]interface{}) - err = functionABI.Inputs.UnpackIntoMap(decodedInputs, inputData) - if err != nil { - log.Warn().Msgf("failed to decode function parameters: %v, signature: %s", err, functionABI.Sig) - } - return &DecodedTransaction{ - Transaction: *t, - Decoded: DecodedTransactionData{ - Name: functionABI.RawName, - Signature: functionABI.Sig, - Inputs: decodedInputs, - }} -} - -func (t *Transaction) Serialize() TransactionModel { - return TransactionModel{ - ChainId: t.ChainId.String(), - Hash: t.Hash, - Nonce: t.Nonce, - BlockHash: t.BlockHash, - BlockNumber: t.BlockNumber.Uint64(), - BlockTimestamp: uint64(t.BlockTimestamp.Unix()), - TransactionIndex: t.TransactionIndex, - FromAddress: t.FromAddress, - ToAddress: t.ToAddress, - Value: t.Value.String(), - Gas: t.Gas, - GasPrice: t.GasPrice.String(), - Data: t.Data, - FunctionSelector: t.FunctionSelector, - MaxFeePerGas: t.MaxFeePerGas.String(), - MaxPriorityFeePerGas: t.MaxPriorityFeePerGas.String(), - MaxFeePerBlobGas: func() *string { - if t.MaxFeePerBlobGas == nil { - return nil - } - v := t.MaxFeePerBlobGas.String() - return &v - }(), - BlobVersionedHashes: t.BlobVersionedHashes, - TransactionType: t.TransactionType, - R: t.R.String(), - S: t.S.String(), - V: t.V.String(), - AccessListJson: t.AccessListJson, - AuthorizationListJson: t.AuthorizationListJson, - ContractAddress: t.ContractAddress, - GasUsed: t.GasUsed, - CumulativeGasUsed: t.CumulativeGasUsed, - EffectiveGasPrice: func() *string { - if t.EffectiveGasPrice == nil { - return nil - } - v := t.EffectiveGasPrice.String() - return &v - }(), - BlobGasUsed: t.BlobGasUsed, - BlobGasPrice: func() *string { - if t.BlobGasPrice == nil { - return nil - } - v := t.BlobGasPrice.String() - return &v - }(), - LogsBloom: t.LogsBloom, - Status: t.Status, - } -} - -func (t *DecodedTransaction) Serialize() DecodedTransactionModel { - // Convert big numbers to strings in the decoded inputs - decodedInputs := ConvertBigNumbersToString(t.Decoded.Inputs).(map[string]interface{}) - - return DecodedTransactionModel{ - TransactionModel: t.Transaction.Serialize(), - Decoded: DecodedTransactionDataModel{ - Name: t.Decoded.Name, - Signature: t.Decoded.Signature, - Inputs: decodedInputs, - }, - } -} diff --git a/internal/common/transaction_test.go b/internal/common/transaction_test.go deleted file mode 100644 index a4e38b3d..00000000 --- a/internal/common/transaction_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package common - -import ( - "math/big" - "testing" - - gethCommon "github.com/ethereum/go-ethereum/common" - "github.com/stretchr/testify/assert" 
-) - -func TestDecodeTransaction(t *testing.T) { - transaction := Transaction{ - Data: "0x095ea7b3000000000000000000000000971add32ea87f10bd192671630be3be8a11b862300000000000000000000000000000000000000000000010df58ac64e49b91ea0", - } - - abi, err := ConstructFunctionABI("approve(address _spender, uint256 _value)") - assert.NoError(t, err) - decodedTransaction := transaction.Decode(abi) - - assert.Equal(t, "approve", decodedTransaction.Decoded.Name) - assert.Equal(t, gethCommon.HexToAddress("0x971add32Ea87f10bD192671630be3BE8A11b8623"), decodedTransaction.Decoded.Inputs["_spender"]) - expectedValue := big.NewInt(0) - expectedValue.SetString("4979867327953494417056", 10) - assert.Equal(t, expectedValue, decodedTransaction.Decoded.Inputs["_value"]) - - transaction2 := Transaction{ - Data: "0x27c777a9000000000000000000000000000000000000000000000000000000000000002000000000000000000000000000000000000000000000000000000000000000c0000000000000000000000000000000000000000000000000000000000000007b00000000000000000000000000000000000000000000000000000000672c0c60302aafae8a36ffd8c12b32f1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000038d7ea4c680000000000000000000000000000734d56da60852a03e2aafae8a36ffd8c12b32f10000000000000000000000000000000000000000000000000000000000000000", - } - abi2, err := ConstructFunctionABI("allocatedWithdrawal((bytes,uint256,uint256,uint256,uint256,address) _withdrawal)") - assert.NoError(t, err) - decodedTransaction2 := transaction2.Decode(abi2) - - assert.Equal(t, "allocatedWithdrawal", decodedTransaction2.Decoded.Name) - withdrawal := decodedTransaction2.Decoded.Inputs["_withdrawal"].(struct { - Field0 []uint8 `json:"field0"` - Field1 *big.Int `json:"field1"` - Field2 *big.Int `json:"field2"` - Field3 *big.Int `json:"field3"` - Field4 *big.Int `json:"field4"` - Field5 gethCommon.Address `json:"field5"` - }) - - assert.Equal(t, []uint8{}, withdrawal.Field0) - assert.Equal(t, "123", withdrawal.Field1.String()) - assert.Equal(t, "1730940000", withdrawal.Field2.String()) - assert.Equal(t, "21786436819914608908212656341824591317420268878283544900672692017070052737024", withdrawal.Field3.String()) - assert.Equal(t, "1000000000000000", withdrawal.Field4.String()) - assert.Equal(t, "0x0734d56DA60852A03e2Aafae8a36FFd8c12B32f1", withdrawal.Field5.Hex()) -} diff --git a/internal/common/transfers.go b/internal/common/transfers.go deleted file mode 100644 index 931ee1c7..00000000 --- a/internal/common/transfers.go +++ /dev/null @@ -1,22 +0,0 @@ -package common - -import ( - "math/big" - "time" -) - -type TokenTransfer struct { - TokenType string `json:"token_type" ch:"token_type"` - ChainID *big.Int `json:"chain_id" ch:"chain_id"` - TokenAddress string `json:"token_address" ch:"token_address"` - FromAddress string `json:"from_address" ch:"from_address"` - ToAddress string `json:"to_address" ch:"to_address"` - BlockNumber *big.Int `json:"block_number" ch:"block_number"` - BlockTimestamp time.Time `json:"block_timestamp" ch:"block_timestamp"` - TransactionHash string `json:"transaction_hash" ch:"transaction_hash"` - TokenID *big.Int `json:"token_id" ch:"token_id"` - Amount *big.Int `json:"amount" ch:"amount"` - LogIndex uint64 `json:"log_index" ch:"log_index"` - Sign int8 `json:"sign" ch:"sign"` - InsertTimestamp time.Time `json:"insert_timestamp" ch:"insert_timestamp"` -} diff --git a/internal/handlers/blocks_handlers.go b/internal/handlers/blocks_handlers.go deleted file mode 100644 index 0ae8bcc7..00000000 --- a/internal/handlers/blocks_handlers.go 
+++ /dev/null @@ -1,123 +0,0 @@ -package handlers - -import ( - "github.com/gin-gonic/gin" - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/api" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/storage" -) - -// @Summary Get all blocks -// @Description Retrieve all blocks -// @Tags blocks -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param filter query string false "Filter parameters" -// @Param group_by query string false "Field to group results by" -// @Param sort_by query string false "Field to sort results by" -// @Param sort_order query string false "Sort order (asc or desc)" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Param aggregate query []string false "List of aggregate functions to apply" -// @Param force_consistent_data query bool false "Force consistent data at the expense of query speed" -// @Success 200 {object} api.QueryResponse{data=[]common.BlockModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/blocks [get] -func GetBlocks(c *gin.Context) { - handleBlocksRequest(c) -} - -func handleBlocksRequest(c *gin.Context) { - chainId, err := api.GetChainId(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - queryParams, err := api.ParseQueryParams(c.Request) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - // Validate GroupBy and SortBy fields - if err := api.ValidateGroupByAndSortBy("blocks", queryParams.GroupBy, queryParams.SortBy, queryParams.Aggregates); err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - mainStorage, err := getMainStorage() - if err != nil { - log.Error().Err(err).Msg("Error getting main storage") - api.InternalErrorHandler(c) - return - } - - // Prepare the QueryFilter - qf := storage.QueryFilter{ - FilterParams: queryParams.FilterParams, - ChainId: chainId, - SortBy: queryParams.SortBy, - SortOrder: queryParams.SortOrder, - Page: queryParams.Page, - Limit: queryParams.Limit, - ForceConsistentData: queryParams.ForceConsistentData, - } - - // Initialize the QueryResult - queryResult := api.QueryResponse{ - Meta: api.Meta{ - ChainId: chainId.Uint64(), - Page: queryParams.Page, - Limit: queryParams.Limit, - TotalItems: 0, - TotalPages: 0, // TODO: Implement total pages count - }, - Data: nil, - Aggregations: nil, - } - - // If aggregates or groupings are specified, retrieve them - if len(queryParams.Aggregates) > 0 || len(queryParams.GroupBy) > 0 { - qf.Aggregates = queryParams.Aggregates - qf.GroupBy = queryParams.GroupBy - - aggregatesResult, err := mainStorage.GetAggregations("blocks", qf) - if err != nil { - log.Error().Err(err).Msg("Error querying aggregates") - // TODO: might want to choose BadRequestError if it's due to not-allowed functions - api.InternalErrorHandler(c) - return - } - queryResult.Aggregations = &aggregatesResult.Aggregates - queryResult.Meta.TotalItems = len(aggregatesResult.Aggregates) - } else { - // Retrieve blocks data - blocksResult, err := mainStorage.GetBlocks(qf) - if err != nil { - log.Error().Err(err).Msg("Error querying blocks") - // TODO: might want to choose BadRequestError if it's due to not-allowed functions - api.InternalErrorHandler(c) - return - } - - var data interface{} = serializeBlocks(blocksResult.Data) - queryResult.Data = &data - 
queryResult.Meta.TotalItems = len(blocksResult.Data) - } - - sendJSONResponse(c, queryResult) -} - -func serializeBlocks(blocks []common.Block) []common.BlockModel { - blockModels := make([]common.BlockModel, len(blocks)) - for i, block := range blocks { - blockModels[i] = block.Serialize() - } - return blockModels -} diff --git a/internal/handlers/logs_handlers.go b/internal/handlers/logs_handlers.go deleted file mode 100644 index 63ac1975..00000000 --- a/internal/handlers/logs_handlers.go +++ /dev/null @@ -1,255 +0,0 @@ -package handlers - -import ( - "net/http" - "sync" - - "github.com/ethereum/go-ethereum/accounts/abi" - "github.com/gin-gonic/gin" - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/api" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/storage" -) - -// package-level variables -var ( - mainStorage storage.IMainStorage - storageOnce sync.Once - storageErr error -) - -// @Summary Get all logs -// @Description Retrieve all logs across all contracts -// @Tags events -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param filter query string false "Filter parameters" -// @Param group_by query string false "Field to group results by" -// @Param sort_by query string false "Field to sort results by" -// @Param sort_order query string false "Sort order (asc or desc)" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Param aggregate query []string false "List of aggregate functions to apply" -// @Param force_consistent_data query bool false "Force consistent data at the expense of query speed" -// @Success 200 {object} api.QueryResponse{data=[]common.LogModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/events [get] -func GetLogs(c *gin.Context) { - handleLogsRequest(c) -} - -// @Summary Get logs by contract -// @Description Retrieve logs for a specific contract -// @Tags events -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param contract path string true "Contract address" -// @Param filter query string false "Filter parameters" -// @Param group_by query string false "Field to group results by" -// @Param sort_by query string false "Field to sort results by" -// @Param sort_order query string false "Sort order (asc or desc)" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Param aggregate query []string false "List of aggregate functions to apply" -// @Param force_consistent_data query bool false "Force consistent data at the expense of query speed" -// @Success 200 {object} api.QueryResponse{data=[]common.LogModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/events/{contract} [get] -func GetLogsByContract(c *gin.Context) { - handleLogsRequest(c) -} - -// @Summary Get logs by contract and event signature -// @Description Retrieve logs for a specific contract and event signature. When a valid event signature is provided, the response includes decoded log data with both indexed and non-indexed parameters. 
-// @Tags events -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param contract path string true "Contract address" -// @Param signature path string true "Event signature (e.g., 'Transfer(address,address,uint256)')" -// @Param filter query string false "Filter parameters" -// @Param group_by query string false "Field to group results by" -// @Param sort_by query string false "Field to sort results by" -// @Param sort_order query string false "Sort order (asc or desc)" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Param aggregate query []string false "List of aggregate functions to apply" -// @Param force_consistent_data query bool false "Force consistent data at the expense of query speed" -// @Success 200 {object} api.QueryResponse{data=[]common.DecodedLogModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/events/{contract}/{signature} [get] -func GetLogsByContractAndSignature(c *gin.Context) { - handleLogsRequest(c) -} - -func handleLogsRequest(c *gin.Context) { - chainId, err := api.GetChainId(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - contractAddress := c.Param("contract") - signature := c.Param("signature") - - queryParams, err := api.ParseQueryParams(c.Request) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - // Validate GroupBy and SortBy fields - if err := api.ValidateGroupByAndSortBy("logs", queryParams.GroupBy, queryParams.SortBy, queryParams.Aggregates); err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - var eventABI *abi.Event - signatureHash := "" - if signature != "" { - eventABI, err = common.ConstructEventABI(signature) - if err != nil { - log.Debug().Err(err).Msgf("Unable to construct event ABI for %s", signature) - } - signatureHash = eventABI.ID.Hex() - } - - mainStorage, err := getMainStorage() - if err != nil { - log.Error().Err(err).Msg("Error getting main storage") - api.InternalErrorHandler(c) - return - } - - // Prepare the QueryFilter - qf := storage.QueryFilter{ - FilterParams: queryParams.FilterParams, - ContractAddress: contractAddress, - Signature: signatureHash, - ChainId: chainId, - SortBy: queryParams.SortBy, - SortOrder: queryParams.SortOrder, - Page: queryParams.Page, - Limit: queryParams.Limit, - ForceConsistentData: queryParams.ForceConsistentData, - } - - // Initialize the QueryResult - queryResult := api.QueryResponse{ - Meta: api.Meta{ - ChainId: chainId.Uint64(), - ContractAddress: contractAddress, - Signature: signatureHash, - Page: queryParams.Page, - Limit: queryParams.Limit, - TotalItems: 0, - TotalPages: 0, // TODO: Implement total pages count - }, - Data: nil, - Aggregations: nil, - } - - // If aggregates or groupings are specified, retrieve them - if len(queryParams.Aggregates) > 0 || len(queryParams.GroupBy) > 0 { - qf.Aggregates = queryParams.Aggregates - qf.GroupBy = queryParams.GroupBy - - aggregatesResult, err := mainStorage.GetAggregations("logs", qf) - if err != nil { - log.Error().Err(err).Msg("Error querying aggregates") - // TODO: might want to choose BadRequestError if it's due to not-allowed functions - api.InternalErrorHandler(c) - return - } - queryResult.Aggregations = &aggregatesResult.Aggregates - queryResult.Meta.TotalItems = len(aggregatesResult.Aggregates) - } else { - // Retrieve logs data - logsResult, err := 
mainStorage.GetLogs(qf) - if err != nil { - log.Error().Err(err).Msg("Error querying logs") - // TODO: might want to choose BadRequestError if it's due to not-allowed functions - api.InternalErrorHandler(c) - return - } - - var data interface{} - if decodedLogs := decodeLogsIfNeeded(chainId.String(), logsResult.Data, eventABI, config.Cfg.API.AbiDecodingEnabled && queryParams.Decode); decodedLogs != nil { - data = serializeDecodedLogs(decodedLogs) - } else { - data = serializeLogs(logsResult.Data) - } - queryResult.Data = &data - queryResult.Meta.TotalItems = len(logsResult.Data) - } - - sendJSONResponse(c, queryResult) -} - -func decodeLogsIfNeeded(chainId string, logs []common.Log, eventABI *abi.Event, useContractService bool) []*common.DecodedLog { - if eventABI != nil { - decodingCompletelySuccessful := true - decodedLogs := []*common.DecodedLog{} - for _, log := range logs { - decodedLog := log.Decode(eventABI) - if decodedLog.Decoded.Name == "" || decodedLog.Decoded.Signature == "" { - decodingCompletelySuccessful = false - } - decodedLogs = append(decodedLogs, decodedLog) - } - if !useContractService || decodingCompletelySuccessful { - // decoding was successful or contract service decoding is disabled - return decodedLogs - } - } - if useContractService { - return common.DecodeLogs(chainId, logs) - } - return nil -} - -func getMainStorage() (storage.IMainStorage, error) { - storageOnce.Do(func() { - var err error - // TODO: move this to a QueryConnector later to decouple read/write connector - mainStorage, err = storage.NewMainConnector(&config.Cfg.Storage.Main, nil) - if err != nil { - storageErr = err - log.Error().Err(err).Msg("Error creating storage connector") - } - }) - return mainStorage, storageErr -} - -func sendJSONResponse(c *gin.Context, response interface{}) { - c.JSON(http.StatusOK, response) -} - -func serializeDecodedLogs(logs []*common.DecodedLog) []common.DecodedLogModel { - decodedLogModels := make([]common.DecodedLogModel, len(logs)) - for i, log := range logs { - decodedLogModels[i] = log.Serialize() - } - return decodedLogModels -} - -func serializeLogs(logs []common.Log) []common.LogModel { - logModels := make([]common.LogModel, len(logs)) - for i, log := range logs { - logModels[i] = log.Serialize() - } - return logModels -} diff --git a/internal/handlers/search_handlers.go b/internal/handlers/search_handlers.go deleted file mode 100644 index 89a33768..00000000 --- a/internal/handlers/search_handlers.go +++ /dev/null @@ -1,488 +0,0 @@ -package handlers - -import ( - "context" - "encoding/hex" - "fmt" - "math/big" - "strconv" - "strings" - "sync" - "time" - - "github.com/gin-gonic/gin" - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/api" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" -) - -type SearchResultType string - -const ( - SearchResultTypeBlock SearchResultType = "block" - SearchResultTypeTransaction SearchResultType = "transaction" - SearchResultTypeEventSignature SearchResultType = "event_signature" - SearchResultTypeFunctionSignature SearchResultType = "function_signature" - SearchResultTypeAddress SearchResultType = "address" - SearchResultTypeContract SearchResultType = "contract" -) - -type SearchResultModel struct { - Blocks []common.BlockModel `json:"blocks,omitempty"` - Transactions []common.TransactionModel `json:"transactions,omitempty"` - Events []common.LogModel 
`json:"events,omitempty"` - Type SearchResultType `json:"type,omitempty"` -} - -type SearchInput struct { - BlockNumber *big.Int - Hash string - Address string - FunctionSignature string - ErrorMessage string -} - -// @Summary Search blockchain data -// @Description Search blocks, transactions and events -// @Tags search -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param input path string true "Search input" -// @Success 200 {object} api.QueryResponse{data=SearchResultModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /search/:input [GET] -func Search(c *gin.Context) { - chainId, err := api.GetChainId(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - searchInput := parseSearchInput(c.Param("input")) - if searchInput.ErrorMessage != "" { - api.BadRequestErrorHandler(c, fmt.Errorf(searchInput.ErrorMessage)) - return - } - - mainStorage, err := getMainStorage() - if err != nil { - log.Error().Err(err).Msg("Error getting main storage") - api.InternalErrorHandler(c) - return - } - - result, err := executeSearch(c.Request.Context(), mainStorage, chainId, searchInput) - if err != nil { - log.Error().Err(err).Msg("Error executing search") - api.InternalErrorHandler(c) - return - } - - var data interface{} = result - sendJSONResponse(c, api.QueryResponse{ - Meta: api.Meta{ - ChainId: chainId.Uint64(), - }, - Data: &data, - }) -} - -func parseSearchInput(searchInput string) SearchInput { - if searchInput == "" { - return SearchInput{ErrorMessage: "search input cannot be empty"} - } - - blockNumber, ok := new(big.Int).SetString(searchInput, 10) - if ok { - if blockNumber.Sign() == -1 { - return SearchInput{ErrorMessage: fmt.Sprintf("invalid block number '%s'", searchInput)} - } - return SearchInput{BlockNumber: blockNumber} - } - - if isValidHashWithLength(searchInput, 66) { - return SearchInput{Hash: searchInput} - } else if isValidHashWithLength(searchInput, 42) { - return SearchInput{Address: searchInput} - } else if isValidHashWithLength(searchInput, 10) { - return SearchInput{FunctionSignature: searchInput} - } - return SearchInput{ErrorMessage: fmt.Sprintf("invalid input '%s'", searchInput)} -} - -func isValidHashWithLength(input string, length int) bool { - if len(input) == length && strings.HasPrefix(input, "0x") { - _, err := hex.DecodeString(input[2:]) - if err == nil { - return true - } - } - return false -} - -func executeSearch(ctx context.Context, storage storage.IMainStorage, chainId *big.Int, input SearchInput) (SearchResultModel, error) { - switch { - case input.BlockNumber != nil: - block, err := searchByBlockNumber(storage, chainId, input.BlockNumber) - return SearchResultModel{Blocks: []common.BlockModel{*block}, Type: SearchResultTypeBlock}, err - - case input.Hash != "": - return searchByHash(storage, chainId, input.Hash) - - case input.Address != "": - return searchByAddress(ctx, storage, chainId, input.Address) - - case input.FunctionSignature != "": - transactions, err := searchByFunctionSelectorOptimistically(storage, chainId, input.FunctionSignature) - return SearchResultModel{Transactions: transactions, Type: SearchResultTypeFunctionSignature}, err - - default: - return SearchResultModel{}, nil - } -} - -func searchByBlockNumber(mainStorage storage.IMainStorage, chainId *big.Int, blockNumber *big.Int) (*common.BlockModel, error) { - result, err := mainStorage.GetBlocks(storage.QueryFilter{ - ChainId: chainId, - 
BlockNumbers: []*big.Int{blockNumber}, - Limit: 1, - }) - if err != nil { - return nil, err - } - blocks := result.Data - if len(blocks) == 0 { - return nil, nil - } - block := blocks[0].Serialize() - return &block, nil -} - -func searchByFunctionSelectorOptimistically(mainStorage storage.IMainStorage, chainId *big.Int, functionSelector string) ([]common.TransactionModel, error) { - now := time.Now() - thirtyDaysAgo := now.AddDate(0, 0, -30) - - result, err := mainStorage.GetTransactions(storage.QueryFilter{ - ChainId: chainId, - Signature: functionSelector, - FilterParams: map[string]string{ - "block_timestamp_gte": strconv.FormatInt(thirtyDaysAgo.Unix(), 10), - }, - SortBy: "block_number", - SortOrder: "desc", - Limit: 20, - }) - if err != nil { - return nil, err - } - if len(result.Data) == 0 { - result, err = mainStorage.GetTransactions(storage.QueryFilter{ - ChainId: chainId, - Signature: functionSelector, - FilterParams: map[string]string{ - "block_timestamp_lte": strconv.FormatInt(thirtyDaysAgo.Unix(), 10), - }, - SortBy: "block_number", - SortOrder: "desc", - Limit: 20, - }) - if err != nil { - return nil, err - } - } - - transactions := make([]common.TransactionModel, len(result.Data)) - for i, transaction := range result.Data { - transactions[i] = transaction.Serialize() - } - return transactions, nil -} - -func searchByHash(mainStorage storage.IMainStorage, chainId *big.Int, hash string) (SearchResultModel, error) { - var result SearchResultModel - var wg sync.WaitGroup - resultChan := make(chan SearchResultModel) - doneChan := make(chan struct{}) - errChan := make(chan error) - - wg.Add(3) - // Try as transaction hash past 5 days - go func() { - defer wg.Done() - txs, err := searchTransactionsByTimeRange(mainStorage, chainId, hash, 5, 0) - if err != nil { - errChan <- err - return - } - if len(txs) > 0 { - select { - case resultChan <- SearchResultModel{Transactions: []common.TransactionModel{txs[0]}, Type: SearchResultTypeTransaction}: - case <-doneChan: - } - } - }() - - // Try as transaction hash past 5-30 days - go func() { - defer wg.Done() - txs, err := searchTransactionsByTimeRange(mainStorage, chainId, hash, 30, 5) - if err != nil { - errChan <- err - return - } - if len(txs) > 0 { - select { - case resultChan <- SearchResultModel{Transactions: []common.TransactionModel{txs[0]}, Type: SearchResultTypeTransaction}: - case <-doneChan: - } - } - }() - - // Try as transaction hash more than 30 days ago - go func() { - defer wg.Done() - txs, err := searchTransactionsByTimeRange(mainStorage, chainId, hash, 0, 30) - if err != nil { - errChan <- err - return - } - if len(txs) > 0 { - select { - case resultChan <- SearchResultModel{Transactions: []common.TransactionModel{txs[0]}, Type: SearchResultTypeTransaction}: - case <-doneChan: - } - } - }() - - // Try as block hash - wg.Add(1) - go func() { - defer wg.Done() - blockResult, err := mainStorage.GetBlocks(storage.QueryFilter{ - ChainId: chainId, - FilterParams: map[string]string{ - "hash": hash, - }, - Limit: 1, - }) - if err != nil { - errChan <- err - return - } - if len(blockResult.Data) > 0 { - blockModel := blockResult.Data[0].Serialize() - select { - case resultChan <- SearchResultModel{Blocks: []common.BlockModel{blockModel}, Type: SearchResultTypeBlock}: - case <-doneChan: - } - } - }() - - // Try as topic_0 for logs - wg.Add(1) - go func() { - defer wg.Done() - logsResult, err := mainStorage.GetLogs(storage.QueryFilter{ - ChainId: chainId, - Signature: hash, - Limit: 20, - SortBy: "block_number", - SortOrder: "desc", 
- }) - if err != nil { - errChan <- err - return - } - if len(logsResult.Data) > 0 { - logs := make([]common.LogModel, len(logsResult.Data)) - for i, log := range logsResult.Data { - logs[i] = log.Serialize() - } - select { - case resultChan <- SearchResultModel{Events: logs, Type: SearchResultTypeEventSignature}: - case <-doneChan: - } - } - }() - - // Wait for first result or all goroutines to finish - go func() { - wg.Wait() - close(resultChan) - }() - - // Get first result or error - select { - case err := <-errChan: - close(doneChan) - return result, err - case res, ok := <-resultChan: - if !ok { - return result, nil // No results found - } - close(doneChan) - return res, nil - } -} - -func searchByAddress(ctx context.Context, mainStorage storage.IMainStorage, chainId *big.Int, address string) (SearchResultModel, error) { - searchResult := SearchResultModel{Type: SearchResultTypeAddress} - contractCode, err := checkIfContractHasCode(ctx, chainId, address) - if err != nil { - return searchResult, err - } - if contractCode == ContractCodeExists { - searchResult.Type = SearchResultTypeContract - txs, err := findLatestTransactionsToAddress(mainStorage, chainId, address) - if err == nil { - searchResult.Transactions = txs - return searchResult, nil - } - return searchResult, err - } else if contractCode == ContractCodeDoesNotExist { - txs, err := findLatestTransactionsFromAddressOptimistically(mainStorage, chainId, address) - if err == nil { - searchResult.Transactions = txs - return searchResult, nil - } - return searchResult, err - } else { - transactionsTo, err := findLatestTransactionsToAddress(mainStorage, chainId, address) - if err != nil { - return searchResult, err - } - for _, tx := range transactionsTo { - if len(tx.Data) > 0 && tx.Data != "0x" { - // if any received transactions is a function call, likely a contract - searchResult.Type = SearchResultTypeContract - searchResult.Transactions = transactionsTo - return searchResult, nil - } - } - transactionsFrom, err := findLatestTransactionsFromAddressOptimistically(mainStorage, chainId, address) - if err != nil { - return searchResult, err - } - searchResult.Transactions = transactionsFrom - return searchResult, nil - } -} - -func findLatestTransactionsToAddress(mainStorage storage.IMainStorage, chainId *big.Int, address string) ([]common.TransactionModel, error) { - result, err := mainStorage.GetTransactions(storage.QueryFilter{ - ChainId: chainId, - ContractAddress: address, - Limit: 20, - SortBy: "block_number", - SortOrder: "desc", - }) - if err != nil { - return nil, err - } - transactions := make([]common.TransactionModel, len(result.Data)) - for i, transaction := range result.Data { - transactions[i] = transaction.Serialize() - } - return transactions, nil -} - -func findLatestTransactionsFromAddressOptimistically(mainStorage storage.IMainStorage, chainId *big.Int, address string) ([]common.TransactionModel, error) { - now := time.Now() - thirtyDaysAgo := now.AddDate(0, 0, -30) - - result, err := mainStorage.GetTransactions(storage.QueryFilter{ - ChainId: chainId, - FromAddress: address, - FilterParams: map[string]string{ - "block_timestamp_gte": strconv.FormatInt(thirtyDaysAgo.Unix(), 10), - }, - Limit: 20, - SortBy: "block_number", - SortOrder: "desc", - }) - if err != nil { - return nil, err - } - if len(result.Data) == 0 { - result, err = mainStorage.GetTransactions(storage.QueryFilter{ - ChainId: chainId, - FromAddress: address, - FilterParams: map[string]string{ - "block_timestamp_lte": 
strconv.FormatInt(thirtyDaysAgo.Unix(), 10), - }, - Limit: 20, - SortBy: "block_number", - SortOrder: "desc", - }) - if err != nil { - return nil, err - } - } - transactions := make([]common.TransactionModel, len(result.Data)) - for i, transaction := range result.Data { - transactions[i] = transaction.Serialize() - } - return transactions, nil -} - -type ContractCodeState int - -const ( - ContractCodeUnknown ContractCodeState = iota - ContractCodeExists - ContractCodeDoesNotExist -) - -func checkIfContractHasCode(ctx context.Context, chainId *big.Int, address string) (ContractCodeState, error) { - if config.Cfg.API.Thirdweb.ClientId != "" { - rpcUrl := fmt.Sprintf("https://%s.rpc.thirdweb.com/%s", chainId.String(), config.Cfg.API.Thirdweb.ClientId) - r, err := rpc.InitializeSimpleRPCWithUrl(rpcUrl) - if err != nil { - return ContractCodeUnknown, err - } - hasCode, err := r.HasCode(ctx, address) - if err != nil { - return ContractCodeUnknown, err - } - if hasCode { - return ContractCodeExists, nil - } - return ContractCodeDoesNotExist, nil - } - return ContractCodeUnknown, nil -} - -func searchTransactionsByTimeRange(mainStorage storage.IMainStorage, chainId *big.Int, hash string, startOffsetDays, endOffsetDays int) ([]common.TransactionModel, error) { - now := time.Now() - filters := map[string]string{ - "hash": hash, - } - if startOffsetDays > 0 { - startTime := now.AddDate(0, 0, -startOffsetDays) - filters["block_timestamp_gte"] = strconv.FormatInt(startTime.Unix(), 10) - } - if endOffsetDays > 0 { - endTime := now.AddDate(0, 0, -endOffsetDays) - filters["block_timestamp_lte"] = strconv.FormatInt(endTime.Unix(), 10) - } - - txResult, err := mainStorage.GetTransactions(storage.QueryFilter{ - ChainId: chainId, - FilterParams: filters, - Limit: 1, - }) - if err != nil { - return nil, err - } - serialized := make([]common.TransactionModel, len(txResult.Data)) - for i, tx := range txResult.Data { - serialized[i] = tx.Serialize() - } - return serialized, nil -} diff --git a/internal/handlers/search_handlers_test.go b/internal/handlers/search_handlers_test.go deleted file mode 100644 index 898e1aa8..00000000 --- a/internal/handlers/search_handlers_test.go +++ /dev/null @@ -1,302 +0,0 @@ -package handlers - -import ( - "encoding/json" - "math/big" - "net/http" - "net/http/httptest" - "testing" - - "github.com/gin-gonic/gin" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/test/mocks" -) - -func setupTestRouter() (*gin.Engine, *mocks.MockIMainStorage) { - gin.SetMode(gin.TestMode) - router := gin.New() - mockStorage := new(mocks.MockIMainStorage) - - // Set the mock storage as the global storage - mainStorage = mockStorage - storageOnce.Do(func() { - mainStorage = mockStorage - }) - storageErr = nil - - router.GET("/v1/search/:chainId/:input", Search) - return router, mockStorage -} - -func TestSearch_BlockNumber(t *testing.T) { - router, mockStorage := setupTestRouter() - - blockNumber := big.NewInt(12345) - mockStorage.EXPECT().GetBlocks(mock.Anything).Return(storage.QueryResult[common.Block]{ - Data: []common.Block{{ - Number: blockNumber, - Hash: "0xabc", - GasLimit: big.NewInt(1000000), - GasUsed: big.NewInt(500000), - }}, - }, nil) - - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/v1/search/1/12345", nil) - router.ServeHTTP(w, req) - - assert.Equal(t, 200, w.Code) - - var response struct { - Data struct { - 
Blocks []common.BlockModel `json:"blocks"` - Type SearchResultType `json:"type"` - } `json:"data"` - } - err := json.Unmarshal(w.Body.Bytes(), &response) - assert.NoError(t, err) - assert.Equal(t, SearchResultTypeBlock, response.Data.Type) - assert.Equal(t, blockNumber.Uint64(), response.Data.Blocks[0].BlockNumber) - assert.Equal(t, "0xabc", response.Data.Blocks[0].BlockHash) - - mockStorage.AssertExpectations(t) -} - -func TestSearch_TransactionHash(t *testing.T) { - router, mockStorage := setupTestRouter() - - txHash := "0x1234567890123456789012345678901234567890123456789012345678901234" - - // Mock the 3 GetTransactions calls for different time ranges - // 1. Past 5 days (startOffsetDays=5, endOffsetDays=0) - This should always be called first and return a result - mockStorage.EXPECT().GetTransactions(mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.FilterParams["hash"] == txHash && - filter.FilterParams["block_timestamp_gte"] != "" && - filter.FilterParams["block_timestamp_lte"] == "" - })).Return(storage.QueryResult[common.Transaction]{ - Data: []common.Transaction{{ - Hash: txHash, - BlockNumber: big.NewInt(12345), - Value: big.NewInt(0), - GasPrice: big.NewInt(500000), - MaxFeePerGas: big.NewInt(500000), - MaxPriorityFeePerGas: big.NewInt(500000), - }}, - }, nil) - - // 2. 5-30 days (startOffsetDays=30, endOffsetDays=5) - This might not be called due to race conditions - mockStorage.On("GetTransactions", mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.FilterParams["hash"] == txHash && - filter.FilterParams["block_timestamp_gte"] != "" && - filter.FilterParams["block_timestamp_lte"] != "" - })).Return(storage.QueryResult[common.Transaction]{}, nil).Maybe() - - // 3. 
More than 30 days (startOffsetDays=0, endOffsetDays=30) - This might not be called due to race conditions - mockStorage.On("GetTransactions", mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.FilterParams["hash"] == txHash && - filter.FilterParams["block_timestamp_gte"] == "" && - filter.FilterParams["block_timestamp_lte"] != "" - })).Return(storage.QueryResult[common.Transaction]{}, nil).Maybe() - - // Mock the GetBlocks call for block hash search - This might not be called due to race conditions - mockStorage.On("GetBlocks", mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.FilterParams["hash"] == txHash - })).Return(storage.QueryResult[common.Block]{}, nil).Maybe() - - // Mock the GetLogs call for topic_0 search - This might not be called due to race conditions - mockStorage.On("GetLogs", mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.Signature == txHash - })).Return(storage.QueryResult[common.Log]{}, nil).Maybe() - - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/v1/search/1/"+txHash, nil) - router.ServeHTTP(w, req) - - assert.Equal(t, 200, w.Code) - - var response struct { - Data struct { - Transactions []common.TransactionModel `json:"transactions"` - Type SearchResultType `json:"type"` - } `json:"data"` - } - err := json.Unmarshal(w.Body.Bytes(), &response) - assert.NoError(t, err) - assert.Equal(t, SearchResultTypeTransaction, response.Data.Type) - assert.Equal(t, txHash, response.Data.Transactions[0].Hash) - - mockStorage.AssertExpectations(t) -} - -func TestSearch_Address(t *testing.T) { - router, mockStorage := setupTestRouter() - - address := "0x1234567890123456789012345678901234567890" - mockStorage.EXPECT().GetTransactions(mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.ContractAddress == address - })).Return(storage.QueryResult[common.Transaction]{ - Data: []common.Transaction{{ - ToAddress: address, - BlockNumber: big.NewInt(12345), - Value: big.NewInt(0), - GasPrice: big.NewInt(500000), - MaxFeePerGas: big.NewInt(500000), - MaxPriorityFeePerGas: big.NewInt(500000), - }}, - }, nil) - - mockStorage.EXPECT().GetTransactions(mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.FromAddress == address - })).Return(storage.QueryResult[common.Transaction]{ - Data: []common.Transaction{{ - FromAddress: address, - BlockNumber: big.NewInt(12345), - Value: big.NewInt(0), - GasPrice: big.NewInt(500000), - MaxFeePerGas: big.NewInt(500000), - MaxPriorityFeePerGas: big.NewInt(500000), - }}, - }, nil) - - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/v1/search/1/"+address, nil) - router.ServeHTTP(w, req) - - assert.Equal(t, 200, w.Code) - - var response struct { - Data struct { - Transactions []common.TransactionModel `json:"transactions"` - Type SearchResultType `json:"type"` - } `json:"data"` - } - err := json.Unmarshal(w.Body.Bytes(), &response) - assert.NoError(t, err) - assert.Equal(t, SearchResultTypeAddress, response.Data.Type) - assert.Equal(t, address, response.Data.Transactions[0].FromAddress) - - mockStorage.AssertExpectations(t) -} - -func TestSearch_Contract(t *testing.T) { - router, mockStorage := setupTestRouter() - - address := "0x1234567890123456789012345678901234567890" - 
mockStorage.EXPECT().GetTransactions(mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.ContractAddress == address - })).Return(storage.QueryResult[common.Transaction]{ - Data: []common.Transaction{{ - ToAddress: address, - BlockNumber: big.NewInt(12345), - Value: big.NewInt(0), - GasPrice: big.NewInt(500000), - MaxFeePerGas: big.NewInt(500000), - MaxPriorityFeePerGas: big.NewInt(500000), - Data: "0xaabbccdd", - }}, - }, nil) - - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/v1/search/1/"+address, nil) - router.ServeHTTP(w, req) - - assert.Equal(t, 200, w.Code) - - var response struct { - Data struct { - Transactions []common.TransactionModel `json:"transactions"` - Type SearchResultType `json:"type"` - } `json:"data"` - } - err := json.Unmarshal(w.Body.Bytes(), &response) - assert.NoError(t, err) - assert.Equal(t, SearchResultTypeContract, response.Data.Type) - assert.Equal(t, address, response.Data.Transactions[0].ToAddress) - assert.Equal(t, "0xaabbccdd", response.Data.Transactions[0].Data) - - mockStorage.AssertExpectations(t) -} - -func TestSearch_FunctionSignature(t *testing.T) { - router, mockStorage := setupTestRouter() - - signature := "0x12345678" - mockStorage.EXPECT().GetTransactions(mock.MatchedBy(func(filter storage.QueryFilter) bool { - return filter.ChainId.Cmp(big.NewInt(1)) == 0 && - filter.Signature == signature - })).Return(storage.QueryResult[common.Transaction]{ - Data: []common.Transaction{{ - Data: signature + "000000", - BlockNumber: big.NewInt(12345), - Value: big.NewInt(0), - GasPrice: big.NewInt(500000), - MaxFeePerGas: big.NewInt(500000), - MaxPriorityFeePerGas: big.NewInt(500000), - }}, - }, nil) - - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/v1/search/1/"+signature, nil) - router.ServeHTTP(w, req) - - assert.Equal(t, 200, w.Code) - - var response struct { - Data struct { - Transactions []common.TransactionModel `json:"transactions"` - Type SearchResultType `json:"type"` - } `json:"data"` - } - err := json.Unmarshal(w.Body.Bytes(), &response) - assert.NoError(t, err) - assert.Equal(t, SearchResultTypeFunctionSignature, response.Data.Type) - assert.Equal(t, signature+"000000", response.Data.Transactions[0].Data) - - mockStorage.AssertExpectations(t) -} - -func TestSearch_InvalidInput(t *testing.T) { - router, _ := setupTestRouter() - - testCases := []struct { - name string - input string - }{ - {"Empty input", " "}, - {"Invalid block number", "-1"}, - {"Invalid hash", "0xinvalidhash"}, - {"Invalid address", "0xinvalidaddress"}, - {"Invalid function signature", "0xinvalidsig"}, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/v1/search/1/"+tc.input, nil) - router.ServeHTTP(w, req) - - assert.Equal(t, 400, w.Code) - }) - } -} - -func TestSearch_InvalidChainId(t *testing.T) { - router, _ := setupTestRouter() - - w := httptest.NewRecorder() - req, _ := http.NewRequest("GET", "/v1/search/invalid/12345", nil) - router.ServeHTTP(w, req) - - assert.Equal(t, 400, w.Code) -} diff --git a/internal/handlers/token_handlers.go b/internal/handlers/token_handlers.go deleted file mode 100644 index adeb75a0..00000000 --- a/internal/handlers/token_handlers.go +++ /dev/null @@ -1,446 +0,0 @@ -package handlers - -import ( - "fmt" - "math/big" - "strings" - - "github.com/gin-gonic/gin" - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/api" - 
"github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/storage" -) - -// Models return type for Swagger documentation -type BalanceModel struct { - TokenAddress string `json:"token_address" ch:"address"` - TokenId string `json:"token_id" ch:"token_id"` - Balance string `json:"balance" ch:"balance"` - TokenType string `json:"token_type" ch:"token_type"` -} - -type TokenIdModel struct { - TokenId string `json:"token_id" ch:"token_id"` - TokenType string `json:"token_type" ch:"token_type"` -} - -type HolderModel struct { - HolderAddress string `json:"holder_address" ch:"owner"` - TokenId string `json:"token_id" ch:"token_id"` - Balance string `json:"balance" ch:"balance"` - TokenType string `json:"token_type" ch:"token_type"` -} - -// @Summary Get token IDs by type for a specific token address -// @Description Retrieve token IDs by type for a specific token address -// @Tags tokens -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param address path string true "Token address" -// @Param token_type query string false "Type of token (erc721 or erc1155)" -// @Param hide_zero_balances query bool true "Hide zero balances" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Success 200 {object} api.QueryResponse{data=[]TokenIdModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/tokens/{address} [get] -func GetTokenIdsByType(c *gin.Context) { - chainId, err := api.GetChainId(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - address := strings.ToLower(c.Param("address")) - if !strings.HasPrefix(address, "0x") { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid token address '%s'", address)) - return - } - - tokenTypes, err := getTokenTypesFromReq(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - // Filter out erc20 tokens as they don't have token IDs - filteredTokenTypes := []string{} - for _, tokenType := range tokenTypes { - if tokenType == "erc721" || tokenType == "erc1155" { - filteredTokenTypes = append(filteredTokenTypes, tokenType) - } - } - - if len(filteredTokenTypes) == 0 { - // Default to both ERC721 and ERC1155 if no valid token types specified - filteredTokenTypes = []string{"erc721", "erc1155"} - } - - hideZeroBalances := c.Query("hide_zero_balances") != "false" - - // We only care about token_id and token_type - columns := []string{"token_id", "token_type"} - groupBy := []string{"token_id", "token_type"} - sortBy := c.Query("sort_by") - - // Validate GroupBy and SortBy fields - if err := api.ValidateGroupByAndSortBy("balances", groupBy, sortBy, nil); err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - tokenIds, err := getTokenIdsFromReq(c) - if err != nil { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid token ids '%s'", err)) - return - } - - qf := storage.BalancesQueryFilter{ - ChainId: chainId, - TokenTypes: filteredTokenTypes, - TokenAddress: address, - ZeroBalance: hideZeroBalances, - TokenIds: tokenIds, - GroupBy: groupBy, - SortBy: sortBy, - SortOrder: c.Query("sort_order"), - Page: api.ParseIntQueryParam(c.Query("page"), 0), - Limit: api.ParseIntQueryParam(c.Query("limit"), 0), - } - - queryResult := api.QueryResponse{ - Meta: api.Meta{ - ChainId: chainId.Uint64(), - Page: qf.Page, - Limit: qf.Limit, - }, - } - - mainStorage, err = 
getMainStorage() - if err != nil { - log.Error().Err(err).Msg("Error getting main storage") - api.InternalErrorHandler(c) - return - } - - balancesResult, err := mainStorage.GetTokenBalances(qf, columns...) - if err != nil { - log.Error().Err(err).Msg("Error querying token IDs") - api.InternalErrorHandler(c) - return - } - - var data interface{} = serializeTokenIds(balancesResult.Data) - queryResult.Data = &data - sendJSONResponse(c, queryResult) -} - -// @Summary Get token balances of an address by type -// @Description Retrieve token balances of an address by type -// @Tags balances -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param owner path string true "Owner address" -// @Param type path string true "Type of token balance" -// @Param hide_zero_balances query bool true "Hide zero balances" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Success 200 {object} api.QueryResponse{data=[]BalanceModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/balances/{owner}/{type} [get] -func GetTokenBalancesByType(c *gin.Context) { - chainId, err := api.GetChainId(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - tokenTypes, err := getTokenTypesFromReq(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - owner := strings.ToLower(c.Param("owner")) - if !strings.HasPrefix(owner, "0x") { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid owner address '%s'", owner)) - return - } - tokenAddress := strings.ToLower(c.Query("token_address")) - if tokenAddress != "" && !strings.HasPrefix(tokenAddress, "0x") { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid token address '%s'", tokenAddress)) - return - } - - tokenIds, err := getTokenIdsFromReq(c) - if err != nil { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid token ids '%s'", err)) - return - } - - hideZeroBalances := c.Query("hide_zero_balances") != "false" - - columns := []string{"address", "sum(balance) as balance"} - groupBy := []string{"address"} - if !strings.Contains(strings.Join(tokenTypes, ","), "erc20") { - columns = []string{"address", "token_id", "sum(balance) as balance", "token_type"} - groupBy = []string{"address", "token_id", "token_type"} - } - - sortBy := c.Query("sort_by") - - // Validate GroupBy and SortBy fields - if err := api.ValidateGroupByAndSortBy("balances", groupBy, sortBy, nil); err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - qf := storage.BalancesQueryFilter{ - ChainId: chainId, - Owner: owner, - TokenTypes: tokenTypes, - TokenAddress: tokenAddress, - ZeroBalance: hideZeroBalances, - TokenIds: tokenIds, - GroupBy: groupBy, - SortBy: sortBy, - SortOrder: c.Query("sort_order"), - Page: api.ParseIntQueryParam(c.Query("page"), 0), - Limit: api.ParseIntQueryParam(c.Query("limit"), 0), - } - - queryResult := api.QueryResponse{ - Meta: api.Meta{ - ChainId: chainId.Uint64(), - Page: qf.Page, - Limit: qf.Limit, - }, - } - - mainStorage, err = getMainStorage() - if err != nil { - log.Error().Err(err).Msg("Error getting main storage") - api.InternalErrorHandler(c) - return - } - - balancesResult, err := mainStorage.GetTokenBalances(qf, columns...) 
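// --- Illustrative sketch only; not part of this patch. An example request against
// the balances route documented by the handler above (which this patch removes).
// The base URL and credentials are hypothetical; the path and query parameters
// follow the Swagger annotations on GetTokenBalancesByType.
package example

import (
	"fmt"
	"net/http"
)

func fetchERC20Balances(chainID uint64, owner string) (*http.Response, error) {
	url := fmt.Sprintf(
		"https://indexer.example.com/%d/balances/%s/erc20?hide_zero_balances=true&limit=5",
		chainID, owner,
	)
	req, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}
	// The route is declared with BasicAuth security in its Swagger annotations.
	req.SetBasicAuth("user", "password")
	return http.DefaultClient.Do(req)
}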
- if err != nil { - log.Error().Err(err).Msg("Error querying balances") - // TODO: might want to choose BadRequestError if it's due to not-allowed functions - api.InternalErrorHandler(c) - return - } - var data interface{} = serializeBalances(balancesResult.Data) - queryResult.Data = &data - sendJSONResponse(c, queryResult) -} - -// @Summary Get holders of a token -// @Description Retrieve holders of a token -// @Tags holders -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param address path string true "Address of the token" -// @Param token_type query string false "Type of token" -// @Param hide_zero_balances query bool true "Hide zero balances" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Success 200 {object} api.QueryResponse{data=[]HolderModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/holders/{address} [get] -func GetTokenHoldersByType(c *gin.Context) { - chainId, err := api.GetChainId(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - address := strings.ToLower(c.Param("address")) - if !strings.HasPrefix(address, "0x") { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid address '%s'", address)) - return - } - - tokenTypes, err := getTokenTypesFromReq(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - hideZeroBalances := c.Query("hide_zero_balances") != "false" - - columns := []string{"owner", "sum(balance) as balance"} - groupBy := []string{"owner"} - - if !strings.Contains(strings.Join(tokenTypes, ","), "erc20") { - columns = []string{"owner", "token_id", "sum(balance) as balance", "token_type"} - groupBy = []string{"owner", "token_id", "token_type"} - } - - tokenIds, err := getTokenIdsFromReq(c) - if err != nil { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid token ids '%s'", err)) - return - } - - sortBy := c.Query("sort_by") - - // Validate GroupBy and SortBy fields - if err := api.ValidateGroupByAndSortBy("balances", groupBy, sortBy, nil); err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - qf := storage.BalancesQueryFilter{ - ChainId: chainId, - TokenTypes: tokenTypes, - TokenAddress: address, - ZeroBalance: hideZeroBalances, - TokenIds: tokenIds, - GroupBy: groupBy, - SortBy: c.Query("sort_by"), - SortOrder: c.Query("sort_order"), - Page: api.ParseIntQueryParam(c.Query("page"), 0), - Limit: api.ParseIntQueryParam(c.Query("limit"), 0), - } - - queryResult := api.QueryResponse{ - Meta: api.Meta{ - ChainId: chainId.Uint64(), - Page: qf.Page, - Limit: qf.Limit, - }, - } - - mainStorage, err = getMainStorage() - if err != nil { - log.Error().Err(err).Msg("Error getting main storage") - api.InternalErrorHandler(c) - return - } - - balancesResult, err := mainStorage.GetTokenBalances(qf, columns...) 
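// --- Illustrative sketch only; not part of this patch. How the token_id query
// values used by these handlers are validated: getTokenIdsFromReq relies on
// big.Int.SetString with base 10, which reports malformed input through its
// second return value. "parseTokenID" is a hypothetical helper for illustration.
package example

import (
	"fmt"
	"math/big"
	"strings"
)

func parseTokenID(raw string) (*big.Int, error) {
	raw = strings.TrimSpace(raw)
	n := new(big.Int)
	if _, ok := n.SetString(raw, 10); !ok {
		return nil, fmt.Errorf("invalid token id: %s", raw)
	}
	return n, nil
}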
- if err != nil { - log.Error().Err(err).Msg("Error querying balances") - // TODO: might want to choose BadRequestError if it's due to not-allowed functions - api.InternalErrorHandler(c) - return - } - var data interface{} = serializeHolders(balancesResult.Data) - queryResult.Data = &data - sendJSONResponse(c, queryResult) -} - -func serializeBalances(balances []common.TokenBalance) []BalanceModel { - balanceModels := make([]BalanceModel, len(balances)) - for i, balance := range balances { - balanceModels[i] = serializeBalance(balance) - } - return balanceModels -} - -func serializeBalance(balance common.TokenBalance) BalanceModel { - return BalanceModel{ - TokenAddress: balance.TokenAddress, - Balance: balance.Balance.String(), - TokenId: func() string { - if balance.TokenId != nil { - return balance.TokenId.String() - } - return "" - }(), - TokenType: balance.TokenType, - } -} - -func getTokenTypesFromReq(c *gin.Context) ([]string, error) { - tokenTypeParam := c.Param("type") - var tokenTypes []string - if tokenTypeParam != "" { - tokenTypes = []string{tokenTypeParam} - } else { - tokenTypes = c.QueryArray("token_type") - } - - for i, tokenType := range tokenTypes { - tokenType = strings.ToLower(tokenType) - if tokenType != "erc721" && tokenType != "erc1155" && tokenType != "erc20" { - return []string{}, fmt.Errorf("invalid token type: %s", tokenType) - } - tokenTypes[i] = tokenType - } - return tokenTypes, nil -} - -func getTokenIdsFromReq(c *gin.Context) ([]*big.Int, error) { - tokenIds := c.QueryArray("token_id") - tokenIdsBn := make([]*big.Int, len(tokenIds)) - for i, tokenId := range tokenIds { - tokenId = strings.TrimSpace(tokenId) // Remove potential whitespace - if tokenId == "" { - return nil, fmt.Errorf("invalid token id: %s", tokenId) - } - num := new(big.Int) - _, ok := num.SetString(tokenId, 10) // Base 10 - if !ok { - return nil, fmt.Errorf("invalid token id: %s", tokenId) - } - tokenIdsBn[i] = num - } - return tokenIdsBn, nil -} - -func serializeHolders(holders []common.TokenBalance) []HolderModel { - holderModels := make([]HolderModel, len(holders)) - for i, holder := range holders { - holderModels[i] = serializeHolder(holder) - } - return holderModels -} - -func serializeHolder(holder common.TokenBalance) HolderModel { - return HolderModel{ - HolderAddress: holder.Owner, - Balance: holder.Balance.String(), - TokenId: func() string { - if holder.TokenId != nil { - return holder.TokenId.String() - } - return "" - }(), - TokenType: holder.TokenType, - } -} - -func serializeTokenIds(balances []common.TokenBalance) []TokenIdModel { - tokenIdModels := make([]TokenIdModel, len(balances)) - for i, balance := range balances { - tokenIdModels[i] = serializeTokenId(balance) - } - return tokenIdModels -} - -func serializeTokenId(balance common.TokenBalance) TokenIdModel { - return TokenIdModel{ - TokenId: func() string { - if balance.TokenId != nil { - return balance.TokenId.String() - } - return "" - }(), - TokenType: balance.TokenType, - } -} diff --git a/internal/handlers/transactions_handlers.go b/internal/handlers/transactions_handlers.go deleted file mode 100644 index ecc60ec3..00000000 --- a/internal/handlers/transactions_handlers.go +++ /dev/null @@ -1,257 +0,0 @@ -package handlers - -import ( - "net/http" - - "github.com/ethereum/go-ethereum/accounts/abi" - gethCommon "github.com/ethereum/go-ethereum/common" - "github.com/gin-gonic/gin" - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/api" - config "github.com/thirdweb-dev/indexer/configs" - 
"github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/storage" -) - -// @Summary Get all transactions -// @Description Retrieve all transactions across all contracts -// @Tags transactions -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param filter query string false "Filter parameters" -// @Param group_by query string false "Field to group results by" -// @Param sort_by query string false "Field to sort results by" -// @Param sort_order query string false "Sort order (asc or desc)" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Param aggregate query []string false "List of aggregate functions to apply" -// @Param force_consistent_data query bool false "Force consistent data at the expense of query speed" -// @Success 200 {object} api.QueryResponse{data=[]common.TransactionModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/transactions [get] -func GetTransactions(c *gin.Context) { - handleTransactionsRequest(c) -} - -// @Summary Get wallet transactions -// @Description Retrieve all incoming and outgoing transactions for a specific wallet address -// @Tags wallet -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param wallet_address path string true "Wallet address" -// @Param filter query string false "Filter parameters" -// @Param group_by query string false "Field to group results by" -// @Param sort_by query string false "Field to sort results by" -// @Param sort_order query string false "Sort order (asc or desc)" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Param force_consistent_data query bool false "Force consistent data at the expense of query speed" -// @Param decode query bool false "Decode transaction data" -// @Success 200 {object} api.QueryResponse{data=[]common.DecodedTransactionModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/wallet-transactions [get] -func GetWalletTransactions(c *gin.Context) { - handleTransactionsRequest(c) -} - -// @Summary Get transactions by contract -// @Description Retrieve transactions for a specific contract -// @Tags transactions -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param to path string true "Contract address" -// @Param filter query string false "Filter parameters" -// @Param group_by query string false "Field to group results by" -// @Param sort_by query string false "Field to sort results by" -// @Param sort_order query string false "Sort order (asc or desc)" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Param aggregate query []string false "List of aggregate functions to apply" -// @Param force_consistent_data query bool false "Force consistent data at the expense of query speed" -// @Success 200 {object} api.QueryResponse{data=[]common.TransactionModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/transactions/{to} [get] -func GetTransactionsByContract(c *gin.Context) { - 
handleTransactionsRequest(c) -} - -// @Summary Get transactions by contract and signature -// @Description Retrieve transactions for a specific contract and signature. When a valid function signature is provided, the response includes decoded transaction data with function inputs. -// @Tags transactions -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param to path string true "Contract address" -// @Param signature path string true "Function signature (e.g., 'transfer(address,uint256)')" -// @Param filter query string false "Filter parameters" -// @Param group_by query string false "Field to group results by" -// @Param sort_by query string false "Field to sort results by" -// @Param sort_order query string false "Sort order (asc or desc)" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(5) -// @Param aggregate query []string false "List of aggregate functions to apply" -// @Param force_consistent_data query bool false "Force consistent data at the expense of query speed" -// @Success 200 {object} api.QueryResponse{data=[]common.DecodedTransactionModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/transactions/{to}/{signature} [get] -func GetTransactionsByContractAndSignature(c *gin.Context) { - handleTransactionsRequest(c) -} - -func handleTransactionsRequest(c *gin.Context) { - chainId, err := api.GetChainId(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - contractAddress := c.Param("to") - signature := c.Param("signature") - walletAddress := c.Param("wallet_address") - queryParams, err := api.ParseQueryParams(c.Request) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - // Validate GroupBy and SortBy fields - if err := api.ValidateGroupByAndSortBy("transactions", queryParams.GroupBy, queryParams.SortBy, queryParams.Aggregates); err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - var functionABI *abi.Method - signatureHash := "" - if signature != "" { - functionABI, err = common.ConstructFunctionABI(signature) - if err != nil { - log.Debug().Err(err).Msgf("Unable to construct function ABI for %s", signature) - } - signatureHash = "0x" + gethCommon.Bytes2Hex(functionABI.ID) - } - - mainStorage, err := getMainStorage() - if err != nil { - log.Error().Err(err).Msg("Error creating storage connector") - api.InternalErrorHandler(c) - return - } - - // Prepare the QueryFilter - qf := storage.QueryFilter{ - FilterParams: queryParams.FilterParams, - ContractAddress: contractAddress, - WalletAddress: walletAddress, - Signature: signatureHash, - ChainId: chainId, - SortBy: queryParams.SortBy, - SortOrder: queryParams.SortOrder, - Page: queryParams.Page, - Limit: queryParams.Limit, - ForceConsistentData: queryParams.ForceConsistentData, - } - - // Initialize the QueryResult - queryResult := api.QueryResponse{ - Meta: api.Meta{ - ChainId: chainId.Uint64(), - ContractAddress: contractAddress, - Signature: signatureHash, - Page: queryParams.Page, - Limit: queryParams.Limit, - TotalItems: 0, - TotalPages: 0, // TODO: Implement total pages count - }, - Data: nil, - Aggregations: nil, - } - - // If aggregates or groupings are specified, retrieve them - if len(queryParams.Aggregates) > 0 || len(queryParams.GroupBy) > 0 { - qf.Aggregates = queryParams.Aggregates - qf.GroupBy = queryParams.GroupBy - - aggregatesResult, err 
:= mainStorage.GetAggregations("transactions", qf) - if err != nil { - log.Error().Err(err).Msg("Error querying aggregates") - // TODO: might want to choose BadRequestError if it's due to not-allowed functions - api.InternalErrorHandler(c) - return - } - queryResult.Aggregations = &aggregatesResult.Aggregates - queryResult.Meta.TotalItems = len(aggregatesResult.Aggregates) - } else { - // Retrieve logs data - transactionsResult, err := mainStorage.GetTransactions(qf) - if err != nil { - log.Error().Err(err).Msg("Error querying transactions") - // TODO: might want to choose BadRequestError if it's due to not-allowed functions - api.InternalErrorHandler(c) - return - } - - var data interface{} - if decodedTxs := decodeTransactionsIfNeeded(chainId.String(), transactionsResult.Data, functionABI, config.Cfg.API.AbiDecodingEnabled && queryParams.Decode); decodedTxs != nil { - data = serializeDecodedTransactions(decodedTxs) - } else { - data = serializeTransactions(transactionsResult.Data) - } - queryResult.Data = &data - queryResult.Meta.TotalItems = len(transactionsResult.Data) - } - - c.JSON(http.StatusOK, queryResult) -} - -func decodeTransactionsIfNeeded(chainId string, transactions []common.Transaction, functionABI *abi.Method, useContractService bool) []*common.DecodedTransaction { - if functionABI != nil { - decodingCompletelySuccessful := true - decodedTransactions := []*common.DecodedTransaction{} - for _, transaction := range transactions { - decodedTransaction := transaction.Decode(functionABI) - if decodedTransaction.Decoded.Name == "" || decodedTransaction.Decoded.Signature == "" { - decodingCompletelySuccessful = false - } - decodedTransactions = append(decodedTransactions, decodedTransaction) - } - if !useContractService || decodingCompletelySuccessful { - // decoding was successful or contract service decoding is disabled - return decodedTransactions - } - } - if useContractService { - return common.DecodeTransactions(chainId, transactions) - } - return nil -} - -func serializeDecodedTransactions(transactions []*common.DecodedTransaction) []common.DecodedTransactionModel { - decodedTransactionModels := make([]common.DecodedTransactionModel, len(transactions)) - for i, transaction := range transactions { - decodedTransactionModels[i] = transaction.Serialize() - } - return decodedTransactionModels -} - -func serializeTransactions(transactions []common.Transaction) []common.TransactionModel { - transactionModels := make([]common.TransactionModel, len(transactions)) - for i, transaction := range transactions { - transactionModels[i] = transaction.Serialize() - } - return transactionModels -} diff --git a/internal/handlers/transfer_handlers.go b/internal/handlers/transfer_handlers.go deleted file mode 100644 index 58f7df87..00000000 --- a/internal/handlers/transfer_handlers.go +++ /dev/null @@ -1,199 +0,0 @@ -package handlers - -import ( - "fmt" - "math/big" - "strings" - "time" - - "github.com/gin-gonic/gin" - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/api" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/storage" -) - -// TransferModel return type for Swagger documentation -type TransferModel struct { - TokenType string `json:"token_type" ch:"token_type"` - TokenAddress string `json:"token_address" ch:"token_address"` - FromAddress string `json:"from_address" ch:"from_address"` - ToAddress string `json:"to_address" ch:"to_address"` - TokenId string `json:"token_id" ch:"token_id"` - Amount string `json:"amount" 
ch:"amount"` - BlockNumber string `json:"block_number" ch:"block_number"` - BlockTimestamp string `json:"block_timestamp" ch:"block_timestamp"` - TransactionHash string `json:"transaction_hash" ch:"transaction_hash"` - LogIndex uint64 `json:"log_index" ch:"log_index"` -} - -// @Summary Get token transfers -// @Description Retrieve token transfers by various filters -// @Tags transfers -// @Accept json -// @Produce json -// @Security BasicAuth -// @Param chainId path string true "Chain ID" -// @Param token_type query []string false "Token types (erc721, erc1155, erc20)" -// @Param token_address query string false "Token contract address" -// @Param wallet query string false "Wallet address" -// @Param start_block query string false "Start block number" -// @Param end_block query string false "End block number" -// @Param start_timestamp query string false "Start timestamp (RFC3339 format)" -// @Param end_timestamp query string false "End timestamp (RFC3339 format)" -// @Param token_id query []string false "Token IDs" -// @Param transaction_hash query string false "Transaction hash" -// @Param page query int false "Page number for pagination" -// @Param limit query int false "Number of items per page" default(20) -// @Success 200 {object} api.QueryResponse{data=[]TransferModel} -// @Failure 400 {object} api.Error -// @Failure 401 {object} api.Error -// @Failure 500 {object} api.Error -// @Router /{chainId}/transfers [get] -func GetTokenTransfers(c *gin.Context) { - chainId, err := api.GetChainId(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - tokenTypes, err := getTokenTypesFromReq(c) - if err != nil { - api.BadRequestErrorHandler(c, err) - return - } - - walletAddress := strings.ToLower(c.Query("wallet_address")) - if walletAddress != "" && !strings.HasPrefix(walletAddress, "0x") { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid wallet_address '%s'", walletAddress)) - return - } - - tokenAddress := strings.ToLower(c.Query("token_address")) - if tokenAddress != "" && !strings.HasPrefix(tokenAddress, "0x") { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid token_address '%s'", tokenAddress)) - return - } - - transactionHash := strings.ToLower(c.Query("transaction_hash")) - if transactionHash != "" && !strings.HasPrefix(transactionHash, "0x") { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid transaction_hash '%s'", transactionHash)) - return - } - - tokenIds, err := getTokenIdsFromReq(c) - if err != nil { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid token_id: %s", err)) - return - } - - // Parse block number parameters - var startBlockNumber, endBlockNumber *big.Int - startBlockStr := c.Query("start_block") - if startBlockStr != "" { - startBlockNumber = new(big.Int) - _, ok := startBlockNumber.SetString(startBlockStr, 10) - if !ok { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid start_block '%s'", startBlockStr)) - return - } - } - - endBlockStr := c.Query("end_block") - if endBlockStr != "" { - endBlockNumber = new(big.Int) - _, ok := endBlockNumber.SetString(endBlockStr, 10) - if !ok { - api.BadRequestErrorHandler(c, fmt.Errorf("invalid end_block '%s'", endBlockStr)) - return - } - } - - // Validate SortBy field (transfers don't use GroupBy or Aggregates) - sortBy := c.Query("sort_by") - if sortBy != "" { - if err := api.ValidateGroupByAndSortBy("transfers", nil, sortBy, nil); err != nil { - api.BadRequestErrorHandler(c, err) - return - } - } - - // Define query filter - qf := storage.TransfersQueryFilter{ - ChainId: chainId, - TokenTypes: 
tokenTypes, - WalletAddress: walletAddress, - TokenAddress: tokenAddress, - TokenIds: tokenIds, - TransactionHash: transactionHash, - StartBlockNumber: startBlockNumber, - EndBlockNumber: endBlockNumber, - Page: api.ParseIntQueryParam(c.Query("page"), 0), - Limit: api.ParseIntQueryParam(c.Query("limit"), 20), - SortBy: sortBy, - SortOrder: c.Query("sort_order"), - } - - // Define columns for query - columns := []string{ - "token_type", - "token_address", - "from_address", - "to_address", - "token_id", - "amount", - "block_number", - "block_timestamp", - "transaction_hash", - "log_index", - } - - queryResult := api.QueryResponse{ - Meta: api.Meta{ - ChainId: chainId.Uint64(), - Page: qf.Page, - Limit: qf.Limit, - }, - } - - mainStorage, err = getMainStorage() - if err != nil { - log.Error().Err(err).Msg("Error getting main storage") - api.InternalErrorHandler(c) - return - } - - transfersResult, err := mainStorage.GetTokenTransfers(qf, columns...) - if err != nil { - log.Error().Err(err).Msg("Error querying token transfers") - api.InternalErrorHandler(c) - return - } - - var data interface{} = serializeTransfers(transfersResult.Data) - queryResult.Data = &data - sendJSONResponse(c, queryResult) -} - -func serializeTransfers(transfers []common.TokenTransfer) []TransferModel { - transferModels := make([]TransferModel, len(transfers)) - for i, transfer := range transfers { - transferModels[i] = serializeTransfer(transfer) - } - return transferModels -} - -func serializeTransfer(transfer common.TokenTransfer) TransferModel { - return TransferModel{ - TokenType: transfer.TokenType, - TokenAddress: transfer.TokenAddress, - FromAddress: transfer.FromAddress, - ToAddress: transfer.ToAddress, - TokenId: transfer.TokenID.String(), - Amount: transfer.Amount.String(), - BlockNumber: transfer.BlockNumber.String(), - BlockTimestamp: transfer.BlockTimestamp.Format(time.RFC3339), - TransactionHash: transfer.TransactionHash, - LogIndex: transfer.LogIndex, - } -} diff --git a/internal/libs/libblockdata/validator.go b/internal/libs/libblockdata/validator.go index 7a251e0d..8b421b78 100644 --- a/internal/libs/libblockdata/validator.go +++ b/internal/libs/libblockdata/validator.go @@ -14,7 +14,7 @@ func Validate(blockData *common.BlockData) (valid bool, err error) { if blockData == nil { return false, nil } - if config.Cfg.Validation.Mode == "disabled" { + if config.Cfg.ValidationMode == "disabled" { return true, nil } @@ -38,7 +38,7 @@ func Validate(blockData *common.BlockData) (valid bool, err error) { } // strict mode also validates logsBloom and transactionsRoot - if config.Cfg.Validation.Mode == "strict" { + if config.Cfg.ValidationMode == "strict" { // Calculate logsBloom from logs calculatedLogsBloom := validation.CalculateLogsBloom(blockData.Logs) // Compare calculated logsBloom with block's logsBloom diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index a751e6af..6791c1d2 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -5,182 +5,6 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" ) -// Committer Metrics -var ( - SuccessfulCommits = promauto.NewCounter(prometheus.CounterOpts{ - Name: "committer_successful_commits_total", - Help: "The total number of successful block commits", - }) - - LastCommittedBlock = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "committer_last_committed_block", - Help: "The last successfully committed block number", - }) - - CommitterLagInSeconds = promauto.NewGauge(prometheus.GaugeOpts{ - Name: 
"committer_lag_seconds", - Help: "The lag in seconds between the last committed block and the current timestamp", - }) - - GapCounter = promauto.NewCounter(prometheus.CounterOpts{ - Name: "committer_gap_counter", - Help: "The number of gaps detected during commits", - }) - - MissedBlockNumbers = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "committer_first_missed_block_number", - Help: "The first blocknumber detected in a commit gap", - }) -) - -// Worker Metrics -var LastFetchedBlock = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "worker_last_fetched_block_from_rpc", - Help: "The last block number fetched by the worker from the RPC", -}) - -// ChainTracker Metrics -var ( - ChainHead = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "chain_tracker_chain_head", - Help: "The latest block number in the current chain", - }) -) - -// Poller metrics -var ( - PolledBatchSize = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "polled_batch_size", - Help: "The number of blocks polled in a single batch", - }) -) - -var ( - PollerLastTriggeredBlock = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "poller_last_triggered_block", - Help: "The last block number that the poller was triggered for", - }) -) - -// Failure Recoverer Metrics -var ( - FailureRecovererLastTriggeredBlock = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "failure_recoverer_last_triggered_block", - Help: "The last block number that the failure recoverer was triggered for", - }) - - FirstBlocknumberInFailureRecovererBatch = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "failure_recoverer_first_block_in_batch", - Help: "The first block number in the failure recoverer batch", - }) -) - -// Reorg Handler Metrics -var ( - ReorgHandlerLastCheckedBlock = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "reorg_handler_last_checked_block", - Help: "The last block number that the reorg handler checked", - }) - - ReorgCounter = promauto.NewCounter(prometheus.CounterOpts{ - Name: "reorg_handler_reorg_counter", - Help: "The number of reorgs detected", - }) -) - -// Publisher Metrics -var ( - PublisherBlockCounter = promauto.NewCounter(prometheus.CounterOpts{ - Name: "publisher_block_counter", - Help: "The number of blocks published", - }) - - PublisherReorgedBlockCounter = promauto.NewCounter(prometheus.CounterOpts{ - Name: "publisher_reorged_block_counter", - Help: "The number of reorged blocks published", - }) - - LastPublishedBlock = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "last_published_block", - Help: "The last block number that was published", - }) -) - -// Operation Duration Metrics -var ( - StagingInsertDuration = promauto.NewHistogram(prometheus.HistogramOpts{ - Name: "staging_insert_duration_seconds", - Help: "Time taken to insert data into staging storage", - Buckets: prometheus.DefBuckets, - }) - - MainStorageInsertDuration = promauto.NewHistogram(prometheus.HistogramOpts{ - Name: "main_storage_insert_duration_seconds", - Help: "Time taken to insert data into main storage", - Buckets: prometheus.DefBuckets, - }) - - PublishDuration = promauto.NewHistogram(prometheus.HistogramOpts{ - Name: "publish_duration_seconds", - Help: "Time taken to publish block data to Kafka", - Buckets: prometheus.DefBuckets, - }) - - StagingDeleteDuration = promauto.NewHistogram(prometheus.HistogramOpts{ - Name: "staging_delete_duration_seconds", - Help: "Time taken to delete data from staging storage", - Buckets: prometheus.DefBuckets, - }) - - GetBlockNumbersToCommitDuration = promauto.NewHistogram(prometheus.HistogramOpts{ - Name: 
"get_block_numbers_to_commit_duration_seconds", - Help: "Time taken to get block numbers to commit from storage", - Buckets: prometheus.DefBuckets, - }) - - GetStagingDataDuration = promauto.NewHistogram(prometheus.HistogramOpts{ - Name: "get_staging_data_duration_seconds", - Help: "Time taken to get data from staging storage", - Buckets: prometheus.DefBuckets, - }) -) - -// Work Mode Metrics -var ( - CurrentWorkMode = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "current_work_mode", - Help: "The current work mode (0 = backfill, 1 = live)", - }) -) - -// ClickHouse Insert Row Count Metrics -var ( - ClickHouseMainStorageInsertOperations = promauto.NewCounter(prometheus.CounterOpts{ - Name: "clickhouse_main_storage_insert_operations", - Help: "The total number of insert operations into ClickHouse main storage", - }) - - ClickHouseMainStorageRowsInserted = promauto.NewCounter(prometheus.CounterOpts{ - Name: "clickhouse_main_storage_rows_inserted_total", - Help: "The total number of rows inserted into ClickHouse main storage", - }) - - ClickHouseTransactionsInserted = promauto.NewCounter(prometheus.CounterOpts{ - Name: "clickhouse_transactions_inserted_total", - Help: "The total number of transactions inserted into ClickHouse", - }) - - ClickHouseLogsInserted = promauto.NewCounter(prometheus.CounterOpts{ - Name: "clickhouse_logs_inserted_total", - Help: "The total number of logs inserted into ClickHouse", - }) - - ClickHouseTracesInserted = promauto.NewCounter(prometheus.CounterOpts{ - Name: "clickhouse_traces_inserted_total", - Help: "The total number of traces inserted into ClickHouse", - }) -) - // Backfill Metrics var ( BackfillStartBlock = promauto.NewGaugeVec(prometheus.GaugeOpts{ diff --git a/internal/middleware/authorization.go b/internal/middleware/authorization.go deleted file mode 100644 index 224d0012..00000000 --- a/internal/middleware/authorization.go +++ /dev/null @@ -1,36 +0,0 @@ -package middleware - -import ( - "fmt" - - "github.com/ethereum/go-ethereum/log" - "github.com/gin-gonic/gin" - "github.com/thirdweb-dev/indexer/api" - config "github.com/thirdweb-dev/indexer/configs" -) - -var ErrUnauthorized = fmt.Errorf("invalid username or password") - -func Authorization(c *gin.Context) { - if !isBasicAuthEnabled() { - c.Next() - return - } - - username, password, ok := c.Request.BasicAuth() - if !ok || !validateCredentials(username, password) { - log.Error(ErrUnauthorized.Error()) - api.UnauthorizedErrorHandler(c, ErrUnauthorized) - c.Abort() - return - } - c.Next() -} - -func isBasicAuthEnabled() bool { - return config.Cfg.API.BasicAuth.Username != "" && config.Cfg.API.BasicAuth.Password != "" -} - -func validateCredentials(username, password string) bool { - return username == config.Cfg.API.BasicAuth.Username && password == config.Cfg.API.BasicAuth.Password -} diff --git a/internal/middleware/cors.go b/internal/middleware/cors.go deleted file mode 100644 index 4235d65e..00000000 --- a/internal/middleware/cors.go +++ /dev/null @@ -1,18 +0,0 @@ -package middleware - -import ( - "github.com/gin-gonic/gin" -) - -func Cors(c *gin.Context) { - c.Writer.Header().Set("Access-Control-Allow-Origin", "*") - c.Writer.Header().Set("Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS") - c.Writer.Header().Set("Access-Control-Allow-Headers", "Origin, Content-Type, Content-Length, Accept-Encoding, X-CSRF-Token, Authorization, accept, origin, Cache-Control, X-Requested-With") - c.Writer.Header().Set("Access-Control-Allow-Credentials", "true") - - if c.Request.Method == 
"OPTIONS" { - c.AbortWithStatus(200) - return - } - c.Next() -} diff --git a/internal/middleware/logger.go b/internal/middleware/logger.go deleted file mode 100644 index ad7e0cb7..00000000 --- a/internal/middleware/logger.go +++ /dev/null @@ -1,50 +0,0 @@ -package middleware - -import ( - "time" - - "github.com/gin-gonic/gin" - "github.com/rs/zerolog/log" -) - -// Logger returns a gin.HandlerFunc (middleware) that logs requests using zerolog. -func Logger() gin.HandlerFunc { - return func(c *gin.Context) { - // Start timer - start := time.Now() - path := c.Request.URL.Path - raw := c.Request.URL.RawQuery - - // Process request - c.Next() - - // Stop timer - end := time.Now() - latency := end.Sub(start) - - // Get status code - statusCode := c.Writer.Status() - - // Get client IP - clientIP := c.ClientIP() - - // Get method - method := c.Request.Method - - // Get error message if any - var errorMessage string - if len(c.Errors) > 0 { - errorMessage = c.Errors.String() - } - - log.Debug(). - Str("path", path). - Str("raw", raw). - Int("status", statusCode). - Str("method", method). - Str("ip", clientIP). - Dur("latency", latency). - Str("error", errorMessage). - Msg("incoming request") - } -} diff --git a/internal/orchestrator/chain_tracker.go b/internal/orchestrator/chain_tracker.go deleted file mode 100644 index 29a8fc7f..00000000 --- a/internal/orchestrator/chain_tracker.go +++ /dev/null @@ -1,53 +0,0 @@ -package orchestrator - -import ( - "context" - "time" - - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/internal/metrics" - "github.com/thirdweb-dev/indexer/internal/rpc" -) - -const DEFAULT_CHAIN_TRACKER_POLL_INTERVAL = 60 * 1000 // 1 minutes - -type ChainTracker struct { - rpc rpc.IRPCClient - triggerIntervalMs int -} - -func NewChainTracker(rpc rpc.IRPCClient) *ChainTracker { - return &ChainTracker{ - rpc: rpc, - triggerIntervalMs: DEFAULT_CHAIN_TRACKER_POLL_INTERVAL, - } -} - -func (ct *ChainTracker) Start(ctx context.Context) { - interval := time.Duration(ct.triggerIntervalMs) * time.Millisecond - ticker := time.NewTicker(interval) - defer ticker.Stop() - - log.Debug().Msgf("Chain tracker running") - ct.trackLatestBlockNumber(ctx) - - for { - select { - case <-ctx.Done(): - log.Info().Msg("Chain tracker shutting down") - return - case <-ticker.C: - ct.trackLatestBlockNumber(ctx) - } - } -} - -func (ct *ChainTracker) trackLatestBlockNumber(ctx context.Context) { - latestBlockNumber, err := ct.rpc.GetLatestBlockNumber(ctx) - if err != nil { - log.Error().Err(err).Msg("Error getting latest block number") - return - } - latestBlockNumberFloat, _ := latestBlockNumber.Float64() - metrics.ChainHead.Set(latestBlockNumberFloat) -} diff --git a/internal/orchestrator/committer.go b/internal/orchestrator/committer.go deleted file mode 100644 index 968a021a..00000000 --- a/internal/orchestrator/committer.go +++ /dev/null @@ -1,531 +0,0 @@ -package orchestrator - -import ( - "context" - "fmt" - "math/big" - "sort" - "sync" - "sync/atomic" - "time" - - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/metrics" - "github.com/thirdweb-dev/indexer/internal/publisher" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/internal/worker" -) - -const DEFAULT_BLOCKS_PER_COMMIT = 1000 - -type Committer struct { - blocksPerCommit int - storage storage.IStorage - commitFromBlock 
*big.Int - commitToBlock *big.Int - rpc rpc.IRPCClient - lastCommittedBlock atomic.Uint64 - lastPublishedBlock atomic.Uint64 - publisher *publisher.Publisher - poller *Poller - validator *Validator -} - -type CommitterOption func(*Committer) - -func NewCommitter(rpc rpc.IRPCClient, storage storage.IStorage, poller *Poller, opts ...CommitterOption) *Committer { - blocksPerCommit := config.Cfg.Committer.BlocksPerCommit - if blocksPerCommit == 0 { - blocksPerCommit = DEFAULT_BLOCKS_PER_COMMIT - } - - commitToBlock := config.Cfg.Committer.ToBlock - if commitToBlock == 0 { - commitToBlock = -1 - } - - commitFromBlock := big.NewInt(int64(config.Cfg.Committer.FromBlock)) - committer := &Committer{ - blocksPerCommit: blocksPerCommit, - storage: storage, - commitFromBlock: commitFromBlock, - commitToBlock: big.NewInt(int64(commitToBlock)), - rpc: rpc, - publisher: publisher.GetInstance(), - poller: poller, - validator: NewValidator(rpc, storage, worker.NewWorker(rpc)), // validator uses worker without sources - } - cfb := commitFromBlock.Uint64() - committer.lastCommittedBlock.Store(cfb) - committer.lastPublishedBlock.Store(cfb) - - for _, opt := range opts { - opt(committer) - } - - return committer -} - -func (c *Committer) Start(ctx context.Context) { - log.Debug().Msgf("Committer running") - chainID := c.rpc.GetChainID() - - latestCommittedBlockNumber, err := c.storage.MainStorage.GetMaxBlockNumber(chainID) - if err != nil { - // It's okay to fail silently here; this value is only used for staging cleanup and - // the worker loop will eventually correct the state and delete as needed. - log.Error().Msgf("Error getting latest committed block number: %v", err) - } else if latestCommittedBlockNumber != nil && latestCommittedBlockNumber.Sign() > 0 { - c.lastCommittedBlock.Store(latestCommittedBlockNumber.Uint64()) - } - - // Initialize publisher position - always use max(lastPublished, lastCommitted) to prevent double publishing - lastPublished, err := c.storage.OrchestratorStorage.GetLastPublishedBlockNumber(chainID) - if err != nil { - // It's okay to fail silently here; it's only used for staging cleanup and will be - // corrected by the worker loop. - log.Error().Err(err).Msg("failed to get last published block number") - } else if lastPublished != nil && lastPublished.Sign() > 0 { - // Always ensure publisher starts from at least the committed value - if latestCommittedBlockNumber != nil && latestCommittedBlockNumber.Sign() > 0 { - if lastPublished.Cmp(latestCommittedBlockNumber) < 0 { - gap := new(big.Int).Sub(latestCommittedBlockNumber, lastPublished) - log.Warn(). - Str("last_published", lastPublished.String()). - Str("latest_committed", latestCommittedBlockNumber.String()). - Str("gap", gap.String()). 
- Msg("Publisher is behind committed position, seeking forward to committed value") - - c.lastPublishedBlock.Store(latestCommittedBlockNumber.Uint64()) - if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, latestCommittedBlockNumber); err != nil { - log.Error().Err(err).Msg("Failed to update last published block number after seeking forward") - // Fall back to the stored value on error - c.lastPublishedBlock.Store(lastPublished.Uint64()) - } - } else { - c.lastPublishedBlock.Store(lastPublished.Uint64()) - } - } else { - c.lastPublishedBlock.Store(lastPublished.Uint64()) - } - } else { - c.lastPublishedBlock.Store(c.lastCommittedBlock.Load()) - } - - // Determine the correct publish position - always take the maximum to avoid going backwards - var targetPublishBlock *big.Int - - if lastPublished == nil || lastPublished.Sign() == 0 { - // No previous publish position - if latestCommittedBlockNumber != nil && latestCommittedBlockNumber.Sign() > 0 { - // Start from committed position - targetPublishBlock = latestCommittedBlockNumber - } else if c.commitFromBlock.Sign() > 0 { - // Start from configured position minus 1 (since we publish from next block) - targetPublishBlock = new(big.Int).Sub(c.commitFromBlock, big.NewInt(1)) - } else { - // Start from 0 - targetPublishBlock = big.NewInt(0) - } - - log.Info(). - Str("target_publish_block", targetPublishBlock.String()). - Msg("No previous publish position, initializing publisher cursor") - } else { - // We have a previous position - targetPublishBlock = lastPublished - } - - // Only update storage if we're changing the position - if lastPublished == nil || targetPublishBlock.Cmp(lastPublished) != 0 { - if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, targetPublishBlock); err != nil { - log.Error().Err(err).Msg("Failed to update published block number in storage") - // If we can't update storage, use what was there originally to avoid issues - if lastPublished != nil { - targetPublishBlock = lastPublished - } - } - } - - // Store in memory for quick acess - c.lastPublishedBlock.Store(targetPublishBlock.Uint64()) - - log.Info(). - Str("publish_from", targetPublishBlock.String()). - Str("committed_at", func() string { - if latestCommittedBlockNumber != nil { - return latestCommittedBlockNumber.String() - } - return "0" - }()). 
- Msg("Publisher initialized") - - if config.Cfg.Publisher.Mode == "parallel" { - var wg sync.WaitGroup - wg.Add(2) - - go func() { - defer wg.Done() - c.runPublishLoop(ctx) - }() - - go func() { - defer wg.Done() - c.runCommitLoop(ctx) - }() - - <-ctx.Done() - - wg.Wait() - } else { - c.runCommitLoop(ctx) - } - - log.Info().Msg("Committer shutting down") - c.publisher.Close() -} - -func (c *Committer) runCommitLoop(ctx context.Context) { - for { - select { - case <-ctx.Done(): - return - default: - if c.commitToBlock.Sign() > 0 && c.lastCommittedBlock.Load() >= c.commitToBlock.Uint64() { - // Completing the commit loop if we've committed more than commit to block - log.Info().Msgf("Committer reached configured toBlock %s, the last commit block is %d, stopping commits", c.commitToBlock.String(), c.lastCommittedBlock.Load()) - return - } - blockDataToCommit, err := c.getSequentialBlockDataToCommit(ctx) - if err != nil { - log.Error().Err(err).Msg("Error getting block data to commit") - continue - } - if len(blockDataToCommit) == 0 { - log.Debug().Msg("No block data to commit") - continue - } - if err := c.commit(ctx, blockDataToCommit); err != nil { - log.Error().Err(err).Msg("Error committing blocks") - } - go c.cleanupProcessedStagingBlocks(ctx) - } - } -} - -func (c *Committer) runPublishLoop(ctx context.Context) { - for { - select { - case <-ctx.Done(): - return - default: - if c.commitToBlock.Sign() > 0 && c.lastPublishedBlock.Load() >= c.commitToBlock.Uint64() { - // Completing the publish loop if we've published more than commit to block - log.Info().Msgf("Committer reached configured toBlock %s, the last publish block is %d, stopping publishes", c.commitToBlock.String(), c.lastPublishedBlock.Load()) - return - } - if err := c.publish(ctx); err != nil { - log.Error().Err(err).Msg("Error publishing blocks") - } - go c.cleanupProcessedStagingBlocks(ctx) - } - } -} - -func (c *Committer) cleanupProcessedStagingBlocks(ctx context.Context) { - committed := c.lastCommittedBlock.Load() - published := c.lastPublishedBlock.Load() - if published == 0 || committed == 0 { - return - } - limit := committed - if published < limit { - limit = published - } - if limit == 0 { - return - } - chainID := c.rpc.GetChainID() - blockNumber := new(big.Int).SetUint64(limit) - stagingDeleteStart := time.Now() - - // Check if context is cancelled before deleting - select { - case <-ctx.Done(): - return - default: - } - - if err := c.storage.StagingStorage.DeleteStagingDataOlderThan(chainID, blockNumber); err != nil { - log.Error().Err(err).Msg("Failed to delete staging data") - return - } - - log.Debug(). - Uint64("committed_block_number", committed). - Uint64("published_block_number", published). - Str("older_than_block_number", blockNumber.String()). 
- Str("metric", "staging_delete_duration").Msgf("StagingStorage.DeleteStagingDataOlderThan duration: %f", time.Since(stagingDeleteStart).Seconds()) - metrics.StagingDeleteDuration.Observe(time.Since(stagingDeleteStart).Seconds()) -} - -func (c *Committer) getBlockNumbersToCommit(ctx context.Context) ([]*big.Int, error) { - startTime := time.Now() - defer func() { - log.Debug().Str("metric", "get_block_numbers_to_commit_duration").Msgf("getBlockNumbersToCommit duration: %f", time.Since(startTime).Seconds()) - metrics.GetBlockNumbersToCommitDuration.Observe(time.Since(startTime).Seconds()) - }() - - latestCommittedBlockNumber, err := c.storage.MainStorage.GetMaxBlockNumber(c.rpc.GetChainID()) - if err != nil { - return nil, err - } - if latestCommittedBlockNumber == nil { - latestCommittedBlockNumber = new(big.Int).SetUint64(0) - } - log.Debug().Msgf("Committer found this max block number in main storage: %s", latestCommittedBlockNumber.String()) - - if latestCommittedBlockNumber.Sign() == 0 { - // If no blocks have been committed yet, start from the fromBlock specified in the config - latestCommittedBlockNumber = new(big.Int).Sub(c.commitFromBlock, big.NewInt(1)) - } else { - lastCommitted := new(big.Int).SetUint64(c.lastCommittedBlock.Load()) - if latestCommittedBlockNumber.Cmp(lastCommitted) < 0 { - log.Warn().Msgf("Max block in storage (%s) is less than last committed block in memory (%s).", latestCommittedBlockNumber.String(), lastCommitted.String()) - return []*big.Int{}, nil - } - } - - startBlock := new(big.Int).Add(latestCommittedBlockNumber, big.NewInt(1)) - endBlock, err := c.getBlockToCommitUntil(ctx, latestCommittedBlockNumber) - if err != nil { - return nil, fmt.Errorf("error getting block to commit until: %v", err) - } - - blockCount := new(big.Int).Sub(endBlock, startBlock).Int64() + 1 - if blockCount < 0 { - return []*big.Int{}, fmt.Errorf("more blocks have been committed than the RPC has available - possible chain reset") - } - if blockCount == 0 { - return []*big.Int{}, nil - } - blockNumbers := make([]*big.Int, blockCount) - for i := int64(0); i < blockCount; i++ { - blockNumber := new(big.Int).Add(startBlock, big.NewInt(i)) - blockNumbers[i] = blockNumber - } - return blockNumbers, nil -} - -func (c *Committer) getBlockNumbersToPublish(ctx context.Context) ([]*big.Int, error) { - // Get the last published block from storage (which was already corrected in Start) - latestPublishedBlockNumber, err := c.storage.OrchestratorStorage.GetLastPublishedBlockNumber(c.rpc.GetChainID()) - if err != nil { - return nil, fmt.Errorf("failed to get last published block number: %v", err) - } - - // This should never happen after Start() has run, but handle it defensively - if latestPublishedBlockNumber == nil || latestPublishedBlockNumber.Sign() == 0 { - // Fall back to in-memory value which was set during Start - latestPublishedBlockNumber = new(big.Int).SetUint64(c.lastPublishedBlock.Load()) - log.Warn(). - Str("fallback_value", latestPublishedBlockNumber.String()). - Msg("Storage returned nil/0 for last published block, using in-memory value") - } - - log.Debug(). - Str("last_published", latestPublishedBlockNumber.String()). 
- Msg("Determining blocks to publish") - - startBlock := new(big.Int).Add(latestPublishedBlockNumber, big.NewInt(1)) - endBlock, err := c.getBlockToCommitUntil(ctx, latestPublishedBlockNumber) - if err != nil { - return nil, fmt.Errorf("error getting block to commit until: %v", err) - } - - blockCount := new(big.Int).Sub(endBlock, startBlock).Int64() + 1 - if blockCount < 0 { - return []*big.Int{}, fmt.Errorf("more blocks have been committed than the RPC has available - possible chain reset") - } - if blockCount == 0 { - return []*big.Int{}, nil - } - blockNumbers := make([]*big.Int, blockCount) - for i := int64(0); i < blockCount; i++ { - blockNumber := new(big.Int).Add(startBlock, big.NewInt(i)) - blockNumbers[i] = blockNumber - } - return blockNumbers, nil -} - -func (c *Committer) getBlockToCommitUntil(ctx context.Context, latestCommittedBlockNumber *big.Int) (*big.Int, error) { - untilBlock := new(big.Int).Add(latestCommittedBlockNumber, big.NewInt(int64(c.blocksPerCommit))) - - // If a commit until block is set, then set a limit on the commit until block - if c.commitToBlock.Sign() > 0 && untilBlock.Cmp(c.commitToBlock) > 0 { - return new(big.Int).Set(c.commitToBlock), nil - } - - // get latest block from RPC and if that's less than until block, return that - latestBlock, err := c.rpc.GetLatestBlockNumber(ctx) - if err != nil { - return nil, fmt.Errorf("error getting latest block from RPC: %v", err) - } - - if latestBlock.Cmp(untilBlock) < 0 { - log.Debug().Msgf("Committing until latest block: %s", latestBlock.String()) - return latestBlock, nil - } - - return untilBlock, nil -} - -func (c *Committer) fetchBlockData(ctx context.Context, blockNumbers []*big.Int) ([]common.BlockData, error) { - blocksData := c.poller.Request(ctx, blockNumbers) - if len(blocksData) == 0 { - log.Warn().Msgf("Committer didn't find the following range: %v - %v. 
%v", blockNumbers[0].Int64(), blockNumbers[len(blockNumbers)-1].Int64(), c.poller.GetPollerStatus()) - time.Sleep(500 * time.Millisecond) // TODO: wait for block time - return nil, nil - } - return blocksData, nil -} - -func (c *Committer) getSequentialBlockData(ctx context.Context, blockNumbers []*big.Int) ([]common.BlockData, error) { - blocksData, err := c.fetchBlockData(ctx, blockNumbers) - if err != nil { - return nil, err - } - - if len(blocksData) == 0 { - return nil, nil - } - - blocksData, err = c.validator.EnsureValidBlocks(ctx, blocksData) - if err != nil { - return nil, err - } - - if len(blocksData) == 0 { - return nil, nil - } - - // Sort blocks by block number - sort.Slice(blocksData, func(i, j int) bool { - return blocksData[i].Block.Number.Cmp(blocksData[j].Block.Number) < 0 - }) - - hasGap := blocksData[0].Block.Number.Cmp(blockNumbers[0]) != 0 - if hasGap { - return nil, fmt.Errorf("first block number (%s) in commit batch does not match expected (%s)", blocksData[0].Block.Number.String(), blockNumbers[0].String()) - } - - var sequentialBlockData []common.BlockData - sequentialBlockData = append(sequentialBlockData, blocksData[0]) - expectedBlockNumber := new(big.Int).Add(blocksData[0].Block.Number, big.NewInt(1)) - - for i := 1; i < len(blocksData); i++ { - if blocksData[i].Block.Number.Cmp(blocksData[i-1].Block.Number) == 0 { - // Duplicate block, skip -- might happen if block has been polled multiple times - continue - } - if blocksData[i].Block.Number.Cmp(expectedBlockNumber) != 0 { - // Note: Gap detected, stop here - log.Warn().Msgf("Gap detected at block %s, committing until %s", expectedBlockNumber.String(), blocksData[i-1].Block.Number.String()) - // increment the gap counter in prometheus - metrics.GapCounter.Inc() - // record the first missed block number in prometheus - metrics.MissedBlockNumbers.Set(float64(blocksData[0].Block.Number.Int64())) - break - } - sequentialBlockData = append(sequentialBlockData, blocksData[i]) - expectedBlockNumber.Add(expectedBlockNumber, big.NewInt(1)) - } - - return sequentialBlockData, nil -} - -func (c *Committer) getSequentialBlockDataToCommit(ctx context.Context) ([]common.BlockData, error) { - blocksToCommit, err := c.getBlockNumbersToCommit(ctx) - if err != nil { - return nil, fmt.Errorf("error determining blocks to commit: %v", err) - } - if len(blocksToCommit) == 0 { - return nil, nil - } - return c.getSequentialBlockData(ctx, blocksToCommit) -} - -func (c *Committer) getSequentialBlockDataToPublish(ctx context.Context) ([]common.BlockData, error) { - blocksToPublish, err := c.getBlockNumbersToPublish(ctx) - if err != nil { - return nil, fmt.Errorf("error determining blocks to publish: %v", err) - } - if len(blocksToPublish) == 0 { - return nil, nil - } - return c.getSequentialBlockData(ctx, blocksToPublish) -} - -func (c *Committer) publish(ctx context.Context) error { - blockData, err := c.getSequentialBlockDataToPublish(ctx) - if err != nil { - return err - } - if len(blockData) == 0 { - return nil - } - - if err := c.publisher.PublishBlockData(blockData); err != nil { - return err - } - - chainID := c.rpc.GetChainID() - highest := blockData[len(blockData)-1].Block.Number - if err := c.storage.OrchestratorStorage.SetLastPublishedBlockNumber(chainID, highest); err != nil { - return err - } - c.lastPublishedBlock.Store(highest.Uint64()) - return nil -} - -func (c *Committer) commit(ctx context.Context, blockData []common.BlockData) error { - blockNumbers := make([]*big.Int, len(blockData)) - highestBlock := 
blockData[0].Block - for i, block := range blockData { - blockNumbers[i] = block.Block.Number - if block.Block.Number.Cmp(highestBlock.Number) > 0 { - highestBlock = block.Block - } - } - log.Debug().Msgf("Committing %d blocks from %s to %s", len(blockNumbers), blockNumbers[0].String(), blockNumbers[len(blockNumbers)-1].String()) - - mainStorageStart := time.Now() - if err := c.storage.MainStorage.InsertBlockData(blockData); err != nil { - log.Error().Err(err).Msgf("Failed to commit blocks: %v", blockNumbers) - return fmt.Errorf("error saving data to main storage: %v", err) - } - log.Debug().Str("metric", "main_storage_insert_duration").Msgf("MainStorage.InsertBlockData duration: %f", time.Since(mainStorageStart).Seconds()) - metrics.MainStorageInsertDuration.Observe(time.Since(mainStorageStart).Seconds()) - - if config.Cfg.Publisher.Mode == "default" { - highest := highestBlock.Number.Uint64() - go func() { - if err := c.publisher.PublishBlockData(blockData); err != nil { - log.Error().Err(err).Msg("Failed to publish block data to kafka") - return - } - c.lastPublishedBlock.Store(highest) - }() - } - - c.lastCommittedBlock.Store(highestBlock.Number.Uint64()) - - // Update metrics for successful commits - metrics.SuccessfulCommits.Add(float64(len(blockData))) - metrics.LastCommittedBlock.Set(float64(highestBlock.Number.Int64())) - metrics.CommitterLagInSeconds.Set(float64(time.Since(highestBlock.Timestamp).Seconds())) - return nil -} diff --git a/internal/orchestrator/committer_test.go b/internal/orchestrator/committer_test.go deleted file mode 100644 index a8b18adb..00000000 --- a/internal/orchestrator/committer_test.go +++ /dev/null @@ -1 +0,0 @@ -package orchestrator diff --git a/internal/orchestrator/orchestrator.go b/internal/orchestrator/orchestrator.go deleted file mode 100644 index 13ec47e6..00000000 --- a/internal/orchestrator/orchestrator.go +++ /dev/null @@ -1,132 +0,0 @@ -package orchestrator - -import ( - "context" - "os" - "os/signal" - "sync" - "syscall" - - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/source" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/internal/worker" -) - -type Orchestrator struct { - rpc rpc.IRPCClient - storage storage.IStorage - worker *worker.Worker - poller *Poller - reorgHandlerEnabled bool - cancel context.CancelFunc - wg sync.WaitGroup -} - -func NewOrchestrator(rpc rpc.IRPCClient) (*Orchestrator, error) { - storage, err := storage.NewStorageConnector(&config.Cfg.Storage) - if err != nil { - return nil, err - } - - return &Orchestrator{ - rpc: rpc, - storage: storage, - reorgHandlerEnabled: config.Cfg.ReorgHandler.Enabled, - }, nil -} - -func (o *Orchestrator) Start() { - ctx, cancel := context.WithCancel(context.Background()) - o.cancel = cancel - - sigChan := make(chan os.Signal, 1) - signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT) - - go func() { - sig := <-sigChan - log.Info().Msgf("Received signal %v, initiating graceful shutdown", sig) - o.cancel() - }() - - o.initializeWorkerAndPoller() - - o.wg.Add(1) - go func() { - defer o.wg.Done() - - o.poller.Start(ctx) - - // If the poller is terminated, cancel the orchestrator - log.Info().Msg("Poller completed") - o.cancel() - }() - - o.wg.Add(1) - go func() { - defer o.wg.Done() - - committer := NewCommitter(o.rpc, o.storage, o.poller) - committer.Start(ctx) - - // If the committer is terminated, cancel the orchestrator - 
log.Info().Msg("Committer completed") - o.cancel() - }() - - if o.reorgHandlerEnabled { - o.wg.Add(1) - go func() { - defer o.wg.Done() - reorgHandler := NewReorgHandler(o.rpc, o.storage) - reorgHandler.Start(ctx) - - log.Info().Msg("Reorg handler completed") - }() - } - - // The chain tracker is always running - o.wg.Add(1) - go func() { - defer o.wg.Done() - chainTracker := NewChainTracker(o.rpc) - chainTracker.Start(ctx) - - log.Info().Msg("Chain tracker completed") - }() - - // Waiting for all goroutines to complete - o.wg.Wait() - - if err := o.storage.Close(); err != nil { - log.Error().Err(err).Msg("Error closing storage connections") - } - - log.Info().Msg("Orchestrator shutdown complete") -} - -func (o *Orchestrator) initializeWorkerAndPoller() { - var s3, staging source.ISource - var err error - - chainId := o.rpc.GetChainID() - if config.Cfg.Poller.S3.Bucket != "" && config.Cfg.Poller.S3.Region != "" { - s3, err = source.NewS3Source(chainId, config.Cfg.Poller.S3) - if err != nil { - log.Fatal().Err(err).Msg("Error creating S3 source for worker") - return - } - } - - if o.storage.StagingStorage != nil { - if staging, err = source.NewStagingSource(chainId, o.storage.StagingStorage); err != nil { - log.Fatal().Err(err).Msg("Error creating Staging source for worker") - return - } - } - - o.worker = worker.NewWorkerWithSources(o.rpc, s3, staging) - o.poller = NewPoller(o.rpc, o.storage, WithPollerWorker(o.worker)) -} diff --git a/internal/orchestrator/poller.go b/internal/orchestrator/poller.go deleted file mode 100644 index 93f0fc3a..00000000 --- a/internal/orchestrator/poller.go +++ /dev/null @@ -1,501 +0,0 @@ -package orchestrator - -import ( - "context" - "fmt" - "math/big" - "sync" - "time" - - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/metrics" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/internal/worker" -) - -const ( - DEFAULT_PARALLEL_POLLERS = 5 -) - -type Poller struct { - chainId *big.Int - rpc rpc.IRPCClient - worker *worker.Worker - storage storage.IStorage - lastPolledBlock *big.Int - lastPolledBlockMutex sync.RWMutex - parallelPollers int - lookaheadBatches int - processingRanges map[string][]chan struct{} // Track ranges with notification channels - processingRangesMutex sync.RWMutex - queuedRanges map[string]bool // Track ranges queued but not yet processing - queuedRangesMutex sync.RWMutex - tasks chan []*big.Int - wg sync.WaitGroup - ctx context.Context - cancel context.CancelFunc -} - -type BlockNumberWithError struct { - BlockNumber *big.Int - Error error -} - -type PollerOption func(*Poller) - -func WithPollerWorker(cfg *worker.Worker) PollerOption { - return func(p *Poller) { - if cfg == nil { - return - } - - p.worker = cfg - } -} - -func NewPoller(rpc rpc.IRPCClient, storage storage.IStorage, opts ...PollerOption) *Poller { - parallelPollers := config.Cfg.Poller.ParallelPollers - if parallelPollers == 0 { - parallelPollers = DEFAULT_PARALLEL_POLLERS - } - - // Set the lookahead -> number of pollers + 2 - // effectively setting the minimum look ahead = 3 batches - lookaheadBatches := parallelPollers + 2 - - poller := &Poller{ - chainId: rpc.GetChainID(), - rpc: rpc, - storage: storage, - parallelPollers: parallelPollers, - lookaheadBatches: lookaheadBatches, - processingRanges: make(map[string][]chan struct{}), - queuedRanges: 
make(map[string]bool), - tasks: make(chan []*big.Int, parallelPollers+lookaheadBatches), - } - - for _, opt := range opts { - opt(poller) - } - - if poller.worker == nil { - poller.worker = worker.NewWorker(poller.rpc) - } - - poller.lastPolledBlock = big.NewInt(0) - - return poller -} - -var ErrNoNewBlocks = fmt.Errorf("no new blocks to poll") -var ErrBlocksProcessing = fmt.Errorf("blocks are being processed") - -func (p *Poller) Start(ctx context.Context) { - log.Debug().Msgf("Poller running with %d workers", p.parallelPollers) - - p.ctx, p.cancel = context.WithCancel(ctx) - - for i := 0; i < p.parallelPollers; i++ { - p.wg.Add(1) - go p.workerLoop() - } - - <-ctx.Done() - p.shutdown() -} - -// Poll forward to cache the blocks that may be requested -func (p *Poller) poll(ctx context.Context, blockNumbers []*big.Int) ([]common.BlockData, error) { - if len(blockNumbers) == 0 { - return nil, fmt.Errorf("no block numbers provided") - } - - // Mark this range as being processed - startBlock, endBlock := blockNumbers[0], blockNumbers[len(blockNumbers)-1] - rangeKey := p.getRangeKey(startBlock, endBlock) - - // Transition from queued to processing - p.unmarkRangeAsQueued(rangeKey) - - // Check if already processing - if p.isRangeProcessing(rangeKey) { - return nil, ErrBlocksProcessing - } - - p.markRangeAsProcessing(rangeKey) - defer p.unmarkRangeAsProcessing(rangeKey) - - blockData, highestBlockNumber := p.pollBlockData(ctx, blockNumbers) - if len(blockData) == 0 || highestBlockNumber == nil { - return nil, fmt.Errorf("no valid block data polled") - } - - if err := p.stageResults(blockData); err != nil { - log.Error().Err(err).Msg("error staging poll results") - return nil, err - } - - p.lastPolledBlockMutex.Lock() - if highestBlockNumber.Cmp(p.lastPolledBlock) > 0 { - p.lastPolledBlock = new(big.Int).Set(highestBlockNumber) - } - endBlockNumberFloat, _ := p.lastPolledBlock.Float64() - p.lastPolledBlockMutex.Unlock() - - metrics.PollerLastTriggeredBlock.Set(endBlockNumberFloat) - return blockData, nil -} - -func (p *Poller) Request(ctx context.Context, blockNumbers []*big.Int) []common.BlockData { - if len(blockNumbers) == 0 { - return nil - } - - startBlock, endBlock := blockNumbers[0], blockNumbers[len(blockNumbers)-1] - rangeKey := p.getRangeKey(startBlock, endBlock) - - p.lastPolledBlockMutex.RLock() - lastPolledBlock := new(big.Int).Set(p.lastPolledBlock) - p.lastPolledBlockMutex.RUnlock() - - // If requested blocks are already cached (polled), fetch from staging - if endBlock.Cmp(lastPolledBlock) <= 0 { - blockData, _ := p.pollBlockData(ctx, blockNumbers) - if len(blockData) > 0 { - go p.triggerLookahead(endBlock, int64(len(blockNumbers))) - } - return blockData - } - - // Check if this range is currently being processed - if p.isRangeProcessing(rangeKey) { - log.Debug().Msgf("Range %s is being processed, waiting for completion", rangeKey) - p.waitForRange(rangeKey) - // After waiting (or timeout), try to fetch from staging - blockData, _ := p.pollBlockData(ctx, blockNumbers) - if len(blockData) > 0 { - go p.triggerLookahead(endBlock, int64(len(blockNumbers))) - } - return blockData - } - - // Process and cache the requested range - blockData, err := p.poll(ctx, blockNumbers) - if err != nil { - if err == ErrBlocksProcessing { - // Another goroutine started processing this range, wait for it - log.Debug().Msgf("Range %s started processing by another goroutine, waiting", rangeKey) - p.waitForRange(rangeKey) - blockData, _ = p.pollBlockData(ctx, blockNumbers) - } else if err == 
ErrNoNewBlocks { - // This is expected, let it fails silently - return nil - } else { - log.Error().Err(err).Msgf("Error polling requested blocks: %s - %s", startBlock.String(), endBlock.String()) - return nil - } - } - - // Trigger lookahead if we have data - if len(blockData) > 0 { - go p.triggerLookahead(endBlock, int64(len(blockNumbers))) - } - return blockData -} - -func (p *Poller) pollBlockData(ctx context.Context, blockNumbers []*big.Int) ([]common.BlockData, *big.Int) { - if len(blockNumbers) == 0 { - return nil, nil - } - log.Debug().Msgf("Polling %d blocks starting from %s to %s", len(blockNumbers), blockNumbers[0], blockNumbers[len(blockNumbers)-1]) - - results := p.worker.Run(ctx, blockNumbers) - blockData := p.convertPollResultsToBlockData(results) - - var highestBlockNumber *big.Int - if len(blockData) > 0 { - highestBlockNumber = blockData[0].Block.Number - for _, block := range blockData { - if block.Block.Number.Cmp(highestBlockNumber) > 0 { - highestBlockNumber = new(big.Int).Set(block.Block.Number) - } - } - } - - return blockData, highestBlockNumber -} - -func (p *Poller) convertPollResultsToBlockData(results []rpc.GetFullBlockResult) []common.BlockData { - blockData := make([]common.BlockData, 0, len(results)) - for _, result := range results { - blockData = append(blockData, common.BlockData{ - Block: result.Data.Block, - Logs: result.Data.Logs, - Transactions: result.Data.Transactions, - Traces: result.Data.Traces, - }) - } - return blockData -} - -func (p *Poller) stageResults(blockData []common.BlockData) error { - if len(blockData) == 0 { - return nil - } - - startTime := time.Now() - - metrics.PolledBatchSize.Set(float64(len(blockData))) - - if err := p.storage.StagingStorage.InsertStagingData(blockData); err != nil { - log.Error().Err(err).Msgf("error inserting block data into staging") - return err - } - - log.Debug(). - Str("metric", "staging_insert_duration"). - Str("first_block", blockData[0].Block.Number.String()). - Str("last_block", blockData[len(blockData)-1].Block.Number.String()). 
- Msgf("InsertStagingData for %s - %s, duration: %f", - blockData[0].Block.Number.String(), blockData[len(blockData)-1].Block.Number.String(), - time.Since(startTime).Seconds()) - - metrics.StagingInsertDuration.Observe(time.Since(startTime).Seconds()) - return nil -} - -func (p *Poller) createBlockNumbersForRange(startBlock *big.Int, endBlock *big.Int) []*big.Int { - blockCount := new(big.Int).Sub(endBlock, startBlock).Int64() + 1 - blockNumbers := make([]*big.Int, blockCount) - for i := int64(0); i < blockCount; i++ { - blockNumbers[i] = new(big.Int).Add(startBlock, big.NewInt(i)) - } - return blockNumbers -} - -func (p *Poller) shutdown() { - p.cancel() - close(p.tasks) - p.wg.Wait() - log.Info().Msg("Poller shutting down") -} - -func (p *Poller) workerLoop() { - defer p.wg.Done() - for { - select { - case <-p.ctx.Done(): - return - case blockNumbers, ok := <-p.tasks: - if !ok { - return - } - p.processBatch(blockNumbers) - } - } -} - -func (p *Poller) processBatch(blockNumbers []*big.Int) { - if len(blockNumbers) == 0 { - return - } - - _, err := p.poll(p.ctx, blockNumbers) - if err != nil { - if err != ErrBlocksProcessing && err != ErrNoNewBlocks { - if len(blockNumbers) > 0 { - startBlock, endBlock := blockNumbers[0], blockNumbers[len(blockNumbers)-1] - log.Error().Err(err).Msgf("Failed to poll blocks %s - %s", startBlock.String(), endBlock.String()) - } - } - return - } -} - -func (p *Poller) triggerLookahead(currentEndBlock *big.Int, batchSize int64) { - // Use configurable lookahead batches - for i := 0; i < p.lookaheadBatches; i++ { - startBlock := new(big.Int).Add(currentEndBlock, big.NewInt(int64(i)*batchSize+1)) - endBlock := new(big.Int).Add(startBlock, big.NewInt(batchSize-1)) - - // Check if this range is already cached, queued, or being processed - rangeKey := p.getRangeKey(startBlock, endBlock) - if p.isRangeProcessing(rangeKey) || p.isRangeQueued(rangeKey) { - continue - } - - p.lastPolledBlockMutex.RLock() - lastPolled := new(big.Int).Set(p.lastPolledBlock) - p.lastPolledBlockMutex.RUnlock() - - if startBlock.Cmp(lastPolled) <= 0 { - continue // Already cached - } - - // Get latest block to ensure we don't exceed chain head - latestBlock, err := p.rpc.GetLatestBlockNumber(p.ctx) - if err != nil { - log.Error().Err(err).Msg("Failed to get latest block") - break - } - - if startBlock.Cmp(latestBlock) > 0 { - break // Would exceed chain head - } - - if endBlock.Cmp(latestBlock) > 0 { - endBlock = latestBlock - } - - blockNumbers := p.createBlockNumbersForRange(startBlock, endBlock) - - // Mark as queued before sending to channel - p.markRangeAsQueued(rangeKey) - - // Queue for processing - select { - case p.tasks <- blockNumbers: - log.Debug().Msgf("Queued lookahead batch %s - %s", startBlock.String(), endBlock.String()) - default: - // Queue is full, unmark and stop queueing - p.unmarkRangeAsQueued(rangeKey) - return - } - } -} - -func (p *Poller) getRangeKey(startBlock, endBlock *big.Int) string { - return fmt.Sprintf("%s-%s", startBlock.String(), endBlock.String()) -} - -// isRangeProcessing checks if a range is currently being processed -func (p *Poller) isRangeProcessing(rangeKey string) bool { - p.processingRangesMutex.RLock() - defer p.processingRangesMutex.RUnlock() - return len(p.processingRanges[rangeKey]) > 0 -} - -// isRangeQueued checks if a range is queued for processing -func (p *Poller) isRangeQueued(rangeKey string) bool { - p.queuedRangesMutex.RLock() - defer p.queuedRangesMutex.RUnlock() - return p.queuedRanges[rangeKey] -} - -// markRangeAsQueued marks 
a range as queued for processing -func (p *Poller) markRangeAsQueued(rangeKey string) { - p.queuedRangesMutex.Lock() - defer p.queuedRangesMutex.Unlock() - p.queuedRanges[rangeKey] = true -} - -// unmarkRangeAsQueued removes a range from the queued set -func (p *Poller) unmarkRangeAsQueued(rangeKey string) { - p.queuedRangesMutex.Lock() - defer p.queuedRangesMutex.Unlock() - delete(p.queuedRanges, rangeKey) -} - -func (p *Poller) markRangeAsProcessing(rangeKey string) chan struct{} { - p.processingRangesMutex.Lock() - defer p.processingRangesMutex.Unlock() - - // Create a notification channel for this range - notifyChan := make(chan struct{}) - - // Initialize the slice if it doesn't exist - if p.processingRanges[rangeKey] == nil { - p.processingRanges[rangeKey] = []chan struct{}{} - } - - // Store the notification channel - p.processingRanges[rangeKey] = append(p.processingRanges[rangeKey], notifyChan) - - return notifyChan -} - -func (p *Poller) unmarkRangeAsProcessing(rangeKey string) { - p.processingRangesMutex.Lock() - defer p.processingRangesMutex.Unlock() - - // Get all waiting channels for this range - waitingChans := p.processingRanges[rangeKey] - - // Notify all waiting goroutines - log.Debug().Msgf("Notifying %d waiters for Range %s processing completed", len(waitingChans), rangeKey) - for _, ch := range waitingChans { - close(ch) - } - - // Remove the range from processing - delete(p.processingRanges, rangeKey) -} - -// waitForRange waits for a range to finish processing with a timeout -func (p *Poller) waitForRange(rangeKey string) bool { - p.processingRangesMutex.Lock() - - // Check if range is being processed - waitingChans, isProcessing := p.processingRanges[rangeKey] - if !isProcessing || len(waitingChans) == 0 { - p.processingRangesMutex.Unlock() - return false // Not processing - } - - // Create a channel to wait on - waitChan := make(chan struct{}) - p.processingRanges[rangeKey] = append(p.processingRanges[rangeKey], waitChan) - p.processingRangesMutex.Unlock() - - // Wait for the range to complete, timeout, or context cancellation - select { - case <-waitChan: - log.Debug().Msgf("Got notification for range %s processing completed", rangeKey) - return true // Range completed - case <-p.ctx.Done(): - return false // Context cancelled - } -} - -// GetProcessingRanges returns a list of ranges currently being processed (for diagnostics) -func (p *Poller) GetProcessingRanges() []string { - p.processingRangesMutex.RLock() - defer p.processingRangesMutex.RUnlock() - - ranges := make([]string, 0, len(p.processingRanges)) - for rangeKey, waiters := range p.processingRanges { - ranges = append(ranges, fmt.Sprintf("%s (waiters: %d)", rangeKey, len(waiters))) - } - return ranges -} - -// GetQueuedRanges returns a list of ranges currently queued for processing (for diagnostics) -func (p *Poller) GetQueuedRanges() []string { - p.queuedRangesMutex.RLock() - defer p.queuedRangesMutex.RUnlock() - - ranges := make([]string, 0, len(p.queuedRanges)) - for rangeKey := range p.queuedRanges { - ranges = append(ranges, rangeKey) - } - return ranges -} - -// GetPollerStatus returns diagnostic information about the poller's current state -func (p *Poller) GetPollerStatus() map[string]interface{} { - p.lastPolledBlockMutex.RLock() - lastPolled := p.lastPolledBlock.String() - p.lastPolledBlockMutex.RUnlock() - - return map[string]interface{}{ - "last_polled_block": lastPolled, - "processing_ranges": p.GetProcessingRanges(), - "queued_ranges": p.GetQueuedRanges(), - "task_queue_size": len(p.tasks), 
- "task_queue_cap": cap(p.tasks), - "parallel_pollers": p.parallelPollers, - } -} diff --git a/internal/orchestrator/poller_test.go b/internal/orchestrator/poller_test.go deleted file mode 100644 index bf344a99..00000000 --- a/internal/orchestrator/poller_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package orchestrator - -import ( - "testing" -) - -// All tests removed - need to be updated for new implementation -// The tests were failing due to missing mock expectations after refactoring - -func TestPollerPlaceholder(t *testing.T) { - // Placeholder test to keep the test file valid - t.Skip("Poller tests need to be rewritten for new implementation") -} diff --git a/internal/orchestrator/reorg_handler.go b/internal/orchestrator/reorg_handler.go deleted file mode 100644 index c72ee665..00000000 --- a/internal/orchestrator/reorg_handler.go +++ /dev/null @@ -1,294 +0,0 @@ -package orchestrator - -import ( - "context" - "fmt" - "math/big" - "sort" - "sync" - "time" - - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/metrics" - "github.com/thirdweb-dev/indexer/internal/publisher" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/internal/worker" -) - -type ReorgHandler struct { - rpc rpc.IRPCClient - storage storage.IStorage - triggerInterval int - blocksPerScan int - lastCheckedBlock *big.Int - worker *worker.Worker - publisher *publisher.Publisher -} - -const DEFAULT_REORG_HANDLER_INTERVAL = 1000 -const DEFAULT_REORG_HANDLER_BLOCKS_PER_SCAN = 100 - -func NewReorgHandler(rpc rpc.IRPCClient, storage storage.IStorage) *ReorgHandler { - triggerInterval := config.Cfg.ReorgHandler.Interval - if triggerInterval == 0 { - triggerInterval = DEFAULT_REORG_HANDLER_INTERVAL - } - blocksPerScan := config.Cfg.ReorgHandler.BlocksPerScan - if blocksPerScan == 0 { - blocksPerScan = DEFAULT_REORG_HANDLER_BLOCKS_PER_SCAN - } - return &ReorgHandler{ - rpc: rpc, - storage: storage, - worker: worker.NewWorker(rpc), - triggerInterval: triggerInterval, - blocksPerScan: blocksPerScan, - lastCheckedBlock: getInitialCheckedBlockNumber(storage, rpc.GetChainID()), - publisher: publisher.GetInstance(), - } -} - -func getInitialCheckedBlockNumber(storage storage.IStorage, chainId *big.Int) *big.Int { - configuredBn := big.NewInt(int64(config.Cfg.ReorgHandler.FromBlock)) - storedBn, err := storage.OrchestratorStorage.GetLastReorgCheckedBlockNumber(chainId) - if err != nil { - log.Debug().Err(err).Msgf("Error getting last reorg checked block number, using configured: %s", configuredBn) - return configuredBn - } - if storedBn.Sign() <= 0 { - log.Debug().Msgf("Last reorg checked block number not found, using configured: %s", configuredBn) - return configuredBn - } - log.Debug().Msgf("Last reorg checked block number found, using: %s", storedBn) - return storedBn -} - -func (rh *ReorgHandler) Start(ctx context.Context) { - interval := time.Duration(rh.triggerInterval) * time.Millisecond - ticker := time.NewTicker(interval) - defer ticker.Stop() - - log.Debug().Msgf("Reorg handler running") - for { - select { - case <-ctx.Done(): - log.Info().Msg("Reorg handler shutting down") - rh.publisher.Close() - return - case <-ticker.C: - mostRecentBlockChecked, err := rh.RunFromBlock(ctx, rh.lastCheckedBlock) - if err != nil { - log.Error().Err(err).Msgf("Error during reorg handling: %s", err.Error()) - continue - } - if 
mostRecentBlockChecked == nil { - continue - } - - rh.lastCheckedBlock = mostRecentBlockChecked - rh.storage.OrchestratorStorage.SetLastReorgCheckedBlockNumber(rh.rpc.GetChainID(), mostRecentBlockChecked) - metrics.ReorgHandlerLastCheckedBlock.Set(float64(mostRecentBlockChecked.Int64())) - } - } -} - -func (rh *ReorgHandler) RunFromBlock(ctx context.Context, latestCheckedBlock *big.Int) (lastCheckedBlock *big.Int, err error) { - fromBlock, toBlock, err := rh.getReorgCheckRange(latestCheckedBlock) - if err != nil { - return nil, err - } - if toBlock.Cmp(latestCheckedBlock) == 0 { - log.Debug().Msgf("Most recent (%s) and last checked (%s) block numbers are equal, skipping reorg check", toBlock.String(), latestCheckedBlock.String()) - return nil, nil - } - log.Debug().Msgf("Checking for reorgs from block %s to %s", fromBlock.String(), toBlock.String()) - blockHeaders, err := rh.storage.MainStorage.GetBlockHeadersDescending(rh.rpc.GetChainID(), fromBlock, toBlock) - if err != nil { - return nil, fmt.Errorf("error getting recent block headers: %w", err) - } - if len(blockHeaders) == 0 { - log.Warn().Msg("No block headers found during reorg handling") - return nil, nil - } - mostRecentBlockHeader := blockHeaders[0] - - firstMismatchIndex, err := findIndexOfFirstHashMismatch(blockHeaders) - if err != nil { - return nil, fmt.Errorf("error detecting reorgs: %w", err) - } - if firstMismatchIndex == -1 { - log.Debug().Msgf("No reorg detected, most recent block number checked: %s", mostRecentBlockHeader.Number.String()) - return mostRecentBlockHeader.Number, nil - } - - metrics.ReorgCounter.Inc() - reorgedBlockNumbers := make([]*big.Int, 0) - err = rh.findReorgedBlockNumbers(ctx, blockHeaders[firstMismatchIndex:], &reorgedBlockNumbers) - if err != nil { - return nil, fmt.Errorf("error finding reorged block numbers: %w", err) - } - - if len(reorgedBlockNumbers) == 0 { - log.Debug().Msgf("Reorg was detected, but no reorged block numbers found, most recent block number checked: %s", mostRecentBlockHeader.Number.String()) - return mostRecentBlockHeader.Number, nil - } - - err = rh.handleReorg(ctx, reorgedBlockNumbers) - if err != nil { - return nil, fmt.Errorf("error while handling reorg: %w", err) - } - return mostRecentBlockHeader.Number, nil -} - -func (rh *ReorgHandler) getReorgCheckRange(latestCheckedBlock *big.Int) (*big.Int, *big.Int, error) { - latestCommittedBlock, err := rh.storage.MainStorage.GetMaxBlockNumber(rh.rpc.GetChainID()) - if err != nil { - return nil, nil, fmt.Errorf("error getting latest committed block: %w", err) - } - if latestCheckedBlock.Cmp(latestCommittedBlock) > 0 { - log.Debug().Msgf("Committing has not reached the configured reorg check start block: %s (reorg start) > %s (last committed)", latestCheckedBlock.String(), latestCommittedBlock.String()) - return latestCheckedBlock, latestCheckedBlock, nil - } - - if new(big.Int).Sub(latestCommittedBlock, latestCheckedBlock).Cmp(big.NewInt(int64(rh.blocksPerScan))) < 0 { - // diff between latest committed and latest checked is less than blocksPerScan, so we will look back from the latest committed block - fromBlock := new(big.Int).Sub(latestCommittedBlock, big.NewInt(int64(rh.blocksPerScan))) - if fromBlock.Cmp(big.NewInt(0)) < 0 { - fromBlock = big.NewInt(0) - } - toBlock := new(big.Int).Set(latestCommittedBlock) - return fromBlock, toBlock, nil - } else { - // diff between latest committed and latest checked is greater or equal to blocksPerScan, so we will look forward from the latest checked block - fromBlock := 
new(big.Int).Set(latestCheckedBlock) - toBlock := new(big.Int).Add(fromBlock, big.NewInt(int64(rh.blocksPerScan))) - return fromBlock, toBlock, nil - } -} - -func findIndexOfFirstHashMismatch(blockHeadersDescending []common.BlockHeader) (int, error) { - for i := 0; i < len(blockHeadersDescending)-1; i++ { - currentBlock := blockHeadersDescending[i] - previousBlockInChain := blockHeadersDescending[i+1] - if currentBlock.Number.Cmp(previousBlockInChain.Number) == 0 { // unmerged block - continue - } - if currentBlock.Number.Cmp(new(big.Int).Add(previousBlockInChain.Number, big.NewInt(1))) != 0 { - return -1, fmt.Errorf("block headers are not sequential - cannot proceed with detecting reorgs. Comparing blocks: %s and %s", currentBlock.Number.String(), previousBlockInChain.Number.String()) - } - if currentBlock.ParentHash != previousBlockInChain.Hash { - return i + 1, nil - } - } - return -1, nil -} - -func (rh *ReorgHandler) findReorgedBlockNumbers(ctx context.Context, blockHeadersDescending []common.BlockHeader, reorgedBlockNumbers *[]*big.Int) error { - newBlocksByNumber, err := rh.getNewBlocksByNumber(ctx, blockHeadersDescending) - if err != nil { - return err - } - continueCheckingForReorgs := false - for i := 0; i < len(blockHeadersDescending); i++ { - blockHeader := blockHeadersDescending[i] - fetchedBlock, ok := newBlocksByNumber[blockHeader.Number.String()] - if !ok { - return fmt.Errorf("block not found: %s", blockHeader.Number.String()) - } - if blockHeader.ParentHash != fetchedBlock.ParentHash || blockHeader.Hash != fetchedBlock.Hash { - *reorgedBlockNumbers = append(*reorgedBlockNumbers, blockHeader.Number) - if i == len(blockHeadersDescending)-1 { - continueCheckingForReorgs = true // if last block in range is reorged, we should continue checking - } - } - } - if continueCheckingForReorgs { - fetchUntilBlock := blockHeadersDescending[len(blockHeadersDescending)-1].Number - fetchFromBlock := new(big.Int).Sub(fetchUntilBlock, big.NewInt(int64(rh.blocksPerScan))) - nextHeadersBatch, err := rh.storage.MainStorage.GetBlockHeadersDescending(rh.rpc.GetChainID(), fetchFromBlock, new(big.Int).Sub(fetchUntilBlock, big.NewInt(1))) // we sub 1 to not check the last block again - if err != nil { - return fmt.Errorf("error getting next headers batch: %w", err) - } - sort.Slice(nextHeadersBatch, func(i, j int) bool { - return nextHeadersBatch[i].Number.Cmp(nextHeadersBatch[j].Number) > 0 - }) - return rh.findReorgedBlockNumbers(ctx, nextHeadersBatch, reorgedBlockNumbers) - } - return nil -} - -func (rh *ReorgHandler) getNewBlocksByNumber(ctx context.Context, blockHeaders []common.BlockHeader) (map[string]common.Block, error) { - blockNumbers := make([]*big.Int, 0, len(blockHeaders)) - for _, header := range blockHeaders { - blockNumbers = append(blockNumbers, header.Number) - } - blockCount := len(blockNumbers) - chunks := common.SliceToChunks(blockNumbers, rh.rpc.GetBlocksPerRequest().Blocks) - - var wg sync.WaitGroup - resultsCh := make(chan []rpc.GetBlocksResult, len(chunks)) - - // TODO: move batching to rpc - log.Debug().Msgf("Reorg handler fetching %d blocks in %d chunks of max %d blocks", blockCount, len(chunks), rh.rpc.GetBlocksPerRequest().Blocks) - for _, chunk := range chunks { - wg.Add(1) - go func(chunk []*big.Int) { - defer wg.Done() - resultsCh <- rh.rpc.GetBlocks(ctx, chunk) - if config.Cfg.RPC.Blocks.BatchDelay > 0 { - time.Sleep(time.Duration(config.Cfg.RPC.Blocks.BatchDelay) * time.Millisecond) - } - }(chunk) - } - go func() { - wg.Wait() - close(resultsCh) - }() - - 
fetchedBlocksByNumber := make(map[string]common.Block) - for batchResults := range resultsCh { - for _, blockResult := range batchResults { - if blockResult.Error != nil { - return nil, fmt.Errorf("error fetching block %s: %w", blockResult.BlockNumber.String(), blockResult.Error) - } - fetchedBlocksByNumber[blockResult.BlockNumber.String()] = blockResult.Data - } - } - return fetchedBlocksByNumber, nil -} - -func (rh *ReorgHandler) handleReorg(ctx context.Context, reorgedBlockNumbers []*big.Int) error { - log.Debug().Msgf("Handling reorg for blocks %v", reorgedBlockNumbers) - results := rh.worker.Run(ctx, reorgedBlockNumbers) - data := make([]common.BlockData, 0, len(results)) - blocksToDelete := make([]*big.Int, 0, len(results)) - for _, result := range results { - if result.Error != nil { - return fmt.Errorf("cannot fix reorg: failed block %s: %w", result.BlockNumber.String(), result.Error) - } - data = append(data, common.BlockData{ - Block: result.Data.Block, - Logs: result.Data.Logs, - Transactions: result.Data.Transactions, - Traces: result.Data.Traces, - }) - blocksToDelete = append(blocksToDelete, result.BlockNumber) - } - - deletedBlockData, err := rh.storage.MainStorage.ReplaceBlockData(data) - if err != nil { - return fmt.Errorf("error replacing reorged data for blocks %v: %w", blocksToDelete, err) - } - if rh.publisher != nil { - // Publish block data asynchronously - go func() { - if err := rh.publisher.PublishReorg(deletedBlockData, data); err != nil { - log.Error().Err(err).Msg("Failed to publish reorg data to kafka") - } - }() - } - return nil -} diff --git a/internal/orchestrator/reorg_handler_test.go b/internal/orchestrator/reorg_handler_test.go deleted file mode 100644 index 88fffceb..00000000 --- a/internal/orchestrator/reorg_handler_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package orchestrator - -import ( - "testing" -) - -// All tests removed - need to be updated for new implementation -// The tests were failing due to missing mock expectations after refactoring - -func TestReorgHandlerPlaceholder(t *testing.T) { - // Placeholder test to keep the test file valid - t.Skip("Reorg handler tests need to be rewritten for new implementation") -} diff --git a/internal/orchestrator/validator.go b/internal/orchestrator/validator.go deleted file mode 100644 index d8f75db3..00000000 --- a/internal/orchestrator/validator.go +++ /dev/null @@ -1,234 +0,0 @@ -package orchestrator - -import ( - "context" - "fmt" - "math/big" - - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" - "github.com/thirdweb-dev/indexer/internal/validation" - "github.com/thirdweb-dev/indexer/internal/worker" -) - -type Validator struct { - storage storage.IStorage - rpc rpc.IRPCClient - worker *worker.Worker -} - -func NewValidator(rpcClient rpc.IRPCClient, s storage.IStorage, w *worker.Worker) *Validator { - return &Validator{ - rpc: rpcClient, - storage: s, - worker: w, - } -} - -/** - * Validate blocks in the range of startBlock to endBlock - * @param startBlock - The start block number (inclusive) - * @param endBlock - The end block number (inclusive) - * @return error - An error if the validation fails - */ -func (v *Validator) ValidateBlockRange(startBlock *big.Int, endBlock *big.Int) (validBlocks []common.BlockData, invalidBlocks []common.BlockData, err error) { - dbData, err := 
v.storage.MainStorage.GetValidationBlockData(v.rpc.GetChainID(), startBlock, endBlock) - if err != nil { - return nil, nil, err - } - validBlocks, invalidBlocks, err = v.ValidateBlocks(dbData) - if err != nil { - return nil, nil, err - } - return validBlocks, invalidBlocks, nil -} - -func (v *Validator) ValidateBlocks(blocks []common.BlockData) (validBlocks []common.BlockData, invalidBlocks []common.BlockData, err error) { - invalidBlocks = make([]common.BlockData, 0) - validBlocks = make([]common.BlockData, 0) - for _, blockData := range blocks { - valid, err := v.ValidateBlock(blockData) - if err != nil { - log.Error().Err(err).Msgf("Block verification failed for block %s", blockData.Block.Number) - return nil, nil, err - } - if valid { - validBlocks = append(validBlocks, blockData) - } else { - invalidBlocks = append(invalidBlocks, blockData) - } - } - return validBlocks, invalidBlocks, nil -} - -func (v *Validator) ValidateBlock(blockData common.BlockData) (valid bool, err error) { - if config.Cfg.Validation.Mode == "disabled" { - return true, nil - } - - // check that transaction count matches - if blockData.Block.TransactionCount != uint64(len(blockData.Transactions)) { - log.Error().Msgf("Block verification failed for block %s: transaction count mismatch: expected=%d, fetched from DB=%d", blockData.Block.Number, blockData.Block.TransactionCount, len(blockData.Transactions)) - return false, nil - } - - // check that logs exist if logsBloom is not empty - logsBloomAsNumber := new(big.Int) - logsBloomAsNumber.SetString(blockData.Block.LogsBloom[2:], 16) - if logsBloomAsNumber.Sign() != 0 && len(blockData.Logs) == 0 { - log.Error().Msgf("Block verification failed for block %s: logsBloom is not empty but no logs exist", blockData.Block.Number) - return false, nil - } - - // strict mode also validates logsBloom and transactionsRoot - if config.Cfg.Validation.Mode == "strict" { - // Calculate logsBloom from logs - calculatedLogsBloom := validation.CalculateLogsBloom(blockData.Logs) - // Compare calculated logsBloom with block's logsBloom - if calculatedLogsBloom != blockData.Block.LogsBloom { - log.Error().Msgf("Block verification failed for block %s: logsBloom mismatch: calculated=%s, block=%s", blockData.Block.Number, calculatedLogsBloom, blockData.Block.LogsBloom) - return false, nil - } - - // Check transactionsRoot - if blockData.Block.TransactionsRoot == "0x0000000000000000000000000000000000000000000000000000000000000000" { - // likely a zk chain and does not support tx root - return true, nil - } - - for _, tx := range blockData.Transactions { - if tx.TransactionType == 0x7E { - // TODO: Need to properly validate op-stack deposit transaction - return true, nil - } - if tx.TransactionType > 4 { // Currently supported types are 0-4 - log.Warn().Msgf("Skipping transaction root validation for block %s due to unsupported transaction type %d", blockData.Block.Number, tx.TransactionType) - return true, nil - } - } - - // Calculate transactionsRoot from transactions - calculatedTransactionsRoot, err := validation.CalculateTransactionsRoot(blockData.Transactions) - if err != nil { - return false, fmt.Errorf("failed to calculate transactionsRoot: %v", err) - } - - // Compare calculated transactionsRoot with block's transactionsRoot - if calculatedTransactionsRoot != blockData.Block.TransactionsRoot { - log.Error().Msgf("Block verification failed for block %s: transactionsRoot mismatch: calculated=%s, block=%s", blockData.Block.Number, calculatedTransactionsRoot, 
blockData.Block.TransactionsRoot) - return false, nil - } - } - - return true, nil -} - -func (v *Validator) FixBlocks(invalidBlocks []*big.Int, fixBatchSize int) error { - if len(invalidBlocks) == 0 { - log.Debug().Msg("No invalid blocks") - return nil - } - - if fixBatchSize == 0 { - fixBatchSize = len(invalidBlocks) - } - - log.Debug().Msgf("Fixing invalid blocks %d to %d", invalidBlocks[0], invalidBlocks[len(invalidBlocks)-1]) - - // Process blocks in batches - for i := 0; i < len(invalidBlocks); i += fixBatchSize { - end := i + fixBatchSize - if end > len(invalidBlocks) { - end = len(invalidBlocks) - } - batch := invalidBlocks[i:end] - - polledBlocksRun := v.worker.Run(context.Background(), batch) - polledBlocks := v.convertResultsToBlockData(polledBlocksRun) - log.Debug().Msgf("Batch of invalid blocks polled: %d to %d", batch[0], batch[len(batch)-1]) - - _, err := v.storage.MainStorage.ReplaceBlockData(polledBlocks) - if err != nil { - log.Error().Err(err).Msgf("Failed to replace blocks: %v", polledBlocks) - return err - } - } - log.Info().Msgf("Fixed %d blocks", len(invalidBlocks)) - return nil -} - -func (v *Validator) FindAndFixGaps(startBlock *big.Int, endBlock *big.Int) error { - missingBlockNumbers, err := v.storage.MainStorage.FindMissingBlockNumbers(v.rpc.GetChainID(), startBlock, endBlock) - if err != nil { - return err - } - if len(missingBlockNumbers) == 0 { - log.Debug().Msg("No missing blocks found") - return nil - } - log.Debug().Msgf("Found %d missing blocks: %v", len(missingBlockNumbers), missingBlockNumbers) - - // query missing blocks - polledBlocksRun := v.worker.Run(context.Background(), missingBlockNumbers) - polledBlocks := v.convertResultsToBlockData(polledBlocksRun) - log.Debug().Msgf("Missing blocks polled: %v", len(polledBlocks)) - - err = v.storage.MainStorage.InsertBlockData(polledBlocks) - if err != nil { - log.Error().Err(err).Msgf("Failed to insert missing blocks: %v", polledBlocks) - return err - } - - return nil -} - -func (v *Validator) convertResultsToBlockData(results []rpc.GetFullBlockResult) []common.BlockData { - blockData := make([]common.BlockData, 0, len(results)) - for _, result := range results { - blockData = append(blockData, common.BlockData{ - Block: result.Data.Block, - Logs: result.Data.Logs, - Transactions: result.Data.Transactions, - Traces: result.Data.Traces, - }) - } - return blockData -} - -func (v *Validator) EnsureValidBlocks(ctx context.Context, blocks []common.BlockData) ([]common.BlockData, error) { - validBlocks, invalidBlocks, err := v.ValidateBlocks(blocks) - if err != nil { - return nil, fmt.Errorf("validation failed: %w", err) - } - - // If all blocks are valid, return them - if len(invalidBlocks) == 0 { - return validBlocks, nil - } - - // Extract block numbers from invalid blocks - invalidBlockNumbers := make([]*big.Int, 0, len(invalidBlocks)) - for _, block := range invalidBlocks { - invalidBlockNumbers = append(invalidBlockNumbers, block.Block.Number) - } - - log.Debug().Msgf("Re-fetching %d invalid blocks for validation", len(invalidBlockNumbers)) - - // Re-fetch invalid blocks using worker - polledBlocksRun := v.worker.Run(ctx, invalidBlockNumbers) - refetchedBlocks := v.convertResultsToBlockData(polledBlocksRun) - - // Recursively validate the re-fetched blocks - revalidatedBlocks, err := v.EnsureValidBlocks(ctx, refetchedBlocks) - if err != nil { - return nil, fmt.Errorf("failed to ensure valid blocks after re-fetch: %w", err) - } - - // Combine the originally valid blocks with the newly validated blocks - 
allValidBlocks := append(validBlocks, revalidatedBlocks...) - - return allValidBlocks, nil -} diff --git a/internal/orchestrator/work_mode_monitor.go b/internal/orchestrator/work_mode_monitor.go deleted file mode 100644 index ee75c04e..00000000 --- a/internal/orchestrator/work_mode_monitor.go +++ /dev/null @@ -1,160 +0,0 @@ -package orchestrator - -import ( - "context" - "math/big" - "sync" - "time" - - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/internal/metrics" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" -) - -type WorkMode string - -const ( - DEFAULT_WORK_MODE_CHECK_INTERVAL = 10 - DEFAULT_LIVE_MODE_THRESHOLD = 500 - WorkModeLive WorkMode = "live" - WorkModeBackfill WorkMode = "backfill" -) - -type WorkModeMonitor struct { - rpc rpc.IRPCClient - storage storage.IStorage - workModeChannels map[chan WorkMode]struct{} - channelsMutex sync.RWMutex - currentMode WorkMode - checkInterval time.Duration - liveModeThreshold *big.Int -} - -func NewWorkModeMonitor(rpc rpc.IRPCClient, storage storage.IStorage) *WorkModeMonitor { - checkInterval := DEFAULT_WORK_MODE_CHECK_INTERVAL - liveModeThreshold := DEFAULT_LIVE_MODE_THRESHOLD - log.Info().Msgf("Work mode monitor initialized with check interval %d and live mode threshold %d", checkInterval, liveModeThreshold) - return &WorkModeMonitor{ - rpc: rpc, - storage: storage, - workModeChannels: make(map[chan WorkMode]struct{}), - currentMode: "", - checkInterval: time.Duration(checkInterval) * time.Minute, - liveModeThreshold: big.NewInt(int64(liveModeThreshold)), - } -} - -// RegisterChannel adds a new channel to receive work mode updates -func (m *WorkModeMonitor) RegisterChannel(ch chan WorkMode) { - m.channelsMutex.Lock() - defer m.channelsMutex.Unlock() - - m.workModeChannels[ch] = struct{}{} - // Send current mode to the new channel only if it's not empty - if m.currentMode != "" { - select { - case ch <- m.currentMode: - log.Debug().Msg("Initial work mode sent to new channel") - default: - log.Warn().Msg("Failed to send initial work mode to new channel - channel full") - } - } -} - -// UnregisterChannel removes a channel from receiving work mode updates -func (m *WorkModeMonitor) UnregisterChannel(ch chan WorkMode) { - m.channelsMutex.Lock() - defer m.channelsMutex.Unlock() - - delete(m.workModeChannels, ch) -} - -func (m *WorkModeMonitor) updateWorkModeMetric(mode WorkMode) { - var value float64 - if mode == WorkModeLive { - value = 1 - } - metrics.CurrentWorkMode.Set(value) -} - -func (m *WorkModeMonitor) Start(ctx context.Context) { - // Perform immediate check - newMode, err := m.determineWorkMode(ctx) - if err != nil { - log.Error().Err(err).Msg("Error checking work mode during startup") - } else if newMode != m.currentMode { - log.Info().Msgf("Work mode changing from %s to %s during startup", m.currentMode, newMode) - m.currentMode = newMode - m.updateWorkModeMetric(newMode) - m.broadcastWorkMode(newMode) - } - - ticker := time.NewTicker(m.checkInterval) - defer ticker.Stop() - - log.Info().Msgf("Work mode monitor started with initial mode: %s", m.currentMode) - - for { - select { - case <-ctx.Done(): - log.Info().Msg("Work mode monitor shutting down") - return - case <-ticker.C: - newMode, err := m.determineWorkMode(ctx) - if err != nil { - log.Error().Err(err).Msg("Error checking work mode") - continue - } - - if newMode != m.currentMode { - log.Info().Msgf("Work mode changing from %s to %s", m.currentMode, newMode) - m.currentMode = newMode - 
m.updateWorkModeMetric(newMode) - m.broadcastWorkMode(newMode) - } - } - } -} - -func (m *WorkModeMonitor) broadcastWorkMode(mode WorkMode) { - m.channelsMutex.RLock() - defer m.channelsMutex.RUnlock() - - for ch := range m.workModeChannels { - select { - case ch <- mode: - log.Debug().Msg("Work mode change notification sent") - default: - if r := recover(); r != nil { - log.Warn().Msg("Work mode notification dropped - channel closed") - delete(m.workModeChannels, ch) - } - } - } -} - -func (m *WorkModeMonitor) determineWorkMode(ctx context.Context) (WorkMode, error) { - lastCommittedBlock, err := m.storage.MainStorage.GetMaxBlockNumber(m.rpc.GetChainID()) - if err != nil { - return "", err - } - - if lastCommittedBlock.Sign() == 0 { - log.Debug().Msg("No blocks committed yet, using backfill mode") - return WorkModeBackfill, nil - } - - latestBlock, err := m.rpc.GetLatestBlockNumber(ctx) - if err != nil { - return "", err - } - - blockDiff := new(big.Int).Sub(latestBlock, lastCommittedBlock) - log.Debug().Msgf("Committer is %d blocks behind the chain", blockDiff.Int64()) - if blockDiff.Cmp(m.liveModeThreshold) < 0 { - return WorkModeLive, nil - } - - return WorkModeBackfill, nil -} diff --git a/internal/publisher/publisher.go b/internal/publisher/publisher.go deleted file mode 100644 index 0df01c1f..00000000 --- a/internal/publisher/publisher.go +++ /dev/null @@ -1,410 +0,0 @@ -package publisher - -import ( - "context" - "crypto/tls" - "encoding/json" - "fmt" - "net" - "strings" - "sync" - "time" - - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/metrics" - "github.com/twmb/franz-go/pkg/kgo" - "github.com/twmb/franz-go/pkg/sasl/plain" -) - -type Publisher struct { - client *kgo.Client - mu sync.RWMutex -} - -var ( - instance *Publisher - once sync.Once -) - -type PublishableMessage[T common.BlockModel | common.TransactionModel | common.LogModel | common.TraceModel] struct { - Data T `json:"data"` - Status string `json:"status"` -} - -// GetInstance returns the singleton Publisher instance -func GetInstance() *Publisher { - once.Do(func() { - instance = &Publisher{} - if err := instance.initialize(); err != nil { - log.Error().Err(err).Msg("Failed to initialize publisher") - } - }) - return instance -} - -func (p *Publisher) initialize() error { - if !config.Cfg.Publisher.Enabled { - log.Debug().Msg("Publisher is disabled, skipping initialization") - return nil - } - - p.mu.Lock() - defer p.mu.Unlock() - - if config.Cfg.Publisher.Brokers == "" { - log.Info().Msg("No Kafka brokers configured, skipping publisher initialization") - return nil - } - - brokers := strings.Split(config.Cfg.Publisher.Brokers, ",") - opts := []kgo.Opt{ - kgo.SeedBrokers(brokers...), - kgo.AllowAutoTopicCreation(), - kgo.ProducerBatchCompression(kgo.SnappyCompression()), - kgo.ClientID(fmt.Sprintf("insight-indexer-%s", config.Cfg.RPC.ChainID)), - kgo.MaxBufferedRecords(1_000_000), - kgo.ProducerBatchMaxBytes(100 * 1024 * 1024), // 100MB - kgo.RecordPartitioner(kgo.UniformBytesPartitioner(1_000_000, false, false, nil)), - kgo.MetadataMaxAge(60 * time.Second), - kgo.DialTimeout(10 * time.Second), - } - - if config.Cfg.Publisher.Username != "" && config.Cfg.Publisher.Password != "" { - opts = append(opts, kgo.SASL(plain.Auth{ - User: config.Cfg.Publisher.Username, - Pass: config.Cfg.Publisher.Password, - }.AsMechanism())) - } - - if config.Cfg.Publisher.EnableTLS { - tlsDialer := 
&tls.Dialer{NetDialer: &net.Dialer{Timeout: 10 * time.Second}} - opts = append(opts, kgo.Dialer(tlsDialer.DialContext)) - } - - client, err := kgo.NewClient(opts...) - if err != nil { - return fmt.Errorf("failed to create Kafka client: %v", err) - } - - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - if err := client.Ping(ctx); err != nil { - client.Close() - return fmt.Errorf("failed to connect to Kafka: %v", err) - } - p.client = client - return nil -} - -func (p *Publisher) PublishBlockData(blockData []common.BlockData) error { - return p.publishBlockData(blockData, false) -} - -func (p *Publisher) PublishReorg(oldData []common.BlockData, newData []common.BlockData) error { - if err := p.publishBlockData(oldData, true); err != nil { - return fmt.Errorf("failed to publish old block data: %v", err) - } - - if err := p.publishBlockData(newData, false); err != nil { - return fmt.Errorf("failed to publish new block data: %v", err) - } - return nil -} - -func (p *Publisher) Close() error { - p.mu.Lock() - defer p.mu.Unlock() - - if p.client != nil { - p.client.Close() - log.Debug().Msg("Publisher client closed") - } - return nil -} - -func (p *Publisher) publishMessages(ctx context.Context, messages []*kgo.Record) error { - if len(messages) == 0 { - return nil - } - - if !config.Cfg.Publisher.Enabled { - log.Debug().Msg("Publisher is disabled, skipping publish") - return nil - } - - p.mu.RLock() - defer p.mu.RUnlock() - - if p.client == nil { - return nil // Skip if no client configured - } - - var wg sync.WaitGroup - wg.Add(len(messages)) - // Publish to all configured producers - for _, msg := range messages { - p.client.Produce(ctx, msg, func(_ *kgo.Record, err error) { - defer wg.Done() - if err != nil { - log.Error().Err(err).Msg("Failed to publish message to Kafka") - } - }) - } - wg.Wait() - - return nil -} - -func (p *Publisher) publishBlockData(blockData []common.BlockData, isReorg bool) error { - if p.client == nil || len(blockData) == 0 { - return nil - } - - publishStart := time.Now() - - // Prepare messages for blocks, events, transactions and traces - blockMessages := make([]*kgo.Record, len(blockData)) - var eventMessages []*kgo.Record - var txMessages []*kgo.Record - var traceMessages []*kgo.Record - - status := "new" - if isReorg { - status = "reverted" - } - - for i, data := range blockData { - // Block message - if config.Cfg.Publisher.Blocks.Enabled { - if blockMsg, err := p.createBlockMessage(data.Block, status); err == nil { - blockMessages[i] = blockMsg - } else { - return fmt.Errorf("failed to create block message: %v", err) - } - } - - // Event messages - if config.Cfg.Publisher.Events.Enabled { - for _, event := range data.Logs { - if p.shouldPublishEvent(event) { - if eventMsg, err := p.createEventMessage(event, status); err == nil { - eventMessages = append(eventMessages, eventMsg) - } else { - return fmt.Errorf("failed to create event message: %v", err) - } - } - } - } - - // Transaction messages - if config.Cfg.Publisher.Transactions.Enabled { - for _, tx := range data.Transactions { - if p.shouldPublishTransaction(tx) { - if txMsg, err := p.createTransactionMessage(tx, status); err == nil { - txMessages = append(txMessages, txMsg) - } else { - return fmt.Errorf("failed to create transaction message: %v", err) - } - } - } - } - - // Trace messages - if config.Cfg.Publisher.Traces.Enabled { - for _, trace := range data.Traces { - if traceMsg, err := p.createTraceMessage(trace, status); err == nil { - traceMessages = 
append(traceMessages, traceMsg) - } else { - return fmt.Errorf("failed to create trace message: %v", err) - } - } - } - } - - if config.Cfg.Publisher.Blocks.Enabled { - if err := p.publishMessages(context.Background(), blockMessages); err != nil { - return fmt.Errorf("failed to publish block messages: %v", err) - } - } - - if config.Cfg.Publisher.Events.Enabled { - if err := p.publishMessages(context.Background(), eventMessages); err != nil { - return fmt.Errorf("failed to publish event messages: %v", err) - } - } - - if config.Cfg.Publisher.Transactions.Enabled { - if err := p.publishMessages(context.Background(), txMessages); err != nil { - return fmt.Errorf("failed to publish transaction messages: %v", err) - } - } - - if config.Cfg.Publisher.Traces.Enabled { - if err := p.publishMessages(context.Background(), traceMessages); err != nil { - return fmt.Errorf("failed to publish trace messages: %v", err) - } - } - - log.Debug().Str("metric", "publish_duration").Msgf("Publisher.PublishBlockData duration: %f", time.Since(publishStart).Seconds()) - metrics.PublishDuration.Observe(time.Since(publishStart).Seconds()) - metrics.PublisherBlockCounter.Add(float64(len(blockData))) - metrics.LastPublishedBlock.Set(float64(blockData[len(blockData)-1].Block.Number.Int64())) - if isReorg { - metrics.PublisherReorgedBlockCounter.Add(float64(len(blockData))) - } - return nil -} - -func (p *Publisher) createBlockMessage(block common.Block, status string) (*kgo.Record, error) { - msg := PublishableMessage[common.BlockModel]{ - Data: block.Serialize(), - Status: status, - } - msgJson, err := json.Marshal(msg) - if err != nil { - return nil, fmt.Errorf("failed to marshal block data: %v", err) - } - return &kgo.Record{ - Topic: p.getTopicName("blocks"), - Key: []byte(fmt.Sprintf("block-%s-%s-%s", status, block.ChainId.String(), block.Hash)), - Value: msgJson, - Headers: []kgo.RecordHeader{ - { - Key: "chain_id", - Value: []byte(block.ChainId.String()), - }, - { - Key: "block_number", - Value: []byte(block.Number.String()), - }, - }, - }, nil -} - -func (p *Publisher) createTransactionMessage(tx common.Transaction, status string) (*kgo.Record, error) { - msg := PublishableMessage[common.TransactionModel]{ - Data: tx.Serialize(), - Status: status, - } - msgJson, err := json.Marshal(msg) - if err != nil { - return nil, fmt.Errorf("failed to marshal transaction data: %v", err) - } - return &kgo.Record{ - Topic: p.getTopicName("transactions"), - Key: []byte(fmt.Sprintf("transaction-%s-%s-%s", status, tx.ChainId.String(), tx.Hash)), - Value: msgJson, - }, nil -} - -func (p *Publisher) createTraceMessage(trace common.Trace, status string) (*kgo.Record, error) { - msg := PublishableMessage[common.TraceModel]{ - Data: trace.Serialize(), - Status: status, - } - msgJson, err := json.Marshal(msg) - if err != nil { - return nil, fmt.Errorf("failed to marshal trace data: %v", err) - } - traceAddressStr := make([]string, len(trace.TraceAddress)) - for i, addr := range trace.TraceAddress { - traceAddressStr[i] = fmt.Sprint(addr) - } - return &kgo.Record{ - Topic: p.getTopicName("traces"), - Key: []byte(fmt.Sprintf("trace-%s-%s-%s-%v", status, trace.ChainID.String(), trace.TransactionHash, strings.Join(traceAddressStr, ","))), - Value: msgJson, - }, nil -} - -func (p *Publisher) createEventMessage(event common.Log, status string) (*kgo.Record, error) { - msg := PublishableMessage[common.LogModel]{ - Data: event.Serialize(), - Status: status, - } - msgJson, err := json.Marshal(msg) - if err != nil { - return nil, 
fmt.Errorf("failed to marshal event data: %v", err) - } - return &kgo.Record{ - Topic: p.getTopicName("events"), - Key: []byte(fmt.Sprintf("event-%s-%s-%s-%d", status, event.ChainId.String(), event.TransactionHash, event.LogIndex)), - Value: msgJson, - }, nil -} - -func (p *Publisher) shouldPublishEvent(event common.Log) bool { - if len(config.Cfg.Publisher.Events.AddressFilter) > 0 { - for _, addr := range config.Cfg.Publisher.Events.AddressFilter { - if addr == event.Address { - return true - } - } - return false - } - - if len(config.Cfg.Publisher.Events.Topic0Filter) > 0 { - for _, topic0 := range config.Cfg.Publisher.Events.Topic0Filter { - if topic0 == event.Topic0 { - return true - } - } - return false - } - return true -} - -func (p *Publisher) shouldPublishTransaction(tx common.Transaction) bool { - if len(config.Cfg.Publisher.Transactions.ToFilter) > 0 { - for _, addr := range config.Cfg.Publisher.Transactions.ToFilter { - if addr == tx.ToAddress { - return true - } - } - return false - } - - if len(config.Cfg.Publisher.Transactions.FromFilter) > 0 { - for _, addr := range config.Cfg.Publisher.Transactions.FromFilter { - if addr == tx.FromAddress { - return true - } - } - return false - } - return true -} - -func (p *Publisher) getTopicName(entity string) string { - chainIdSuffix := "" - if config.Cfg.RPC.ChainID != "" { - chainIdSuffix = fmt.Sprintf(".%s", config.Cfg.RPC.ChainID) - } - switch entity { - case "blocks": - if config.Cfg.Publisher.Blocks.TopicName != "" { - return config.Cfg.Publisher.Blocks.TopicName - } - return fmt.Sprintf("insight.blocks%s", chainIdSuffix) - case "transactions": - if config.Cfg.Publisher.Transactions.TopicName != "" { - return config.Cfg.Publisher.Transactions.TopicName - } - return fmt.Sprintf("insight.transactions%s", chainIdSuffix) - case "traces": - if config.Cfg.Publisher.Traces.TopicName != "" { - return config.Cfg.Publisher.Traces.TopicName - } - return fmt.Sprintf("insight.traces%s", chainIdSuffix) - case "events": - if config.Cfg.Publisher.Events.TopicName != "" { - return config.Cfg.Publisher.Events.TopicName - } - return fmt.Sprintf("insight.events%s", chainIdSuffix) - default: - return "" - } -} diff --git a/internal/source/s3.go b/internal/source/s3.go deleted file mode 100644 index 962ff114..00000000 --- a/internal/source/s3.go +++ /dev/null @@ -1,1003 +0,0 @@ -package source - -import ( - "context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "fmt" - "io" - "math/big" - "os" - "path/filepath" - "sort" - "strings" - "sync" - "time" - - "github.com/aws/aws-sdk-go-v2/aws" - awsconfig "github.com/aws/aws-sdk-go-v2/config" - "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/parquet-go/parquet-go" - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/rpc" -) - -// FileMetadata represents cached information about S3 files -type FileMetadata struct { - Key string - MinBlock *big.Int - MaxBlock *big.Int - Size int64 -} - -type S3Source struct { - client *s3.Client - config *config.S3SourceConfig - chainId *big.Int - cacheDir string - - // Configurable settings - metadataTTL time.Duration // How long to cache metadata - fileCacheTTL time.Duration // How long to keep files in cache - maxCacheSize int64 // Max cache size in bytes - cleanupInterval time.Duration // How often to run cleanup - maxConcurrentDownloads int // Max concurrent S3 downloads - - // Metadata cache - metaMu sync.RWMutex - fileMetadata 
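The shouldPublishEvent and shouldPublishTransaction checks in the removed publisher share one subtlety: when the primary filter (event address, or transaction to-address) is configured it decides the outcome on its own, and the secondary filter (topic_0, or from-address) is only consulted when the primary one is empty; with no filters set, everything is published. A small standalone predicate illustrating that precedence, with hypothetical filter slices standing in for the config values.

package main

import "fmt"

// shouldPublish mirrors the precedence of the removed filters: a non-empty
// primary filter is authoritative; the secondary filter is only consulted
// when the primary one is unset; no filters means publish everything.
func shouldPublish(primaryFilter, secondaryFilter []string, primary, secondary string) bool {
	if len(primaryFilter) > 0 {
		for _, v := range primaryFilter {
			if v == primary {
				return true
			}
		}
		return false
	}
	if len(secondaryFilter) > 0 {
		for _, v := range secondaryFilter {
			if v == secondary {
				return true
			}
		}
		return false
	}
	return true
}

func main() {
	addressFilter := []string{"0xabc"}
	topic0Filter := []string{"0xddf2"}
	// The address filter is set, so the matching topic_0 is ignored entirely.
	fmt.Println(shouldPublish(addressFilter, topic0Filter, "0xother", "0xddf2")) // false
	// With no address filter, the topic_0 filter decides.
	fmt.Println(shouldPublish(nil, topic0Filter, "0xother", "0xddf2")) // true
}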
map[string]*FileMetadata // S3 key -> metadata - minBlock *big.Int - maxBlock *big.Int - metaLoaded bool - metaLoadTime time.Time // When metadata was last loaded - - // Local file cache - cacheMu sync.RWMutex - cacheMap map[string]time.Time // Track cache file access times - downloadMu sync.Mutex // Prevent duplicate downloads - - // Download tracking - downloading map[string]*sync.WaitGroup // Files currently downloading - - // Active use tracking - activeUseMu sync.RWMutex - activeUse map[string]int // Files currently being read (reference count) - - // Memory management - memorySem chan struct{} // Semaphore for memory-limited operations -} - -// ParquetBlockData represents the block data structure in parquet files -type ParquetBlockData struct { - ChainId uint64 `parquet:"chain_id"` - BlockNumber uint64 `parquet:"block_number"` - BlockHash string `parquet:"block_hash"` - BlockTimestamp int64 `parquet:"block_timestamp"` - Block []byte `parquet:"block_json"` - Transactions []byte `parquet:"transactions_json"` - Logs []byte `parquet:"logs_json"` - Traces []byte `parquet:"traces_json"` -} - -func NewS3Source(chainId *big.Int, cfg *config.S3SourceConfig) (*S3Source, error) { - // Apply defaults - if cfg.MetadataTTL == 0 { - cfg.MetadataTTL = 10 * time.Minute - } - if cfg.FileCacheTTL == 0 { - cfg.FileCacheTTL = 15 * time.Minute // 15 minutes - } - if cfg.MaxCacheSize == 0 { - cfg.MaxCacheSize = 5 * 1024 * 1024 * 1024 // Increased from 5GB to 10GB - } - if cfg.CleanupInterval == 0 { - cfg.CleanupInterval = 5 * time.Minute // 5 minutes - } - if cfg.MaxConcurrentDownloads == 0 { - cfg.MaxConcurrentDownloads = 3 - } - - awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), - awsconfig.WithRegion(cfg.Region), - ) - if err != nil { - return nil, fmt.Errorf("failed to load AWS config: %w", err) - } - - // Override with explicit credentials if provided - if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { - awsCfg.Credentials = aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { - return aws.Credentials{ - AccessKeyID: cfg.AccessKeyID, - SecretAccessKey: cfg.SecretAccessKey, - }, nil - }) - } - - s3Client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { - if cfg.Endpoint != "" { - o.BaseEndpoint = aws.String(cfg.Endpoint) - } - }) - - // Create cache directory - cacheDir := cfg.CacheDir - if cacheDir == "" { - cacheDir = filepath.Join(os.TempDir(), "s3-archive-cache", fmt.Sprintf("chain_%d", chainId.Uint64())) - } - if err := os.MkdirAll(cacheDir, 0755); err != nil { - return nil, fmt.Errorf("failed to create cache directory: %w", err) - } - - // Create memory semaphore with 10 concurrent operations by default - memoryOps := 10 - if cfg.MaxConcurrentDownloads > 0 { - memoryOps = cfg.MaxConcurrentDownloads * 2 - } - - archive := &S3Source{ - client: s3Client, - config: cfg, - chainId: chainId, - cacheDir: cacheDir, - metadataTTL: cfg.MetadataTTL, - fileCacheTTL: cfg.FileCacheTTL, - maxCacheSize: cfg.MaxCacheSize, - cleanupInterval: cfg.CleanupInterval, - maxConcurrentDownloads: cfg.MaxConcurrentDownloads, - fileMetadata: make(map[string]*FileMetadata), - cacheMap: make(map[string]time.Time), - downloading: make(map[string]*sync.WaitGroup), - activeUse: make(map[string]int), - memorySem: make(chan struct{}, memoryOps), - } - - // Start cache cleanup goroutine - go archive.cleanupCache() - - // Load metadata in background (optional) - if cfg.Bucket != "" { - go func() { - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - defer 
cancel() - if err := archive.loadMetadata(ctx); err != nil { - log.Warn().Err(err).Msg("Failed to preload S3 metadata") - } - }() - } - - return archive, nil -} - -func (s *S3Source) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { - if len(blockNumbers) == 0 { - return nil - } - - // Ensure metadata is loaded - if err := s.ensureMetadataLoaded(ctx); err != nil { - log.Error().Err(err).Msg("Failed to load metadata") - return s.makeErrorResults(blockNumbers, err) - } - - // Group blocks by files that contain them - fileGroups := s.groupBlocksByFiles(blockNumbers) - - // Mark files as being actively used - s.activeUseMu.Lock() - for fileKey := range fileGroups { - s.activeUse[fileKey]++ - } - s.activeUseMu.Unlock() - - // Ensure we release the hold on files when done - defer func() { - s.activeUseMu.Lock() - for fileKey := range fileGroups { - s.activeUse[fileKey]-- - if s.activeUse[fileKey] <= 0 { - delete(s.activeUse, fileKey) - } - } - s.activeUseMu.Unlock() - - // Update access times to keep files in cache - s.cacheMu.Lock() - now := time.Now() - for fileKey := range fileGroups { - s.cacheMap[fileKey] = now - } - s.cacheMu.Unlock() - }() - - // Download required files and wait for ALL to be ready - if err := s.ensureFilesAvailable(ctx, fileGroups); err != nil { - log.Error().Err(err).Msg("Failed to ensure files are available") - return s.makeErrorResults(blockNumbers, err) - } - - // Read blocks from local files - at this point all files should be available - results := make([]rpc.GetFullBlockResult, 0, len(blockNumbers)) - resultMap := make(map[uint64]rpc.GetFullBlockResult) - - for fileKey, blocks := range fileGroups { - localPath := s.getCacheFilePath(fileKey) - - if !s.isFileCached(localPath) { - log.Error().Str("file", fileKey).Str("path", localPath).Msg("File disappeared after ensureFilesAvailable") - // Try to re-download the file synchronously as a last resort - if err := s.downloadFile(ctx, fileKey); err != nil { - log.Error().Err(err).Str("file", fileKey).Msg("Failed to re-download disappeared file") - for _, bn := range blocks { - resultMap[bn.Uint64()] = rpc.GetFullBlockResult{ - BlockNumber: bn, - Error: fmt.Errorf("file disappeared and re-download failed: %w", err), - } - } - continue - } - } - - // Read blocks from local file efficiently - fileResults, err := s.readBlocksFromLocalFile(localPath, blocks) - if err != nil { - log.Error().Err(err).Str("file", fileKey).Msg("Failed to read blocks from local file") - // Even if one file fails, continue with others - for _, bn := range blocks { - resultMap[bn.Uint64()] = rpc.GetFullBlockResult{ - BlockNumber: bn, - Error: fmt.Errorf("failed to read from file: %w", err), - } - } - continue - } - - for blockNum, result := range fileResults { - resultMap[blockNum] = result - } - } - - // Build ordered results - for _, bn := range blockNumbers { - if result, ok := resultMap[bn.Uint64()]; ok { - results = append(results, result) - } else { - results = append(results, rpc.GetFullBlockResult{ - BlockNumber: bn, - Error: fmt.Errorf("block %s not found", bn.String()), - }) - } - } - - return results -} - -func (s *S3Source) GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) { - if err := s.ensureMetadataLoaded(ctx); err != nil { - return nil, nil, err - } - - s.metaMu.RLock() - defer s.metaMu.RUnlock() - - if s.minBlock == nil || s.maxBlock == nil { - return big.NewInt(0), big.NewInt(0), fmt.Errorf("no blocks found for chain %d", 
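GetFullBlocks in the removed S3 source collects parsed blocks into a map keyed by block number and then re-walks the requested slice, so the output preserves the caller's order and any block that never materialized gets a "not found" error in its slot. A stripped-down sketch of that assembly step, using a hypothetical Result type in place of rpc.GetFullBlockResult.

package main

import (
	"fmt"
	"math/big"
)

// Result is a stand-in for rpc.GetFullBlockResult.
type Result struct {
	BlockNumber *big.Int
	Err         error
}

// orderResults returns one Result per requested block, in request order,
// filling a not-found error for anything missing from the map.
func orderResults(requested []*big.Int, found map[uint64]Result) []Result {
	out := make([]Result, 0, len(requested))
	for _, bn := range requested {
		if r, ok := found[bn.Uint64()]; ok {
			out = append(out, r)
			continue
		}
		out = append(out, Result{BlockNumber: bn, Err: fmt.Errorf("block %s not found", bn)})
	}
	return out
}

func main() {
	req := []*big.Int{big.NewInt(10), big.NewInt(11)}
	found := map[uint64]Result{10: {BlockNumber: big.NewInt(10)}}
	for _, r := range orderResults(req, found) {
		fmt.Println(r.BlockNumber, r.Err)
	}
}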
s.chainId.Uint64()) - } - - return new(big.Int).Set(s.minBlock), new(big.Int).Set(s.maxBlock), nil -} - -func (s *S3Source) Close() { - // Clean up cache directory - if s.cacheDir != "" { - os.RemoveAll(s.cacheDir) - } -} - -// Metadata management - -func (s *S3Source) loadMetadata(ctx context.Context) error { - s.metaMu.Lock() - defer s.metaMu.Unlock() - - // Check if metadata is still fresh - if s.metaLoaded && time.Since(s.metaLoadTime) < s.metadataTTL { - return nil - } - - prefix := fmt.Sprintf("chain_%d/", s.chainId.Uint64()) - if s.config.Prefix != "" { - prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) - } - - paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ - Bucket: aws.String(s.config.Bucket), - Prefix: aws.String(prefix), - }) - - for paginator.HasMorePages() { - page, err := paginator.NextPage(ctx) - if err != nil { - return fmt.Errorf("failed to list S3 objects: %w", err) - } - - for _, obj := range page.Contents { - if obj.Key == nil || obj.Size == nil { - continue - } - - startBlock, endBlock := s.extractBlockRangeFromKey(*obj.Key) - if startBlock == nil || endBlock == nil { - continue - } - - // Store metadata - s.fileMetadata[*obj.Key] = &FileMetadata{ - Key: *obj.Key, - MinBlock: startBlock, - MaxBlock: endBlock, - Size: *obj.Size, - } - - // Update global min/max - if s.minBlock == nil || startBlock.Cmp(s.minBlock) < 0 { - s.minBlock = new(big.Int).Set(startBlock) - } - if s.maxBlock == nil || endBlock.Cmp(s.maxBlock) > 0 { - s.maxBlock = new(big.Int).Set(endBlock) - } - } - } - - s.metaLoaded = true - s.metaLoadTime = time.Now() - log.Info(). - Int("files", len(s.fileMetadata)). - Str("min_block", s.minBlock.String()). - Str("max_block", s.maxBlock.String()). - Dur("ttl", s.metadataTTL). - Msg("Loaded S3 metadata cache") - - return nil -} - -func (s *S3Source) ensureMetadataLoaded(ctx context.Context) error { - s.metaMu.RLock() - // Check if metadata is loaded and still fresh - if s.metaLoaded && time.Since(s.metaLoadTime) < s.metadataTTL { - s.metaMu.RUnlock() - return nil - } - s.metaMu.RUnlock() - - return s.loadMetadata(ctx) -} - -// File grouping and downloading - -func (s *S3Source) ensureFilesAvailable(ctx context.Context, fileGroups map[string][]*big.Int) error { - var wg sync.WaitGroup - errChan := make(chan error, len(fileGroups)) - - // Limit concurrent downloads - sem := make(chan struct{}, s.maxConcurrentDownloads) - - for fileKey := range fileGroups { - wg.Add(1) - go func(key string) { - defer wg.Done() - - // First check if file is already being downloaded by another goroutine - s.downloadMu.Lock() - if downloadWg, downloading := s.downloading[key]; downloading { - s.downloadMu.Unlock() - // Wait for the existing download to complete - downloadWg.Wait() - - // Verify file exists after waiting - localPath := s.getCacheFilePath(key) - if !s.isFileCached(localPath) { - errChan <- fmt.Errorf("file %s not available after waiting for download", key) - } else { - // Ensure file is tracked in cache map - s.ensureFileInCacheMap(key) - // Update access time for this file since we'll be using it - s.cacheMu.Lock() - s.cacheMap[key] = time.Now() - s.cacheMu.Unlock() - } - return - } - s.downloadMu.Unlock() - - // Check if file is already cached - localPath := s.getCacheFilePath(key) - if s.isFileCached(localPath) { - // Ensure file is in cache map (in case it was on disk but not tracked) - s.ensureFileInCacheMap(key) - // Update access time - s.cacheMu.Lock() - s.cacheMap[key] = time.Now() - s.cacheMu.Unlock() - return - } - - // 
Need to download the file - sem <- struct{}{} - defer func() { <-sem }() - - if err := s.downloadFile(ctx, key); err != nil { - errChan <- fmt.Errorf("failed to download %s: %w", key, err) - return - } - - // Verify file exists after download - if !s.isFileCached(localPath) { - errChan <- fmt.Errorf("file %s not cached after download", key) - } - }(fileKey) - } - - // Wait for all files to be available - wg.Wait() - close(errChan) - - // Collect any errors - var errors []string - for err := range errChan { - if err != nil { - errors = append(errors, err.Error()) - } - } - - if len(errors) > 0 { - return fmt.Errorf("failed to ensure files available: %s", strings.Join(errors, "; ")) - } - - return nil -} - -func (s *S3Source) groupBlocksByFiles(blockNumbers []*big.Int) map[string][]*big.Int { - s.metaMu.RLock() - defer s.metaMu.RUnlock() - - fileGroups := make(map[string][]*big.Int) - - for _, blockNum := range blockNumbers { - // Find files that contain this block - for _, meta := range s.fileMetadata { - if blockNum.Cmp(meta.MinBlock) >= 0 && blockNum.Cmp(meta.MaxBlock) <= 0 { - fileGroups[meta.Key] = append(fileGroups[meta.Key], blockNum) - break // Each block should only be in one file - } - } - } - - return fileGroups -} - -func (s *S3Source) downloadFile(ctx context.Context, fileKey string) error { - // Prevent duplicate downloads - s.downloadMu.Lock() - if wg, downloading := s.downloading[fileKey]; downloading { - s.downloadMu.Unlock() - wg.Wait() - return nil - } - - wg := &sync.WaitGroup{} - wg.Add(1) - s.downloading[fileKey] = wg - s.downloadMu.Unlock() - - defer func() { - wg.Done() - s.downloadMu.Lock() - delete(s.downloading, fileKey) - s.downloadMu.Unlock() - }() - - localPath := s.getCacheFilePath(fileKey) - - // Create temp file for atomic write - tempPath := localPath + ".tmp" - - // Download from S3 - result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ - Bucket: aws.String(s.config.Bucket), - Key: aws.String(fileKey), - }) - if err != nil { - return fmt.Errorf("failed to download file: %w", err) - } - defer result.Body.Close() - - // Create directory if needed - dir := filepath.Dir(tempPath) - if err := os.MkdirAll(dir, 0755); err != nil { - return err - } - - // Write to temp file - file, err := os.Create(tempPath) - if err != nil { - return err - } - - _, err = io.Copy(file, result.Body) - file.Close() - - if err != nil { - os.Remove(tempPath) - return err - } - - // Atomic rename - if err := os.Rename(tempPath, localPath); err != nil { - os.Remove(tempPath) - return err - } - - // Don't build block index immediately - build on demand to save memory - // Block indices will be built lazily when needed - - // Update cache map - s.cacheMu.Lock() - s.cacheMap[fileKey] = time.Now() - s.cacheMu.Unlock() - - log.Info().Str("file", fileKey).Str("path", localPath).Msg("Downloaded file from S3") - - return nil -} - -// Optimized parquet reading - -func (s *S3Source) readBlocksFromLocalFile(filePath string, blockNumbers []*big.Int) (map[uint64]rpc.GetFullBlockResult, error) { - // Acquire memory semaphore to limit concurrent memory usage - s.memorySem <- struct{}{} - defer func() { <-s.memorySem }() - - // Update access time for this file - fileKey := s.getFileKeyFromPath(filePath) - if fileKey != "" { - s.cacheMu.Lock() - s.cacheMap[fileKey] = time.Now() - s.cacheMu.Unlock() - } - - file, err := os.Open(filePath) - if err != nil { - return nil, err - } - defer file.Close() - - stat, err := file.Stat() - if err != nil { - return nil, err - } - - // Create block map for quick 
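downloadFile in the removed S3 source guards against duplicate work with a map of in-flight WaitGroups and writes each object through a temp file followed by os.Rename, so readers never observe a partially written parquet file. A condensed sketch of both ideas, assuming a hypothetical fetch function that streams the object body; paths and keys are placeholders.

package main

import (
	"io"
	"os"
	"path/filepath"
	"strings"
	"sync"
)

type downloader struct {
	mu       sync.Mutex
	inflight map[string]*sync.WaitGroup
}

// download ensures only one goroutine fetches a given key; everyone else
// waits for that download to finish. The payload is written to dst+".tmp"
// and renamed into place so dst only ever holds complete files.
func (d *downloader) download(dst, key string, fetch func(key string) (io.ReadCloser, error)) error {
	d.mu.Lock()
	if wg, ok := d.inflight[key]; ok {
		d.mu.Unlock()
		// As in the removed code, a waiter does not learn whether the other
		// download succeeded; callers re-check the cached file afterwards.
		wg.Wait()
		return nil
	}
	wg := &sync.WaitGroup{}
	wg.Add(1)
	d.inflight[key] = wg
	d.mu.Unlock()

	defer func() {
		wg.Done()
		d.mu.Lock()
		delete(d.inflight, key)
		d.mu.Unlock()
	}()

	body, err := fetch(key)
	if err != nil {
		return err
	}
	defer body.Close()

	tmp := dst + ".tmp"
	if err := os.MkdirAll(filepath.Dir(tmp), 0755); err != nil {
		return err
	}
	f, err := os.Create(tmp)
	if err != nil {
		return err
	}
	if _, err := io.Copy(f, body); err != nil {
		f.Close()
		os.Remove(tmp)
		return err
	}
	f.Close()
	return os.Rename(tmp, dst) // atomic on the same filesystem
}

func main() {
	d := &downloader{inflight: make(map[string]*sync.WaitGroup)}
	fetch := func(key string) (io.ReadCloser, error) {
		return io.NopCloser(strings.NewReader("parquet bytes for " + key)), nil
	}
	_ = d.download(filepath.Join(os.TempDir(), "blocks_0_99.parquet"), "chain_1/blocks_0_99.parquet", fetch)
}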
lookup - blockMap := make(map[uint64]bool) - for _, bn := range blockNumbers { - blockMap[bn.Uint64()] = true - } - - // Use optimized parquet reading - pFile, err := parquet.OpenFile(file, stat.Size()) - if err != nil { - return nil, err - } - - results := make(map[uint64]rpc.GetFullBlockResult) - foundBlocks := make(map[uint64]bool) - - // Read row groups - for rgIdx, rg := range pFile.RowGroups() { - // Check if we've found all blocks already - if len(foundBlocks) == len(blockMap) { - break - } - - // Check row group statistics to see if it contains our blocks - if !s.rowGroupContainsBlocks(rg, blockMap) { - continue - } - - // Use row-by-row reading to avoid loading entire row group into memory - if err := s.readRowGroupStreamingly(rg, blockMap, foundBlocks, results); err != nil { - log.Warn(). - Err(err). - Int("row_group", rgIdx). - Str("file", filePath). - Msg("Error reading row group") - continue - } - } - - return results, nil -} - -// readRowGroupStreamingly reads a row group row-by-row to minimize memory usage -func (s *S3Source) readRowGroupStreamingly(rg parquet.RowGroup, blockMap map[uint64]bool, foundBlocks map[uint64]bool, results map[uint64]rpc.GetFullBlockResult) error { - reader := parquet.NewRowGroupReader(rg) - - // Process rows one at a time instead of loading all into memory - for { - // Read single row - row := make([]parquet.Row, 1) - n, err := reader.ReadRows(row) - if err == io.EOF || n == 0 { - break - } - if err != nil { - return fmt.Errorf("failed to read row: %w", err) - } - - if len(row[0]) < 8 { - continue // Not enough columns - } - - // Extract block number first to check if we need this row - blockNum := row[0][1].Uint64() // block_number is second column - - // Skip if not in requested blocks or already found - if !blockMap[blockNum] || foundBlocks[blockNum] { - continue - } - - // Build ParquetBlockData from row - pd := ParquetBlockData{ - ChainId: row[0][0].Uint64(), - BlockNumber: blockNum, - BlockHash: row[0][2].String(), - BlockTimestamp: row[0][3].Int64(), - Block: row[0][4].ByteArray(), - Transactions: row[0][5].ByteArray(), - Logs: row[0][6].ByteArray(), - Traces: row[0][7].ByteArray(), - } - - // Parse block data - result, err := s.parseBlockData(pd) - if err != nil { - log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to parse block data") - continue - } - - results[pd.BlockNumber] = result - foundBlocks[pd.BlockNumber] = true - - // Check if we've found all blocks - if len(foundBlocks) == len(blockMap) { - break - } - } - - return nil -} - -func (s *S3Source) rowGroupContainsBlocks(rg parquet.RowGroup, blockMap map[uint64]bool) bool { - // Get the block_number column chunk - for i, col := range rg.Schema().Fields() { - if col.Name() == "block_number" { - chunk := rg.ColumnChunks()[i] - ci, _ := chunk.ColumnIndex() - if ci != nil { - // Check min/max values - for j := 0; j < ci.NumPages(); j++ { - minVal := ci.MinValue(j) - maxVal := ci.MaxValue(j) - - if minVal.IsNull() || maxVal.IsNull() { - continue - } - - minBlock := minVal.Uint64() - maxBlock := maxVal.Uint64() - - // Check if any requested blocks fall in this range - for blockNum := range blockMap { - if blockNum >= minBlock && blockNum <= maxBlock { - return true - } - } - } - } - break - } - } - - // If no statistics, assume it might contain blocks - return true -} - -func (s *S3Source) parseBlockData(pd ParquetBlockData) (rpc.GetFullBlockResult, error) { - var block common.Block - if err := json.Unmarshal(pd.Block, &block); err != nil { - return 
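The removed parquet reader avoids decoding whole row groups by checking block_number column statistics first (rowGroupContainsBlocks) and then streaming rows one at a time (readRowGroupStreamingly), stopping as soon as every requested block has been found. A compressed sketch of the streaming-with-early-exit part, restricted to parquet-go calls that already appear in the removed file; the column order follows the removed ParquetBlockData layout, the file path is a placeholder, and the statistics-pruning step is omitted for brevity.

package main

import (
	"fmt"
	"io"
	"os"

	"github.com/parquet-go/parquet-go"
)

// blockRef holds the two columns this sketch extracts.
type blockRef struct {
	Number uint64
	Hash   string
}

// findBlocks streams rows out of a parquet archive file one at a time and
// stops as soon as every wanted block number has been seen.
func findBlocks(path string, wanted map[uint64]bool) (map[uint64]blockRef, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()
	st, err := f.Stat()
	if err != nil {
		return nil, err
	}
	pf, err := parquet.OpenFile(f, st.Size())
	if err != nil {
		return nil, err
	}

	found := make(map[uint64]blockRef, len(wanted))
	for _, rg := range pf.RowGroups() {
		if len(found) == len(wanted) {
			break
		}
		reader := parquet.NewRowGroupReader(rg)
		buf := make([]parquet.Row, 1)
		for len(found) < len(wanted) {
			n, err := reader.ReadRows(buf)
			if n > 0 {
				num := buf[0][1].Uint64() // block_number column
				if wanted[num] {
					found[num] = blockRef{Number: num, Hash: buf[0][2].String()}
				}
			}
			if err == io.EOF || n == 0 {
				break
			}
			if err != nil {
				return nil, err
			}
		}
	}
	return found, nil
}

func main() {
	refs, err := findBlocks("blocks_0_99.parquet", map[uint64]bool{5: true, 42: true})
	fmt.Println(refs, err)
}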
rpc.GetFullBlockResult{}, err - } - - var transactions []common.Transaction - if len(pd.Transactions) > 0 { - if err := json.Unmarshal(pd.Transactions, &transactions); err != nil { - log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal transactions") - } - } - - var logs []common.Log - if len(pd.Logs) > 0 { - if err := json.Unmarshal(pd.Logs, &logs); err != nil { - log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal logs") - } - } - - var traces []common.Trace - if len(pd.Traces) > 0 { - if err := json.Unmarshal(pd.Traces, &traces); err != nil { - log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal traces") - } - } - - return rpc.GetFullBlockResult{ - BlockNumber: new(big.Int).SetUint64(pd.BlockNumber), - Data: common.BlockData{ - Block: block, - Transactions: transactions, - Logs: logs, - Traces: traces, - }, - Error: nil, - }, nil -} - -// Helper functions - -func (s *S3Source) extractBlockRangeFromKey(key string) (*big.Int, *big.Int) { - parts := strings.Split(key, "/") - if len(parts) == 0 { - return nil, nil - } - - filename := parts[len(parts)-1] - if !strings.HasPrefix(filename, "blocks_") || !strings.HasSuffix(filename, ".parquet") { - return nil, nil - } - - rangeStr := strings.TrimPrefix(filename, "blocks_") - rangeStr = strings.TrimSuffix(rangeStr, ".parquet") - - rangeParts := strings.Split(rangeStr, "_") - if len(rangeParts) != 2 { - return nil, nil - } - - startBlock, ok1 := new(big.Int).SetString(rangeParts[0], 10) - endBlock, ok2 := new(big.Int).SetString(rangeParts[1], 10) - if !ok1 || !ok2 { - return nil, nil - } - - return startBlock, endBlock -} - -func (s *S3Source) getCacheFilePath(fileKey string) string { - // Create a safe filename from the S3 key - hash := sha256.Sum256([]byte(fileKey)) - filename := hex.EncodeToString(hash[:])[:16] + ".parquet" - return filepath.Join(s.cacheDir, filename) -} - -func (s *S3Source) getFileKeyFromPath(filePath string) string { - // Reverse lookup - find the key for a given cache path - s.cacheMu.RLock() - defer s.cacheMu.RUnlock() - - for key := range s.cacheMap { - if s.getCacheFilePath(key) == filePath { - return key - } - } - return "" -} - -func (s *S3Source) isFileCached(filePath string) bool { - // First check if file exists at all - info, err := os.Stat(filePath) - if err != nil { - return false - } - - // Check if file has content - if info.Size() == 0 { - return false - } - - // Check if a temp file exists (indicating incomplete download) - tempPath := filePath + ".tmp" - if _, err := os.Stat(tempPath); err == nil { - // Temp file exists, download is incomplete - return false - } - - // File exists, has content, and no temp file - it's cached - return true -} - -// ensureFileInCacheMap ensures a file that exists on disk is tracked in the cache map -func (s *S3Source) ensureFileInCacheMap(fileKey string) { - s.cacheMu.Lock() - defer s.cacheMu.Unlock() - - // If not in cache map, add it with current time - if _, exists := s.cacheMap[fileKey]; !exists { - localPath := s.getCacheFilePath(fileKey) - if info, err := os.Stat(localPath); err == nil { - // Use file modification time if it's recent, otherwise use current time - modTime := info.ModTime() - if time.Since(modTime) < s.fileCacheTTL { - s.cacheMap[fileKey] = modTime - } else { - s.cacheMap[fileKey] = time.Now() - } - log.Trace(). - Str("file", fileKey). - Time("access_time", s.cacheMap[fileKey]). 
- Msg("Added existing file to cache map") - } - } -} - -func (s *S3Source) makeErrorResults(blockNumbers []*big.Int, err error) []rpc.GetFullBlockResult { - results := make([]rpc.GetFullBlockResult, len(blockNumbers)) - for i, bn := range blockNumbers { - results[i] = rpc.GetFullBlockResult{ - BlockNumber: bn, - Error: err, - } - } - return results -} - -func (s *S3Source) cleanupCache() { - ticker := time.NewTicker(s.cleanupInterval) - defer ticker.Stop() - - for range ticker.C { - s.cacheMu.Lock() - s.downloadMu.Lock() - s.activeUseMu.RLock() - - // Remove files not accessed within the TTL - cutoff := time.Now().Add(-s.fileCacheTTL) - protectedCount := 0 - expiredCount := 0 - - for fileKey, accessTime := range s.cacheMap { - // Skip files that are currently being downloaded - if _, downloading := s.downloading[fileKey]; downloading { - protectedCount++ - continue - } - - // Skip files that are actively being used - if count, active := s.activeUse[fileKey]; active && count > 0 { - protectedCount++ - // Only log at trace level to reduce noise - log.Trace(). - Str("file", fileKey). - Int("ref_count", count). - Msg("Skipping actively used file in cleanup") - continue - } - - if accessTime.Before(cutoff) { - expiredCount++ - cacheFile := s.getCacheFilePath(fileKey) - log.Debug(). - Str("file", fileKey). - Str("path", cacheFile). - Time("last_access", accessTime). - Time("cutoff", cutoff). - Msg("Removing expired file from cache") - os.Remove(cacheFile) - delete(s.cacheMap, fileKey) - } - } - - s.activeUseMu.RUnlock() - s.downloadMu.Unlock() - s.cacheMu.Unlock() - - // Only log if something interesting happened (files were deleted) - if expiredCount > 0 { - log.Debug(). - Int("protected", protectedCount). - Int("expired", expiredCount). - Int("total_cached", len(s.cacheMap)). - Msg("Cache cleanup cycle completed - removed expired files") - } else if protectedCount > 0 { - // Use trace level for routine cleanup cycles with no deletions - log.Trace(). - Int("protected", protectedCount). - Int("total_cached", len(s.cacheMap)). - Msg("Cache cleanup cycle completed - no files expired") - } - - // Also check disk usage and remove oldest files if needed - s.enforceMaxCacheSize() - } -} - -func (s *S3Source) enforceMaxCacheSize() { - maxSize := s.maxCacheSize - - var totalSize int64 - var files []struct { - path string - key string - size int64 - access time.Time - } - - s.cacheMu.RLock() - for key, accessTime := range s.cacheMap { - path := s.getCacheFilePath(key) - if info, err := os.Stat(path); err == nil { - totalSize += info.Size() - files = append(files, struct { - path string - key string - size int64 - access time.Time - }{path, key, info.Size(), accessTime}) - } - } - s.cacheMu.RUnlock() - - if totalSize <= maxSize { - return - } - - log.Debug(). - Int64("total_size_mb", totalSize/(1024*1024)). - Int64("max_size_mb", maxSize/(1024*1024)). - Int("file_count", len(files)). 
- Msg("Cache size exceeded, removing old files") - - // Sort by access time (oldest first) - sort.Slice(files, func(i, j int) bool { - return files[i].access.Before(files[j].access) - }) - - // Remove oldest files until under limit - s.cacheMu.Lock() - s.downloadMu.Lock() - s.activeUseMu.RLock() - defer s.activeUseMu.RUnlock() - defer s.downloadMu.Unlock() - defer s.cacheMu.Unlock() - - for _, f := range files { - if totalSize <= maxSize { - break - } - - // Skip files that are currently being downloaded - if _, downloading := s.downloading[f.key]; downloading { - continue - } - - // Skip files that are actively being used - if count, active := s.activeUse[f.key]; active && count > 0 { - continue - } - - os.Remove(f.path) - delete(s.cacheMap, f.key) - totalSize -= f.size - } -} diff --git a/internal/source/source.go b/internal/source/source.go deleted file mode 100644 index 2b9ef85c..00000000 --- a/internal/source/source.go +++ /dev/null @@ -1,14 +0,0 @@ -package source - -import ( - "context" - "math/big" - - "github.com/thirdweb-dev/indexer/internal/rpc" -) - -type ISource interface { - GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult - GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) - Close() -} diff --git a/internal/source/staging.go b/internal/source/staging.go deleted file mode 100644 index 0370163c..00000000 --- a/internal/source/staging.go +++ /dev/null @@ -1,65 +0,0 @@ -package source - -import ( - "context" - "fmt" - "math/big" - - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/storage" -) - -type StagingSource struct { - chainId *big.Int - storage storage.IStagingStorage -} - -func NewStagingSource(chainId *big.Int, storage storage.IStagingStorage) (*StagingSource, error) { - return &StagingSource{ - chainId: chainId, - storage: storage, - }, nil -} - -func (s *StagingSource) GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { - if len(blockNumbers) == 0 { - return nil - } - - blockData, err := s.storage.GetStagingData(storage.QueryFilter{BlockNumbers: blockNumbers, ChainId: s.chainId}) - if err != nil { - return nil - } - - results := make([]rpc.GetFullBlockResult, 0, len(blockData)) - resultMap := make(map[uint64]rpc.GetFullBlockResult) - for _, data := range blockData { - resultMap[data.Block.Number.Uint64()] = rpc.GetFullBlockResult{ - BlockNumber: data.Block.Number, - Data: data, - Error: nil, - } - } - - for _, bn := range blockNumbers { - if result, ok := resultMap[bn.Uint64()]; ok { - results = append(results, result) - } else { - results = append(results, rpc.GetFullBlockResult{ - BlockNumber: bn, - Error: fmt.Errorf("block %s not found", bn.String()), - }) - } - } - - return results -} - -func (s *StagingSource) GetSupportedBlockRange(ctx context.Context) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) { - return s.storage.GetStagingDataBlockRange(s.chainId) -} - -func (s *StagingSource) Close() { - // Clean up cache directory - s.storage.Close() -} diff --git a/internal/storage/badger.go b/internal/storage/badger.go deleted file mode 100644 index 94c0d1bb..00000000 --- a/internal/storage/badger.go +++ /dev/null @@ -1,645 +0,0 @@ -package storage - -import ( - "bytes" - "encoding/gob" - "fmt" - "math/big" - "os" - "path/filepath" - "sort" - "strings" - "sync" - "time" - - "github.com/dgraph-io/badger/v4" - "github.com/dgraph-io/badger/v4/options" - "github.com/rs/zerolog/log" - config 
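source.go, removed just above, defines the narrow ISource contract that both S3Source and StagingSource satisfy, so consumers can be written once against the interface and be handed whichever backend is configured. A self-contained sketch of that shape, with a toy in-memory implementation standing in for the real backends and local stand-in types replacing ISource and rpc.GetFullBlockResult; nothing here is taken verbatim from the indexer.

package main

import (
	"context"
	"fmt"
	"math/big"
)

// result is a stand-in for rpc.GetFullBlockResult.
type result struct {
	BlockNumber *big.Int
	Err         error
}

// blockSource mirrors the shape of the removed ISource interface.
type blockSource interface {
	GetFullBlocks(ctx context.Context, blockNumbers []*big.Int) []result
	GetSupportedBlockRange(ctx context.Context) (min, max *big.Int, err error)
	Close()
}

// memorySource is a toy implementation; the real code plugged an S3-backed
// archive or the staging store in here.
type memorySource struct{ min, max int64 }

func (m *memorySource) GetFullBlocks(_ context.Context, nums []*big.Int) []result {
	out := make([]result, len(nums))
	for i, bn := range nums {
		out[i] = result{BlockNumber: bn}
		if bn.Int64() < m.min || bn.Int64() > m.max {
			out[i].Err = fmt.Errorf("block %s not found", bn)
		}
	}
	return out
}

func (m *memorySource) GetSupportedBlockRange(context.Context) (*big.Int, *big.Int, error) {
	return big.NewInt(m.min), big.NewInt(m.max), nil
}

func (m *memorySource) Close() {}

func main() {
	var src blockSource = &memorySource{min: 100, max: 200}
	defer src.Close()
	min, max, _ := src.GetSupportedBlockRange(context.Background())
	fmt.Println("supported range:", min, "-", max)
	for _, r := range src.GetFullBlocks(context.Background(), []*big.Int{big.NewInt(150), big.NewInt(999)}) {
		fmt.Println(r.BlockNumber, r.Err)
	}
}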
"github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -type BadgerConnector struct { - db *badger.DB - mu sync.RWMutex - gcTicker *time.Ticker - stopGC chan struct{} - - // Configuration - stagingDataTTL time.Duration // TTL for staging data entries - gcInterval time.Duration // Interval for running garbage collection - cacheRefreshInterval time.Duration // Interval for refreshing range cache - cacheStalenessTimeout time.Duration // Timeout before considering cache entry stale - - // In-memory block range cache - // NOTE: Staging data has a TTL. The cache is refreshed periodically - // to detect expired entries and update min/max ranges accordingly. - // Badger doesn't provide expiry notifications, so we rely on periodic scanning. - rangeCache map[string]*blockRange // chainId -> range - rangeCacheMu sync.RWMutex - rangeUpdateChan chan string // channel for triggering background updates - stopRangeUpdate chan struct{} -} - -type blockRange struct { - min *big.Int - max *big.Int - lastUpdated time.Time -} - -func NewBadgerConnector(cfg *config.BadgerConfig) (*BadgerConnector, error) { - path := cfg.Path - if path == "" { - path = filepath.Join(os.TempDir(), "insight-staging-badger") - } - opts := badger.DefaultOptions(path) - - opts.ValueLogFileSize = 1 * 1024 * 1024 * 1024 // 1GB - opts.BaseTableSize = 128 * 1024 * 1024 // 128MB - opts.BaseLevelSize = 128 * 1024 * 1024 // 128MB - opts.LevelSizeMultiplier = 2 - opts.NumMemtables = 4 // 512MB - opts.MemTableSize = opts.BaseTableSize // 128MB per memtable - opts.NumLevelZeroTables = 8 - opts.NumLevelZeroTablesStall = 24 - opts.NumCompactors = 2 // More compactors for parallel compaction - opts.CompactL0OnClose = true // Compact L0 tables on close - opts.ValueLogMaxEntries = 1000000 // More entries per value log - opts.ValueThreshold = 1024 // Store values > 1024 bytes in value log - opts.IndexCacheSize = 128 * 1024 * 1024 // 128MB index cache - opts.BlockCacheSize = 128 * 1024 * 1024 // 128MB block cache - opts.SyncWrites = false - opts.DetectConflicts = false - opts.Compression = options.ZSTD - - opts.Logger = nil // Disable badger's internal logging - - db, err := badger.Open(opts) - if err != nil { - return nil, fmt.Errorf("failed to open badger db: %w", err) - } - - bc := &BadgerConnector{ - db: db, - stopGC: make(chan struct{}), - rangeCache: make(map[string]*blockRange), - rangeUpdateChan: make(chan string, 5), - stopRangeUpdate: make(chan struct{}), - stagingDataTTL: 10 * time.Minute, - gcInterval: 60 * time.Second, - cacheRefreshInterval: 60 * time.Second, - cacheStalenessTimeout: 120 * time.Second, - } - - // Start GC routine - bc.gcTicker = time.NewTicker(bc.gcInterval) - go bc.runGC() - - // Start range cache update routine - go bc.runRangeCacheUpdater() - - return bc, nil -} - -func (bc *BadgerConnector) runGC() { - for { - select { - case <-bc.gcTicker.C: - err := bc.db.RunValueLogGC(0.5) - if err != nil && err != badger.ErrNoRewrite { - log.Debug().Err(err).Msg("BadgerDB GC error") - } - case <-bc.stopGC: - return - } - } -} - -// runRangeCacheUpdater runs in the background to validate cache entries -func (bc *BadgerConnector) runRangeCacheUpdater() { - ticker := time.NewTicker(bc.cacheRefreshInterval) - defer ticker.Stop() - - for { - select { - case chainIdStr := <-bc.rangeUpdateChan: - bc.updateRangeForChain(chainIdStr) - - case <-ticker.C: - bc.refreshStaleRanges() - - case <-bc.stopRangeUpdate: - return - } - } -} - -func (bc *BadgerConnector) updateRangeForChain(chainIdStr 
string) { - chainId, ok := new(big.Int).SetString(chainIdStr, 10) - if !ok { - return - } - - // Scan the actual data to find min/max - var minBlock, maxBlock *big.Int - prefix := blockKeyRange(chainId) - - err := bc.db.View(func(txn *badger.Txn) error { - opts := badger.DefaultIteratorOptions - opts.Prefix = []byte(prefix) - it := txn.NewIterator(opts) - defer it.Close() - - for it.Rewind(); it.Valid(); it.Next() { - key := string(it.Item().Key()) - parts := strings.Split(key, ":") - if len(parts) != 3 { - continue - } - - blockNum, ok := new(big.Int).SetString(parts[2], 10) - if !ok { - continue - } - - if minBlock == nil || blockNum.Cmp(minBlock) < 0 { - minBlock = blockNum - } - if maxBlock == nil || blockNum.Cmp(maxBlock) > 0 { - maxBlock = blockNum - } - } - return nil - }) - - if err != nil { - log.Error().Err(err).Str("chainId", chainIdStr).Msg("Failed to update range cache") - return - } - - // Update cache - bc.rangeCacheMu.Lock() - if minBlock != nil && maxBlock != nil { - bc.rangeCache[chainIdStr] = &blockRange{ - min: minBlock, - max: maxBlock, - lastUpdated: time.Now(), - } - } else { - // No data, remove from cache - delete(bc.rangeCache, chainIdStr) - } - bc.rangeCacheMu.Unlock() -} - -func (bc *BadgerConnector) refreshStaleRanges() { - bc.rangeCacheMu.RLock() - staleChains := []string{} - now := time.Now() - for chainId, r := range bc.rangeCache { - if now.Sub(r.lastUpdated) > bc.cacheStalenessTimeout { - staleChains = append(staleChains, chainId) - } - } - bc.rangeCacheMu.RUnlock() - - // Update stale entries - for _, chainId := range staleChains { - select { - case bc.rangeUpdateChan <- chainId: - // Queued for update - default: - // Channel full, skip this update - } - } -} - -func (bc *BadgerConnector) Close() error { - if bc.gcTicker != nil { - bc.gcTicker.Stop() - close(bc.stopGC) - } - select { - case <-bc.stopRangeUpdate: - default: - close(bc.stopRangeUpdate) - } - return bc.db.Close() -} - -// Key construction helpers -func blockKey(chainId *big.Int, blockNumber *big.Int) []byte { - return []byte(fmt.Sprintf("blockdata:%s:%s", chainId.String(), blockNumber.String())) -} - -func blockKeyRange(chainId *big.Int) []byte { - return []byte(fmt.Sprintf("blockdata:%s:", chainId.String())) -} - -func lastReorgKey(chainId *big.Int) []byte { - return []byte(fmt.Sprintf("reorg:%s", chainId.String())) -} - -func lastPublishedKey(chainId *big.Int) []byte { - return []byte(fmt.Sprintf("publish:%s", chainId.String())) -} - -func lastCommittedKey(chainId *big.Int) []byte { - return []byte(fmt.Sprintf("commit:%s", chainId.String())) -} - -func (bc *BadgerConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - bc.mu.RLock() - defer bc.mu.RUnlock() - - var blockNumber *big.Int - err := bc.db.View(func(txn *badger.Txn) error { - item, err := txn.Get(lastReorgKey(chainId)) - if err == badger.ErrKeyNotFound { - return nil - } - if err != nil { - return err - } - - return item.Value(func(val []byte) error { - blockNumber = new(big.Int).SetBytes(val) - return nil - }) - }) - - if blockNumber == nil { - return big.NewInt(0), nil - } - return blockNumber, err -} - -func (bc *BadgerConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - bc.mu.Lock() - defer bc.mu.Unlock() - - return bc.db.Update(func(txn *badger.Txn) error { - return txn.Set(lastReorgKey(chainId), blockNumber.Bytes()) - }) -} - -// IStagingStorage implementation -func (bc *BadgerConnector) InsertStagingData(data []common.BlockData) error { - bc.mu.Lock() - defer 
bc.mu.Unlock() - - // Track min/max blocks per chain for cache update - chainRanges := make(map[string]struct { - min *big.Int - max *big.Int - }) - - err := bc.db.Update(func(txn *badger.Txn) error { - // Insert block data and track ranges - for _, blockData := range data { - key := blockKey(blockData.Block.ChainId, blockData.Block.Number) - - var buf bytes.Buffer - if err := gob.NewEncoder(&buf).Encode(blockData); err != nil { - return err - } - - // Set with configured TTL for staging data - entry := badger.NewEntry(key, buf.Bytes()).WithTTL(bc.stagingDataTTL) - if err := txn.SetEntry(entry); err != nil { - return err - } - - // Track min/max for this chain - chainStr := blockData.Block.ChainId.String() - if r, exists := chainRanges[chainStr]; exists { - if blockData.Block.Number.Cmp(r.min) < 0 { - chainRanges[chainStr] = struct { - min *big.Int - max *big.Int - }{blockData.Block.Number, r.max} - } - if blockData.Block.Number.Cmp(r.max) > 0 { - chainRanges[chainStr] = struct { - min *big.Int - max *big.Int - }{r.min, blockData.Block.Number} - } - } else { - chainRanges[chainStr] = struct { - min *big.Int - max *big.Int - }{blockData.Block.Number, blockData.Block.Number} - } - } - - return nil - }) - - if err != nil { - return err - } - - // Update in-memory cache - bc.rangeCacheMu.Lock() - defer bc.rangeCacheMu.Unlock() - - for chainStr, newRange := range chainRanges { - existing, exists := bc.rangeCache[chainStr] - if exists { - // Update existing range - if newRange.min.Cmp(existing.min) < 0 { - existing.min = newRange.min - } - if newRange.max.Cmp(existing.max) > 0 { - existing.max = newRange.max - } - existing.lastUpdated = time.Now() - } else { - // Create new range entry - bc.rangeCache[chainStr] = &blockRange{ - min: newRange.min, - max: newRange.max, - lastUpdated: time.Now(), - } - // Trigger background update to ensure accuracy - select { - case bc.rangeUpdateChan <- chainStr: - default: - // Channel full, will be updated in next periodic scan - } - } - } - - return nil -} - -func (bc *BadgerConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { - bc.mu.RLock() - defer bc.mu.RUnlock() - - var results []common.BlockData - - if len(qf.BlockNumbers) > 0 { - // Fetch specific blocks - err := bc.db.View(func(txn *badger.Txn) error { - for _, blockNum := range qf.BlockNumbers { - key := blockKey(qf.ChainId, blockNum) - item, err := txn.Get(key) - if err == badger.ErrKeyNotFound { - continue - } - if err != nil { - return err - } - - err = item.Value(func(val []byte) error { - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - return err - } - results = append(results, blockData) - return nil - }) - if err != nil { - return err - } - } - return nil - }) - return results, err - } - - // Range query - prefix := blockKeyRange(qf.ChainId) - - err := bc.db.View(func(txn *badger.Txn) error { - opts := badger.DefaultIteratorOptions - opts.Prefix = []byte(prefix) - it := txn.NewIterator(opts) - defer it.Close() - - count := 0 - for it.Rewind(); it.Valid(); it.Next() { - if qf.Offset > 0 && count < qf.Offset { - count++ - continue - } - - item := it.Item() - err := item.Value(func(val []byte) error { - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - return err - } - - // Apply filters - if qf.StartBlock != nil && blockData.Block.Number.Cmp(qf.StartBlock) < 0 { - return nil - } - if qf.EndBlock != nil && blockData.Block.Number.Cmp(qf.EndBlock) > 0 
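InsertStagingData in the removed Badger connector relies on per-entry TTLs (badger.NewEntry(...).WithTTL) so staging rows expire on their own, and the read paths recover them with prefix iterators over keys of the form blockdata:<chainId>:<blockNumber>. A minimal sketch of that write/scan pair against an ephemeral Badger v4 store; the key contents and TTL value are placeholders.

package main

import (
	"fmt"
	"time"

	"github.com/dgraph-io/badger/v4"
)

func main() {
	opts := badger.DefaultOptions("").WithInMemory(true) // ephemeral store for the sketch
	opts.Logger = nil
	db, err := badger.Open(opts)
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// Write a few entries under a chain-scoped prefix, each with a TTL so
	// Badger can expire them without an explicit delete.
	err = db.Update(func(txn *badger.Txn) error {
		for _, n := range []string{"100", "101", "102"} {
			key := []byte("blockdata:1:" + n)
			entry := badger.NewEntry(key, []byte("payload-"+n)).WithTTL(10 * time.Minute)
			if err := txn.SetEntry(entry); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		panic(err)
	}

	// Scan everything for chain 1 with a prefix iterator, as the staging
	// reads and the range-cache refresh above do.
	err = db.View(func(txn *badger.Txn) error {
		iopts := badger.DefaultIteratorOptions
		iopts.Prefix = []byte("blockdata:1:")
		it := txn.NewIterator(iopts)
		defer it.Close()
		for it.Rewind(); it.Valid(); it.Next() {
			item := it.Item()
			if err := item.Value(func(val []byte) error {
				fmt.Printf("%s => %s\n", item.Key(), val)
				return nil
			}); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		panic(err)
	}
}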
{ - return nil - } - - results = append(results, blockData) - return nil - }) - if err != nil { - return err - } - - count++ - if qf.Limit > 0 && len(results) >= qf.Limit { - break - } - } - return nil - }) - - // Sort by block number - sort.Slice(results, func(i, j int) bool { - return results[i].Block.Number.Cmp(results[j].Block.Number) < 0 - }) - - return results, err -} - -func (bc *BadgerConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - bc.mu.RLock() - defer bc.mu.RUnlock() - - var blockNumber *big.Int - err := bc.db.View(func(txn *badger.Txn) error { - item, err := txn.Get(lastPublishedKey(chainId)) - if err == badger.ErrKeyNotFound { - return nil - } - if err != nil { - return err - } - - return item.Value(func(val []byte) error { - blockNumber = new(big.Int).SetBytes(val) - return nil - }) - }) - - if blockNumber == nil { - return big.NewInt(0), nil - } - return blockNumber, err -} - -func (bc *BadgerConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - bc.mu.Lock() - defer bc.mu.Unlock() - - return bc.db.Update(func(txn *badger.Txn) error { - return txn.Set(lastPublishedKey(chainId), blockNumber.Bytes()) - }) -} - -func (bc *BadgerConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { - bc.mu.RLock() - defer bc.mu.RUnlock() - - var blockNumber *big.Int - err := bc.db.View(func(txn *badger.Txn) error { - item, err := txn.Get(lastCommittedKey(chainId)) - if err == badger.ErrKeyNotFound { - return nil - } - if err != nil { - return err - } - - return item.Value(func(val []byte) error { - blockNumber = new(big.Int).SetBytes(val) - return nil - }) - }) - - if blockNumber == nil { - return big.NewInt(0), nil - } - return blockNumber, err -} - -func (bc *BadgerConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - bc.mu.Lock() - defer bc.mu.Unlock() - - return bc.db.Update(func(txn *badger.Txn) error { - return txn.Set(lastCommittedKey(chainId), blockNumber.Bytes()) - }) -} - -func (bc *BadgerConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - bc.mu.Lock() - defer bc.mu.Unlock() - - prefix := blockKeyRange(chainId) - var deletedSome bool - - err := bc.db.Update(func(txn *badger.Txn) error { - opts := badger.DefaultIteratorOptions - opts.Prefix = []byte(prefix) - it := txn.NewIterator(opts) - defer it.Close() - - var keysToDelete [][]byte - - for it.Rewind(); it.Valid(); it.Next() { - key := string(it.Item().Key()) - parts := strings.Split(key, ":") - if len(parts) != 3 { - continue - } - - blockNum, ok := new(big.Int).SetString(parts[2], 10) - if !ok { - continue - } - - if blockNum.Cmp(blockNumber) <= 0 { - keysToDelete = append(keysToDelete, it.Item().KeyCopy(nil)) - } - } - - for _, key := range keysToDelete { - if err := txn.Delete(key); err != nil { - return err - } - deletedSome = true - } - - return nil - }) - - if err != nil { - return err - } - - // Update cache if we deleted something - if deletedSome { - chainStr := chainId.String() - bc.rangeCacheMu.Lock() - if entry, exists := bc.rangeCache[chainStr]; exists { - // Check if we need to update min - if entry.min.Cmp(blockNumber) <= 0 { - // The new minimum must be blockNumber + 1 or higher - newMin := new(big.Int).Add(blockNumber, big.NewInt(1)) - // Only update if the new min is still <= max - if newMin.Cmp(entry.max) <= 0 { - entry.min = newMin - entry.lastUpdated = time.Now() - } else { - // No blocks remaining, remove from cache - delete(bc.rangeCache, chainStr) - } - } 
- } - bc.rangeCacheMu.Unlock() - - // Trigger background update to ensure accuracy - select { - case bc.rangeUpdateChan <- chainStr: - default: - // Channel full, will be updated in next periodic scan - } - } - - return nil -} - -// GetStagingDataBlockRange returns the minimum and maximum block numbers stored for a given chain -func (bc *BadgerConnector) GetStagingDataBlockRange(chainId *big.Int) (*big.Int, *big.Int, error) { - chainStr := chainId.String() - - // Check cache - bc.rangeCacheMu.RLock() - if entry, exists := bc.rangeCache[chainStr]; exists { - // Always return cached values - they're updated live during insert/delete - min := new(big.Int).Set(entry.min) - max := new(big.Int).Set(entry.max) - bc.rangeCacheMu.RUnlock() - return min, max, nil - } - bc.rangeCacheMu.RUnlock() - - // Cache miss - do synchronous update to populate cache - bc.updateRangeForChain(chainStr) - - // Return newly cached value - bc.rangeCacheMu.RLock() - defer bc.rangeCacheMu.RUnlock() - - if entry, exists := bc.rangeCache[chainStr]; exists { - min := new(big.Int).Set(entry.min) - max := new(big.Int).Set(entry.max) - return min, max, nil - } - - // No data found - return nil, nil, nil -} diff --git a/internal/storage/block_buffer.go b/internal/storage/block_buffer.go deleted file mode 100644 index 90c6ed81..00000000 --- a/internal/storage/block_buffer.go +++ /dev/null @@ -1,282 +0,0 @@ -package storage - -import ( - "bytes" - "encoding/gob" - "fmt" - "math/big" - "sync" - - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/internal/common" -) - -// BlockBuffer manages buffering of block data with size and count limits -type BlockBuffer struct { - mu sync.RWMutex - data []common.BlockData - sizeBytes int64 - maxSizeBytes int64 - maxBlocks int -} - -// IBlockBuffer defines the interface for block buffer implementations -type IBlockBuffer interface { - Add(blocks []common.BlockData) bool - Flush() []common.BlockData - ShouldFlush() bool - Size() (int64, int) - IsEmpty() bool - GetData() []common.BlockData - GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData - GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData - GetMaxBlockNumber(chainId *big.Int) *big.Int - Clear() - Stats() BufferStats - Close() error -} - -// NewBlockBuffer creates a new in-memory block buffer -func NewBlockBuffer(maxSizeMB int64, maxBlocks int) *BlockBuffer { - return &BlockBuffer{ - data: make([]common.BlockData, 0), - maxSizeBytes: maxSizeMB * 1024 * 1024, - maxBlocks: maxBlocks, - } -} - -// NewBlockBufferWithBadger creates a new Badger-backed block buffer for better memory management -// This uses ephemeral storage with optimized settings for caching -func NewBlockBufferWithBadger(maxSizeMB int64, maxBlocks int) (IBlockBuffer, error) { - return NewBadgerBlockBuffer(maxSizeMB, maxBlocks) -} - -// Add adds blocks to the buffer and returns true if flush is needed -func (b *BlockBuffer) Add(blocks []common.BlockData) bool { - if len(blocks) == 0 { - return false - } - - b.mu.Lock() - defer b.mu.Unlock() - - // Calculate actual size by marshaling the entire batch once - // This gives us accurate size with minimal overhead since we marshal once per Add call - var actualSize int64 - var buf bytes.Buffer - enc := gob.NewEncoder(&buf) - - // Marshal all blocks to get actual serialized size - if err := enc.Encode(blocks); err != nil { - // If encoding fails, use estimation as fallback - log.Warn().Err(err).Msg("Failed to marshal blocks for size calculation, buffer size is not 
reported correctly") - } else { - actualSize = int64(buf.Len()) - } - - // Add to buffer - b.data = append(b.data, blocks...) - b.sizeBytes += actualSize - - log.Debug(). - Int("block_count", len(blocks)). - Int64("actual_size_bytes", actualSize). - Int64("total_size_bytes", b.sizeBytes). - Int("total_blocks", len(b.data)). - Msg("Added blocks to buffer") - - // Check if flush is needed - return b.shouldFlushLocked() -} - -// Flush removes all data from the buffer and returns it -func (b *BlockBuffer) Flush() []common.BlockData { - b.mu.Lock() - defer b.mu.Unlock() - - if len(b.data) == 0 { - return nil - } - - // Take ownership of data - data := b.data - b.data = make([]common.BlockData, 0) - b.sizeBytes = 0 - - log.Info(). - Int("block_count", len(data)). - Msg("Flushing buffer") - - return data -} - -// ShouldFlush checks if the buffer should be flushed based on configured thresholds -func (b *BlockBuffer) ShouldFlush() bool { - b.mu.RLock() - defer b.mu.RUnlock() - return b.shouldFlushLocked() -} - -// Size returns the current buffer size in bytes and block count -func (b *BlockBuffer) Size() (int64, int) { - b.mu.RLock() - defer b.mu.RUnlock() - return b.sizeBytes, len(b.data) -} - -// IsEmpty returns true if the buffer is empty -func (b *BlockBuffer) IsEmpty() bool { - b.mu.RLock() - defer b.mu.RUnlock() - return len(b.data) == 0 -} - -// GetData returns a copy of the current buffer data -func (b *BlockBuffer) GetData() []common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - result := make([]common.BlockData, len(b.data)) - copy(result, b.data) - return result -} - -// GetBlocksInRange returns blocks from the buffer that fall within the given range -func (b *BlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - var result []common.BlockData - for _, block := range b.data { - if block.Block.ChainId.Cmp(chainId) == 0 { - blockNum := block.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - result = append(result, block) - } - } - } - return result -} - -// GetBlockByNumber returns a specific block from the buffer if it exists -func (b *BlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - for _, block := range b.data { - if block.Block.ChainId.Cmp(chainId) == 0 && block.Block.Number.Cmp(blockNumber) == 0 { - blockCopy := block - return &blockCopy - } - } - return nil -} - -// GetMaxBlockNumber returns the maximum block number for a chain in the buffer -func (b *BlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { - b.mu.RLock() - defer b.mu.RUnlock() - - var maxBlock *big.Int - for _, block := range b.data { - if block.Block.ChainId.Cmp(chainId) == 0 { - if maxBlock == nil || block.Block.Number.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(block.Block.Number) - } - } - } - return maxBlock -} - -// Clear empties the buffer without returning data -func (b *BlockBuffer) Clear() { - b.mu.Lock() - defer b.mu.Unlock() - - b.data = make([]common.BlockData, 0) - b.sizeBytes = 0 -} - -// Stats returns statistics about the buffer -func (b *BlockBuffer) Stats() BufferStats { - b.mu.RLock() - defer b.mu.RUnlock() - - stats := BufferStats{ - BlockCount: len(b.data), - SizeBytes: b.sizeBytes, - ChainCount: 0, - ChainStats: make(map[uint64]ChainStats), - } - - // Calculate per-chain statistics - for _, block := range b.data { - chainId := block.Block.ChainId.Uint64() - chainStat := 
stats.ChainStats[chainId] - - if chainStat.MinBlock == nil || block.Block.Number.Cmp(chainStat.MinBlock) < 0 { - chainStat.MinBlock = new(big.Int).Set(block.Block.Number) - } - if chainStat.MaxBlock == nil || block.Block.Number.Cmp(chainStat.MaxBlock) > 0 { - chainStat.MaxBlock = new(big.Int).Set(block.Block.Number) - } - chainStat.BlockCount++ - - stats.ChainStats[chainId] = chainStat - } - - stats.ChainCount = len(stats.ChainStats) - return stats -} - -// Private methods - -func (b *BlockBuffer) shouldFlushLocked() bool { - // Check size limit - if b.maxSizeBytes > 0 && b.sizeBytes >= b.maxSizeBytes { - return true - } - - // Check block count limit - if b.maxBlocks > 0 && len(b.data) >= b.maxBlocks { - return true - } - - return false -} - -// BufferStats contains statistics about the buffer -type BufferStats struct { - BlockCount int - SizeBytes int64 - ChainCount int - ChainStats map[uint64]ChainStats -} - -// ChainStats contains per-chain statistics -type ChainStats struct { - BlockCount int - MinBlock *big.Int - MaxBlock *big.Int -} - -// String returns a string representation of buffer stats -func (s BufferStats) String() string { - return fmt.Sprintf("BufferStats{blocks=%d, size=%dMB, chains=%d}", - s.BlockCount, s.SizeBytes/(1024*1024), s.ChainCount) -} - -// Close closes the buffer (no-op for in-memory buffer) -func (b *BlockBuffer) Close() error { - b.mu.Lock() - defer b.mu.Unlock() - - // Clear the buffer to free memory - b.data = nil - b.sizeBytes = 0 - - return nil -} - -// Ensure BlockBuffer implements IBlockBuffer interface -var _ IBlockBuffer = (*BlockBuffer)(nil) diff --git a/internal/storage/block_buffer_badger.go b/internal/storage/block_buffer_badger.go deleted file mode 100644 index 5c883a50..00000000 --- a/internal/storage/block_buffer_badger.go +++ /dev/null @@ -1,477 +0,0 @@ -package storage - -import ( - "bytes" - "encoding/gob" - "fmt" - "math/big" - "os" - "sync" - "time" - - "github.com/dgraph-io/badger/v4" - "github.com/dgraph-io/badger/v4/options" - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/internal/common" -) - -// BadgerBlockBuffer manages buffering of block data using Badger as an ephemeral cache -type BadgerBlockBuffer struct { - mu sync.RWMutex - db *badger.DB - tempDir string - maxSizeBytes int64 - maxBlocks int - blockCount int - gcTicker *time.Ticker - stopGC chan struct{} - - // Chain metadata cache for O(1) lookups - chainMetadata map[uint64]*ChainMetadata -} - -// ChainMetadata tracks per-chain statistics for fast lookups -type ChainMetadata struct { - MinBlock *big.Int - MaxBlock *big.Int - BlockCount int -} - -// NewBadgerBlockBuffer creates a new Badger-backed block buffer with ephemeral storage -func NewBadgerBlockBuffer(maxSizeMB int64, maxBlocks int) (*BadgerBlockBuffer, error) { - // Create temporary directory for ephemeral storage - tempDir, err := os.MkdirTemp("", "blockbuffer-badger-*") - if err != nil { - return nil, fmt.Errorf("failed to create temp dir: %w", err) - } - - // Configure Badger with optimized settings for ephemeral cache - opts := badger.DefaultOptions(tempDir) - - // Memory optimization settings (similar to badger.go but tuned for ephemeral use) - opts.ValueLogFileSize = 256 * 1024 * 1024 // 256MB (smaller for cache) - opts.BaseTableSize = 64 * 1024 * 1024 // 64MB - opts.BaseLevelSize = 64 * 1024 * 1024 // 64MB - opts.LevelSizeMultiplier = 10 // Aggressive growth - opts.NumMemtables = 5 // ~320MB - opts.MemTableSize = opts.BaseTableSize // 64MB per memtable - opts.NumLevelZeroTables = 5 - 
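The in-memory buffer removed above is driven through Add (which reports when a flush is due), Flush, and Stats, and the Badger-backed variant that follows implements the same IBlockBuffer interface. A small usage sketch of that flow, assuming the removed storage and common packages were still importable; the thresholds and block contents are arbitrary.

package main

import (
	"fmt"
	"math/big"

	"github.com/thirdweb-dev/indexer/internal/common"
	"github.com/thirdweb-dev/indexer/internal/storage"
)

func main() {
	// 1 MB cap or 256 blocks, whichever is hit first (arbitrary values).
	buf := storage.NewBlockBuffer(1, 256)
	defer buf.Close()

	for i := 0; i < 300; i++ {
		block := common.BlockData{Block: common.Block{ChainId: big.NewInt(1), Number: big.NewInt(int64(i))}}
		// Add reports whether either threshold has been crossed.
		if needsFlush := buf.Add([]common.BlockData{block}); needsFlush {
			stats := buf.Stats()
			flushed := buf.Flush()
			fmt.Printf("flushed %d blocks (%s)\n", len(flushed), stats)
		}
	}
	// Drain whatever is left.
	if !buf.IsEmpty() {
		fmt.Printf("final flush of %d blocks\n", len(buf.Flush()))
	}
}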
opts.NumLevelZeroTablesStall = 10 - opts.SyncWrites = false // No durability needed for cache - opts.DetectConflicts = false // No ACID needed - opts.NumCompactors = 2 // Less compactors for cache - opts.CompactL0OnClose = false // Don't compact on close (ephemeral) - opts.ValueLogMaxEntries = 100000 // Smaller for cache - opts.ValueThreshold = 1024 // Store values > 512 bytes in value log - opts.IndexCacheSize = 128 * 1024 * 1024 // 128MB index cache - opts.BlockCacheSize = 64 * 1024 * 1024 // 64MB block cache - opts.Compression = options.ZSTD - opts.Logger = nil // Disable badger's internal logging - - // Ephemeral-specific settings - opts.InMemory = false // Use disk but in temp directory - opts.ReadOnly = false - opts.MetricsEnabled = false - - db, err := badger.Open(opts) - if err != nil { - os.RemoveAll(tempDir) - return nil, fmt.Errorf("failed to open badger db: %w", err) - } - - b := &BadgerBlockBuffer{ - db: db, - tempDir: tempDir, - maxSizeBytes: maxSizeMB * 1024 * 1024, - maxBlocks: maxBlocks, - stopGC: make(chan struct{}), - chainMetadata: make(map[uint64]*ChainMetadata), - } - - // Start GC routine with faster interval for cache - b.gcTicker = time.NewTicker(30 * time.Second) - go b.runGC() - - return b, nil -} - -// Add adds blocks to the buffer and returns true if flush is needed -func (b *BadgerBlockBuffer) Add(blocks []common.BlockData) bool { - if len(blocks) == 0 { - return false - } - - b.mu.Lock() - defer b.mu.Unlock() - - err := b.db.Update(func(txn *badger.Txn) error { - for _, block := range blocks { - key := b.makeKey(block.Block.ChainId, block.Block.Number) - - var buf bytes.Buffer - if err := gob.NewEncoder(&buf).Encode(block); err != nil { - return err - } - - if err := txn.Set(key, buf.Bytes()); err != nil { - return err - } - } - return nil - }) - - if err != nil { - log.Error().Err(err).Msg("Failed to add blocks to badger buffer") - return false - } - - // Update counters - b.blockCount += len(blocks) - - // Update chain metadata for O(1) lookups - for _, block := range blocks { - chainId := block.Block.ChainId.Uint64() - meta, exists := b.chainMetadata[chainId] - if !exists { - meta = &ChainMetadata{ - MinBlock: new(big.Int).Set(block.Block.Number), - MaxBlock: new(big.Int).Set(block.Block.Number), - BlockCount: 1, - } - b.chainMetadata[chainId] = meta - } else { - if block.Block.Number.Cmp(meta.MinBlock) < 0 { - meta.MinBlock = new(big.Int).Set(block.Block.Number) - } - if block.Block.Number.Cmp(meta.MaxBlock) > 0 { - meta.MaxBlock = new(big.Int).Set(block.Block.Number) - } - meta.BlockCount++ - } - } - - // Check if flush is needed - return b.shouldFlushLocked() -} - -// Flush removes all data from the buffer and returns it -func (b *BadgerBlockBuffer) Flush() []common.BlockData { - b.mu.Lock() - defer b.mu.Unlock() - - if b.blockCount == 0 { - return nil - } - - var result []common.BlockData - - // Read all data - err := b.db.View(func(txn *badger.Txn) error { - opts := badger.DefaultIteratorOptions - opts.PrefetchValues = true - opts.PrefetchSize = 100 - it := txn.NewIterator(opts) - defer it.Close() - - for it.Rewind(); it.Valid(); it.Next() { - item := it.Item() - err := item.Value(func(val []byte) error { - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - return err - } - result = append(result, blockData) - return nil - }) - if err != nil { - log.Error().Err(err).Msg("Failed to decode block data during flush") - } - } - return nil - }) - - if err != nil { - 
log.Error().Err(err).Msg("Failed to read blocks during flush") - } - - // Clear the database - err = b.db.DropAll() - if err != nil { - log.Error().Err(err).Msg("Failed to clear badger buffer") - } - - // Reset counters and metadata - oldCount := b.blockCount - b.blockCount = 0 - b.chainMetadata = make(map[uint64]*ChainMetadata) - - log.Info(). - Int("block_count", oldCount). - Msg("Flushing badger buffer") - - return result -} - -// ShouldFlush checks if the buffer should be flushed based on configured thresholds -func (b *BadgerBlockBuffer) ShouldFlush() bool { - b.mu.RLock() - defer b.mu.RUnlock() - return b.shouldFlushLocked() -} - -// Size returns the current buffer size in bytes and block count -func (b *BadgerBlockBuffer) Size() (int64, int) { - b.mu.RLock() - defer b.mu.RUnlock() - - // Get actual size from Badger's LSM tree - lsm, _ := b.db.Size() - return lsm, b.blockCount -} - -// IsEmpty returns true if the buffer is empty -func (b *BadgerBlockBuffer) IsEmpty() bool { - b.mu.RLock() - defer b.mu.RUnlock() - return b.blockCount == 0 -} - -// GetData returns a copy of the current buffer data -func (b *BadgerBlockBuffer) GetData() []common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - var result []common.BlockData - - err := b.db.View(func(txn *badger.Txn) error { - opts := badger.DefaultIteratorOptions - opts.PrefetchValues = true - it := txn.NewIterator(opts) - defer it.Close() - - for it.Rewind(); it.Valid(); it.Next() { - item := it.Item() - err := item.Value(func(val []byte) error { - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - return err - } - result = append(result, blockData) - return nil - }) - if err != nil { - log.Error().Err(err).Msg("Failed to decode block data") - } - } - return nil - }) - - if err != nil { - log.Error().Err(err).Msg("Failed to get data from badger buffer") - } - - return result -} - -// GetBlocksInRange returns blocks from the buffer that fall within the given range -func (b *BadgerBlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - var result []common.BlockData - prefix := b.makePrefix(chainId) - - err := b.db.View(func(txn *badger.Txn) error { - opts := badger.DefaultIteratorOptions - opts.Prefix = prefix - it := txn.NewIterator(opts) - defer it.Close() - - for it.Rewind(); it.Valid(); it.Next() { - item := it.Item() - err := item.Value(func(val []byte) error { - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - return err - } - - blockNum := blockData.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - result = append(result, blockData) - } - return nil - }) - if err != nil { - log.Error().Err(err).Msg("Failed to decode block data in range") - } - } - return nil - }) - - if err != nil { - log.Error().Err(err).Msg("Failed to get blocks in range from badger buffer") - } - - return result -} - -// GetBlockByNumber returns a specific block from the buffer if it exists -func (b *BadgerBlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - var result *common.BlockData - key := b.makeKey(chainId, blockNumber) - - err := b.db.View(func(txn *badger.Txn) error { - item, err := txn.Get(key) - if err == badger.ErrKeyNotFound { - return nil - } - if err != nil { - return err - } - - return item.Value(func(val []byte) error 
{ - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - return err - } - result = &blockData - return nil - }) - }) - - if err != nil && err != badger.ErrKeyNotFound { - log.Error().Err(err).Msg("Failed to get block by number from badger buffer") - } - - return result -} - -// GetMaxBlockNumber returns the maximum block number for a chain in the buffer -func (b *BadgerBlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { - b.mu.RLock() - defer b.mu.RUnlock() - - // O(1) lookup using cached metadata - meta, exists := b.chainMetadata[chainId.Uint64()] - if !exists || meta.MaxBlock == nil { - return nil - } - - // Return a copy to prevent external modification - return new(big.Int).Set(meta.MaxBlock) -} - -// Clear empties the buffer without returning data -func (b *BadgerBlockBuffer) Clear() { - b.mu.Lock() - defer b.mu.Unlock() - - err := b.db.DropAll() - if err != nil { - log.Error().Err(err).Msg("Failed to clear badger buffer") - } - - b.blockCount = 0 - b.chainMetadata = make(map[uint64]*ChainMetadata) -} - -// Stats returns statistics about the buffer -func (b *BadgerBlockBuffer) Stats() BufferStats { - b.mu.RLock() - defer b.mu.RUnlock() - - // Get actual size from Badger - lsm, _ := b.db.Size() - - stats := BufferStats{ - BlockCount: b.blockCount, - SizeBytes: lsm, - ChainCount: len(b.chainMetadata), - ChainStats: make(map[uint64]ChainStats), - } - - // Use cached metadata for O(1) stats generation - for chainId, meta := range b.chainMetadata { - if meta.MinBlock != nil && meta.MaxBlock != nil { - stats.ChainStats[chainId] = ChainStats{ - BlockCount: meta.BlockCount, - MinBlock: new(big.Int).Set(meta.MinBlock), - MaxBlock: new(big.Int).Set(meta.MaxBlock), - } - } - } - - return stats -} - -// Close closes the buffer and cleans up resources -func (b *BadgerBlockBuffer) Close() error { - b.mu.Lock() - defer b.mu.Unlock() - - // Stop GC routine - if b.gcTicker != nil { - b.gcTicker.Stop() - close(b.stopGC) - } - - // Close database - if err := b.db.Close(); err != nil { - log.Error().Err(err).Msg("Failed to close badger buffer database") - } - - // Clean up temporary directory - if err := os.RemoveAll(b.tempDir); err != nil { - log.Error().Err(err).Msg("Failed to remove temp directory") - } - - return nil -} - -// Private methods - -func (b *BadgerBlockBuffer) shouldFlushLocked() bool { - // Check size limit using Badger's actual size - if b.maxSizeBytes > 0 { - lsm, _ := b.db.Size() - if lsm >= b.maxSizeBytes { - return true - } - } - - // Check block count limit - if b.maxBlocks > 0 && b.blockCount >= b.maxBlocks { - return true - } - - return false -} - -func (b *BadgerBlockBuffer) makeKey(chainId *big.Int, blockNumber *big.Int) []byte { - // Use padded format to ensure lexicographic ordering matches numeric ordering - return fmt.Appendf(nil, "block:%s:%s", chainId.String(), blockNumber.String()) -} - -func (b *BadgerBlockBuffer) makePrefix(chainId *big.Int) []byte { - return fmt.Appendf(nil, "block:%s:", chainId.String()) -} - -func (b *BadgerBlockBuffer) runGC() { - for { - select { - case <-b.gcTicker.C: - err := b.db.RunValueLogGC(0.7) // More aggressive GC for cache - if err != nil && err != badger.ErrNoRewrite { - log.Debug().Err(err).Msg("BadgerBlockBuffer GC error") - } - case <-b.stopGC: - return - } - } -} - -// Ensure BadgerBlockBuffer implements IBlockBuffer interface -var _ IBlockBuffer = (*BadgerBlockBuffer)(nil) diff --git a/internal/storage/block_buffer_badger_test.go 
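The removed makeKey helpers (in both the Badger and Pebble buffers) say they use a padded format, but they emit unpadded decimal strings, so byte order and numeric order diverge once block numbers cross a digit boundary: "block:1:9" sorts after "block:1:100". A zero-padded encoding keeps lexicographic order aligned with numeric order; a minimal sketch, assuming block numbers fit in 20 decimal digits:

```go
// Sketch: zero-padded keys so lexicographic order matches numeric order.
package main

import (
	"fmt"
	"math/big"
	"sort"
)

// makeKey zero-pads the decimal block number to a fixed width.
// The 20-digit width is an assumption for this sketch.
func makeKey(chainID, blockNumber *big.Int) string {
	return fmt.Sprintf("block:%s:%020d", chainID.String(), blockNumber)
}

func main() {
	keys := []string{
		makeKey(big.NewInt(1), big.NewInt(9)),
		makeKey(big.NewInt(1), big.NewInt(100)),
		makeKey(big.NewInt(1), big.NewInt(21)),
	}
	sort.Strings(keys)
	fmt.Println(keys) // 9 sorts before 21, which sorts before 100
}
```

With unpadded keys the ordering mismatch mostly matters for range scans and ordered iteration; point lookups by exact key are unaffected.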
b/internal/storage/block_buffer_badger_test.go deleted file mode 100644 index b10e8d82..00000000 --- a/internal/storage/block_buffer_badger_test.go +++ /dev/null @@ -1,144 +0,0 @@ -package storage - -import ( - "math/big" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/thirdweb-dev/indexer/internal/common" -) - -func TestBadgerBlockBufferMetadataOptimization(t *testing.T) { - // Create a new Badger buffer - buffer, err := NewBadgerBlockBuffer(10, 1000) // 10MB, 1000 blocks max - require.NoError(t, err) - defer buffer.Close() - - chainId := big.NewInt(1) - - // Add blocks - blocks := []common.BlockData{ - { - Block: common.Block{ - ChainId: chainId, - Number: big.NewInt(100), - Hash: "0x1234", - }, - }, - { - Block: common.Block{ - ChainId: chainId, - Number: big.NewInt(101), - Hash: "0x5678", - }, - }, - { - Block: common.Block{ - ChainId: chainId, - Number: big.NewInt(99), - Hash: "0xabcd", - }, - }, - } - - buffer.Add(blocks) - - // Test O(1) GetMaxBlockNumber - start := time.Now() - maxBlock := buffer.GetMaxBlockNumber(chainId) - elapsed := time.Since(start) - - assert.NotNil(t, maxBlock) - assert.Equal(t, big.NewInt(101), maxBlock) - assert.Less(t, elapsed, time.Millisecond, "GetMaxBlockNumber should be O(1) and very fast") - - // Test O(1) Stats - start = time.Now() - stats := buffer.Stats() - elapsed = time.Since(start) - - assert.Equal(t, 3, stats.BlockCount) - assert.Equal(t, 1, stats.ChainCount) - chainStats := stats.ChainStats[1] - assert.Equal(t, 3, chainStats.BlockCount) - assert.Equal(t, big.NewInt(99), chainStats.MinBlock) - assert.Equal(t, big.NewInt(101), chainStats.MaxBlock) - assert.Less(t, elapsed, time.Millisecond, "Stats should be O(1) and very fast") - - // Test metadata is updated after flush - buffer.Flush() - maxBlock = buffer.GetMaxBlockNumber(chainId) - assert.Nil(t, maxBlock) - - // Add new blocks and verify metadata is rebuilt - newBlocks := []common.BlockData{ - { - Block: common.Block{ - ChainId: chainId, - Number: big.NewInt(200), - Hash: "0xffff", - }, - }, - } - buffer.Add(newBlocks) - - maxBlock = buffer.GetMaxBlockNumber(chainId) - assert.NotNil(t, maxBlock) - assert.Equal(t, big.NewInt(200), maxBlock) -} - -func BenchmarkBadgerBlockBufferGetMaxBlockNumber(b *testing.B) { - buffer, err := NewBadgerBlockBuffer(100, 10000) - require.NoError(b, err) - defer buffer.Close() - - chainId := big.NewInt(1) - - // Add many blocks - for i := 0; i < 1000; i++ { - blocks := []common.BlockData{ - { - Block: common.Block{ - ChainId: chainId, - Number: big.NewInt(int64(i)), - Hash: "0x1234", - }, - }, - } - buffer.Add(blocks) - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = buffer.GetMaxBlockNumber(chainId) - } -} - -func BenchmarkBadgerBlockBufferStats(b *testing.B) { - buffer, err := NewBadgerBlockBuffer(100, 10000) - require.NoError(b, err) - defer buffer.Close() - - // Add blocks for multiple chains - for chainId := 1; chainId <= 5; chainId++ { - for i := 0; i < 100; i++ { - blocks := []common.BlockData{ - { - Block: common.Block{ - ChainId: big.NewInt(int64(chainId)), - Number: big.NewInt(int64(i)), - Hash: "0x1234", - }, - }, - } - buffer.Add(blocks) - } - } - - b.ResetTimer() - for i := 0; i < b.N; i++ { - _ = buffer.Stats() - } -} diff --git a/internal/storage/block_buffer_pebble.go b/internal/storage/block_buffer_pebble.go deleted file mode 100644 index 662d0add..00000000 --- a/internal/storage/block_buffer_pebble.go +++ /dev/null @@ -1,495 +0,0 @@ -package storage - -import ( - 
"bytes" - "encoding/gob" - "fmt" - "math/big" - "os" - "sync" - - "github.com/cockroachdb/pebble" - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/internal/common" -) - -// PebbleBlockBuffer manages buffering of block data using Pebble as an ephemeral cache -type PebbleBlockBuffer struct { - mu sync.RWMutex - db *pebble.DB - tempDir string - maxSizeBytes int64 - maxBlocks int - blockCount int - - // Chain metadata cache for O(1) lookups - chainMetadata map[uint64]*PebbleChainMetadata -} - -// PebbleChainMetadata tracks per-chain statistics for fast lookups -type PebbleChainMetadata struct { - MinBlock *big.Int - MaxBlock *big.Int - BlockCount int -} - -// NewPebbleBlockBuffer creates a new Pebble-backed block buffer with ephemeral storage -func NewPebbleBlockBuffer(maxSizeMB int64, maxBlocks int) (*PebbleBlockBuffer, error) { - tempDir, err := os.MkdirTemp("", "blockbuffer-pebble-*") - if err != nil { - return nil, fmt.Errorf("failed to create temp dir: %w", err) - } - - cache := pebble.NewCache(64 << 20) // Small cache for buffering - defer cache.Unref() - - opts := &pebble.Options{ - MemTableSize: 64 << 20, // 64MB per memtable - MemTableStopWritesThreshold: 4, // ~256MB total - L0CompactionThreshold: 8, // Balance between write amplification and compaction - L0StopWritesThreshold: 24, - - // Compaction settings for cache workload - MaxConcurrentCompactions: func() int { return 1 }, - - // Cache settings - smaller since this is ephemeral - Cache: cache, - - // File sizes optimized for cache - Levels: make([]pebble.LevelOptions, 7), - - DisableWAL: false, - - // Disable verbose logging - Logger: nil, - } - - // Configure level-specific options - for i := range opts.Levels { - opts.Levels[i] = pebble.LevelOptions{ - BlockSize: 128 << 10, // 128KB blocks (smaller for cache) - IndexBlockSize: 256 << 10, // 256KB index blocks - FilterPolicy: nil, // Disable bloom filters for ephemeral cache (save memory) - } - if i == 0 { - // L0 gets smaller files for faster compaction - opts.Levels[i].TargetFileSize = 64 << 20 // 64MB - opts.Levels[i].Compression = pebble.SnappyCompression - } else { - // Other levels grow exponentially - opts.Levels[i].TargetFileSize = min( - opts.Levels[i-1].TargetFileSize*4, - 1<<30, // 1GB cap - ) - opts.Levels[i].Compression = pebble.SnappyCompression - } - } - - db, err := pebble.Open(tempDir, opts) - if err != nil { - os.RemoveAll(tempDir) - return nil, fmt.Errorf("failed to open pebble db: %w", err) - } - - b := &PebbleBlockBuffer{ - db: db, - tempDir: tempDir, - maxSizeBytes: maxSizeMB * 1024 * 1024, - maxBlocks: maxBlocks, - chainMetadata: make(map[uint64]*PebbleChainMetadata), - } - - return b, nil -} - -// Add adds blocks to the buffer and returns true if flush is needed -func (b *PebbleBlockBuffer) Add(blocks []common.BlockData) bool { - if len(blocks) == 0 { - return false - } - - b.mu.Lock() - defer b.mu.Unlock() - - batch := b.db.NewBatch() - defer batch.Close() - - for _, block := range blocks { - key := b.makeKey(block.Block.ChainId, block.Block.Number) - - var buf bytes.Buffer - if err := gob.NewEncoder(&buf).Encode(block); err != nil { - log.Error().Err(err).Msg("Failed to encode block data") - continue - } - - if err := batch.Set(key, buf.Bytes(), nil); err != nil { - log.Error().Err(err).Msg("Failed to set block in batch") - continue - } - } - - if err := batch.Commit(pebble.Sync); err != nil { - log.Error().Err(err).Msg("Failed to add blocks to pebble buffer") - return false - } - - // Update counters - b.blockCount += len(blocks) - - 
// Update chain metadata for O(1) lookups - for _, block := range blocks { - chainId := block.Block.ChainId.Uint64() - meta, exists := b.chainMetadata[chainId] - if !exists { - meta = &PebbleChainMetadata{ - MinBlock: new(big.Int).Set(block.Block.Number), - MaxBlock: new(big.Int).Set(block.Block.Number), - BlockCount: 1, - } - b.chainMetadata[chainId] = meta - } else { - if block.Block.Number.Cmp(meta.MinBlock) < 0 { - meta.MinBlock = new(big.Int).Set(block.Block.Number) - } - if block.Block.Number.Cmp(meta.MaxBlock) > 0 { - meta.MaxBlock = new(big.Int).Set(block.Block.Number) - } - meta.BlockCount++ - } - } - - // Check if flush is needed - return b.shouldFlushLocked() -} - -// Flush removes all data from the buffer and returns it -func (b *PebbleBlockBuffer) Flush() []common.BlockData { - b.mu.Lock() - defer b.mu.Unlock() - - if b.blockCount == 0 { - return nil - } - - var result []common.BlockData - - // Read all data - iter, err := b.db.NewIter(nil) - if err != nil { - log.Error().Err(err).Msg("Failed to create iterator for flush") - return nil - } - defer iter.Close() - - for iter.First(); iter.Valid(); iter.Next() { - val, err := iter.ValueAndErr() - if err != nil { - log.Error().Err(err).Msg("Failed to get value during flush") - continue - } - - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - log.Error().Err(err).Msg("Failed to decode block data during flush") - continue - } - result = append(result, blockData) - } - - if err := iter.Error(); err != nil { - log.Error().Err(err).Msg("Iterator error during flush") - } - - // Clear the database - // In Pebble, we need to delete all keys - batch := b.db.NewBatch() - defer batch.Close() - - // Re-iterate to delete all keys - iter2, err := b.db.NewIter(nil) - if err != nil { - log.Error().Err(err).Msg("Failed to create iterator for deletion") - return result - } - defer iter2.Close() - - for iter2.First(); iter2.Valid(); iter2.Next() { - if err := batch.Delete(iter2.Key(), nil); err != nil { - log.Error().Err(err).Msg("Failed to delete key during flush") - } - } - - if err := batch.Commit(pebble.Sync); err != nil { - log.Error().Err(err).Msg("Failed to clear pebble buffer") - } - - // Reset counters and metadata - oldCount := b.blockCount - b.blockCount = 0 - b.chainMetadata = make(map[uint64]*PebbleChainMetadata) - - log.Info(). - Int("block_count", oldCount). 
- Msg("Flushing pebble buffer") - - return result -} - -// ShouldFlush checks if the buffer should be flushed based on configured thresholds -func (b *PebbleBlockBuffer) ShouldFlush() bool { - b.mu.RLock() - defer b.mu.RUnlock() - return b.shouldFlushLocked() -} - -// Size returns the current buffer size in bytes and block count -func (b *PebbleBlockBuffer) Size() (int64, int) { - b.mu.RLock() - defer b.mu.RUnlock() - - // Get metrics from Pebble - metrics := b.db.Metrics() - totalSize := int64(metrics.DiskSpaceUsage()) - - return totalSize, b.blockCount -} - -// IsEmpty returns true if the buffer is empty -func (b *PebbleBlockBuffer) IsEmpty() bool { - b.mu.RLock() - defer b.mu.RUnlock() - return b.blockCount == 0 -} - -// GetData returns a copy of the current buffer data -func (b *PebbleBlockBuffer) GetData() []common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - var result []common.BlockData - - iter, err := b.db.NewIter(nil) - if err != nil { - log.Error().Err(err).Msg("Failed to create iterator for GetData") - return nil - } - defer iter.Close() - - for iter.First(); iter.Valid(); iter.Next() { - val, err := iter.ValueAndErr() - if err != nil { - log.Error().Err(err).Msg("Failed to get value") - continue - } - - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - log.Error().Err(err).Msg("Failed to decode block data") - continue - } - result = append(result, blockData) - } - - if err := iter.Error(); err != nil { - log.Error().Err(err).Msg("Iterator error in GetData") - } - - return result -} - -// GetBlocksInRange returns blocks from the buffer that fall within the given range -func (b *PebbleBlockBuffer) GetBlocksInRange(chainId *big.Int, startBlock, endBlock *big.Int) []common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - var result []common.BlockData - prefix := b.makePrefix(chainId) - - // Create iterator with prefix bounds - iter, err := b.db.NewIter(&pebble.IterOptions{ - LowerBound: prefix, - UpperBound: append(prefix, 0xff), - }) - if err != nil { - log.Error().Err(err).Msg("Failed to create iterator for range query") - return nil - } - defer iter.Close() - - for iter.First(); iter.Valid(); iter.Next() { - val, err := iter.ValueAndErr() - if err != nil { - log.Error().Err(err).Msg("Failed to get value in range") - continue - } - - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - log.Error().Err(err).Msg("Failed to decode block data in range") - continue - } - - blockNum := blockData.Block.Number - if blockNum.Cmp(startBlock) >= 0 && blockNum.Cmp(endBlock) <= 0 { - result = append(result, blockData) - } - } - - if err := iter.Error(); err != nil { - log.Error().Err(err).Msg("Iterator error in GetBlocksInRange") - } - - return result -} - -// GetBlockByNumber returns a specific block from the buffer if it exists -func (b *PebbleBlockBuffer) GetBlockByNumber(chainId *big.Int, blockNumber *big.Int) *common.BlockData { - b.mu.RLock() - defer b.mu.RUnlock() - - key := b.makeKey(chainId, blockNumber) - - val, closer, err := b.db.Get(key) - if err == pebble.ErrNotFound { - return nil - } - if err != nil { - log.Error().Err(err).Msg("Failed to get block by number from pebble buffer") - return nil - } - defer closer.Close() - - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - log.Error().Err(err).Msg("Failed to decode block data") - return nil - } - - return &blockData -} - -// 
GetMaxBlockNumber returns the maximum block number for a chain in the buffer -func (b *PebbleBlockBuffer) GetMaxBlockNumber(chainId *big.Int) *big.Int { - b.mu.RLock() - defer b.mu.RUnlock() - - // O(1) lookup using cached metadata - meta, exists := b.chainMetadata[chainId.Uint64()] - if !exists || meta.MaxBlock == nil { - return nil - } - - // Return a copy to prevent external modification - return new(big.Int).Set(meta.MaxBlock) -} - -// Clear empties the buffer without returning data -func (b *PebbleBlockBuffer) Clear() { - b.mu.Lock() - defer b.mu.Unlock() - - // Delete all keys - batch := b.db.NewBatch() - defer batch.Close() - - iter, err := b.db.NewIter(nil) - if err != nil { - log.Error().Err(err).Msg("Failed to create iterator for clear") - return - } - defer iter.Close() - - for iter.First(); iter.Valid(); iter.Next() { - if err := batch.Delete(iter.Key(), nil); err != nil { - log.Error().Err(err).Msg("Failed to delete key during clear") - } - } - - if err := batch.Commit(pebble.Sync); err != nil { - log.Error().Err(err).Msg("Failed to clear pebble buffer") - } - - b.blockCount = 0 - b.chainMetadata = make(map[uint64]*PebbleChainMetadata) -} - -// Stats returns statistics about the buffer -func (b *PebbleBlockBuffer) Stats() BufferStats { - b.mu.RLock() - defer b.mu.RUnlock() - - // Get metrics from Pebble - metrics := b.db.Metrics() - totalSize := int64(metrics.DiskSpaceUsage()) - - stats := BufferStats{ - BlockCount: b.blockCount, - SizeBytes: totalSize, - ChainCount: len(b.chainMetadata), - ChainStats: make(map[uint64]ChainStats), - } - - // Use cached metadata for O(1) stats generation - for chainId, meta := range b.chainMetadata { - if meta.MinBlock != nil && meta.MaxBlock != nil { - stats.ChainStats[chainId] = ChainStats{ - BlockCount: meta.BlockCount, - MinBlock: new(big.Int).Set(meta.MinBlock), - MaxBlock: new(big.Int).Set(meta.MaxBlock), - } - } - } - - return stats -} - -// Close closes the buffer and cleans up resources -func (b *PebbleBlockBuffer) Close() error { - b.mu.Lock() - defer b.mu.Unlock() - - // Close database - if err := b.db.Close(); err != nil { - log.Error().Err(err).Msg("Failed to close pebble buffer database") - } - - // Clean up temporary directory - if err := os.RemoveAll(b.tempDir); err != nil { - log.Error().Err(err).Msg("Failed to remove temp directory") - } - - return nil -} - -// Private methods - -func (b *PebbleBlockBuffer) shouldFlushLocked() bool { - // Check size limit using Pebble's metrics - if b.maxSizeBytes > 0 { - metrics := b.db.Metrics() - totalSize := int64(metrics.DiskSpaceUsage()) - if totalSize >= b.maxSizeBytes { - return true - } - } - - // Check block count limit - if b.maxBlocks > 0 && b.blockCount >= b.maxBlocks { - return true - } - - return false -} - -func (b *PebbleBlockBuffer) makeKey(chainId *big.Int, blockNumber *big.Int) []byte { - // Use padded format to ensure lexicographic ordering matches numeric ordering - return fmt.Appendf(nil, "block:%s:%s", chainId.String(), blockNumber.String()) -} - -func (b *PebbleBlockBuffer) makePrefix(chainId *big.Int) []byte { - return fmt.Appendf(nil, "block:%s:", chainId.String()) -} - -// Ensure PebbleBlockBuffer implements IBlockBuffer interface -var _ IBlockBuffer = (*PebbleBlockBuffer)(nil) diff --git a/internal/storage/clickhouse.go b/internal/storage/clickhouse.go deleted file mode 100644 index 6ef0c093..00000000 --- a/internal/storage/clickhouse.go +++ /dev/null @@ -1,1836 +0,0 @@ -package storage - -import ( - "context" - "crypto/tls" - "database/sql" - "encoding/json" - 
"fmt" - "math/big" - "reflect" - "strings" - "sync" - "time" - - "github.com/ClickHouse/clickhouse-go/v2" - "github.com/ClickHouse/clickhouse-go/v2/lib/driver" - ethereum "github.com/ethereum/go-ethereum/common" - zLog "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/metrics" -) - -type ClickHouseConnector struct { - conn clickhouse.Conn - cfg *config.ClickhouseConfig -} - -type InsertOptions struct { - AsDeleted bool -} - -var DEFAULT_MAX_ROWS_PER_INSERT = 100000 -var ZERO_BYTES_66 = strings.Repeat("\x00", 66) -var ZERO_BYTES_10 = strings.Repeat("\x00", 10) -var ZERO_BYTES_42 = strings.Repeat("\x00", 42) - -var defaultBlockFields = []string{ - "chain_id", "block_number", "hash", "parent_hash", "block_timestamp", "nonce", - "sha3_uncles", "mix_hash", "miner", "state_root", "transactions_root", "logs_bloom", - "receipts_root", "difficulty", "total_difficulty", "size", "extra_data", "gas_limit", - "gas_used", "transaction_count", "base_fee_per_gas", "withdrawals_root", -} - -var defaultTransactionFields = []string{ - "chain_id", "hash", "nonce", "block_hash", "block_number", "block_timestamp", - "transaction_index", "from_address", "to_address", "value", "gas", "gas_price", - "data", "function_selector", "max_fee_per_gas", "max_priority_fee_per_gas", - "max_fee_per_blob_gas", "blob_versioned_hashes", "transaction_type", "r", "s", "v", - "access_list", "authorization_list", "contract_address", "gas_used", "cumulative_gas_used", - "effective_gas_price", "blob_gas_used", "blob_gas_price", "logs_bloom", "status", -} - -var defaultLogFields = []string{ - "chain_id", "block_number", "block_hash", "block_timestamp", "transaction_hash", - "transaction_index", "log_index", "address", "data", "topic_0", "topic_1", "topic_2", "topic_3", -} - -var defaultTraceFields = []string{ - "chain_id", "block_number", "block_hash", "block_timestamp", "transaction_hash", - "transaction_index", "subtraces", "trace_address", "type", "call_type", "error", - "from_address", "to_address", "gas", "gas_used", "input", "output", "value", "author", - "reward_type", "refund_address", -} - -func NewClickHouseConnector(cfg *config.ClickhouseConfig) (*ClickHouseConnector, error) { - conn, err := connectDB(cfg) - // Question: Should we add the table setup here? 
- if err != nil { - return nil, err - } - if cfg.MaxRowsPerInsert == 0 { - cfg.MaxRowsPerInsert = DEFAULT_MAX_ROWS_PER_INSERT - } - return &ClickHouseConnector{ - conn: conn, - cfg: cfg, - }, nil -} - -// Close closes the ClickHouse connection -func (c *ClickHouseConnector) Close() error { - if c.conn != nil { - return c.conn.Close() - } - return nil -} - -func connectDB(cfg *config.ClickhouseConfig) (clickhouse.Conn, error) { - port := cfg.Port - if port == 0 { - return nil, fmt.Errorf("invalid CLICKHOUSE_PORT: %d", port) - } - - conn, err := clickhouse.Open(&clickhouse.Options{ - Addr: []string{fmt.Sprintf("%s:%d", cfg.Host, port)}, - Protocol: clickhouse.Native, - TLS: func() *tls.Config { - if cfg.DisableTLS { - return nil - } - return &tls.Config{} - }(), - Auth: clickhouse.Auth{ - Username: cfg.Username, - Password: cfg.Password, - }, - MaxOpenConns: cfg.MaxOpenConns, - MaxIdleConns: cfg.MaxIdleConns, - Compression: func() *clickhouse.Compression { - c := &clickhouse.Compression{} - if cfg.EnableCompression { - zLog.Debug().Msg("ClickHouse LZ4 compression is enabled") - c.Method = clickhouse.CompressionLZ4 - } - return c - }(), - Settings: func() clickhouse.Settings { - settings := clickhouse.Settings{ - "do_not_merge_across_partitions_select_final": "1", - "use_skip_indexes_if_final": "1", - "optimize_move_to_prewhere_if_final": "1", - } - if cfg.EnableParallelViewProcessing { - settings["parallel_view_processing"] = "1" - } - if cfg.AsyncInsert { - settings["async_insert"] = "1" - settings["wait_for_async_insert"] = "1" - settings["async_insert_busy_timeout_ms"] = "3000" // 3 seconds - settings["async_insert_max_data_size"] = "104857600" // 100MB - settings["async_insert_max_query_number"] = "100000" // 100k - } - return settings - }(), - }) - if err != nil { - return nil, err - } - return conn, nil -} - -func (c *ClickHouseConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - if len(fields) == 0 { - fields = c.getChainSpecificFields(qf.ChainId, "blocks", defaultBlockFields) - } - return executeQuery[common.Block](c, "blocks", strings.Join(fields, ", "), qf, scanBlock) -} - -func (c *ClickHouseConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - if len(fields) == 0 { - fields = c.getChainSpecificFields(qf.ChainId, "transactions", defaultTransactionFields) - } - return executeQuery[common.Transaction](c, "transactions", strings.Join(fields, ", "), qf, scanTransaction) -} - -func (c *ClickHouseConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - if len(fields) == 0 { - fields = c.getChainSpecificFields(qf.ChainId, "logs", defaultLogFields) - } - return executeQuery[common.Log](c, "logs", strings.Join(fields, ", "), qf, scanLog) -} - -func (c *ClickHouseConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - if len(fields) == 0 { - fields = c.getChainSpecificFields(qf.ChainId, "traces", defaultTraceFields) - } - return executeQuery[common.Trace](c, "traces", strings.Join(fields, ", "), qf, scanTrace) -} - -func (c *ClickHouseConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - // Build the SELECT clause with aggregates - selectColumns := strings.Join(append(qf.GroupBy, qf.Aggregates...), ", ") - - // Use the new query building logic - query := c.buildQuery(table, selectColumns, qf) - - // Execute the query - rows, err := c.conn.Query(context.Background(), query) - if err != nil 
{ - return QueryResult[interface{}]{}, err - } - defer rows.Close() - - columnNames := rows.Columns() - columnTypes := rows.ColumnTypes() - - // Collect results - initialize as empty array to ensure we always return an array - aggregates := make([]map[string]interface{}, 0) - for rows.Next() { - values := make([]interface{}, len(columnNames)) - - // Assign Go types based on ClickHouse types - for i, colType := range columnTypes { - dbType := colType.DatabaseTypeName() - values[i] = mapClickHouseTypeToGoType(dbType) - } - - if err := rows.Scan(values...); err != nil { - return QueryResult[interface{}]{}, fmt.Errorf("failed to scan row: %w", err) - } - - // Prepare the result map for the current row - result := make(map[string]interface{}) - for i, colName := range columnNames { - valuePtr := values[i] - value := getUnderlyingValue(valuePtr) - - // Convert *big.Int to string - if bigIntValue, ok := value.(big.Int); ok { - result[colName] = BigInt{Int: bigIntValue} - } else { - result[colName] = value - } - } - - aggregates = append(aggregates, result) - } - - if err := rows.Err(); err != nil { - return QueryResult[interface{}]{}, fmt.Errorf("row iteration error: %w", err) - } - - return QueryResult[interface{}]{Data: nil, Aggregates: aggregates}, nil -} - -func executeQuery[T any](c *ClickHouseConnector, table, columns string, qf QueryFilter, scanFunc func(driver.Rows) (T, error)) (QueryResult[T], error) { - query := c.buildQuery(table, columns, qf) - - rows, err := c.conn.Query(context.Background(), query) - if err != nil { - return QueryResult[T]{}, err - } - defer rows.Close() - - queryResult := QueryResult[T]{ - Data: []T{}, - } - - for rows.Next() { - item, err := scanFunc(rows) - if err != nil { - return QueryResult[T]{}, err - } - queryResult.Data = append(queryResult.Data, item) - } - - return queryResult, nil -} - -func (c *ClickHouseConnector) buildQuery(table, columns string, qf QueryFilter) string { - var query string - - // Check if we need to handle wallet address with UNION for transactions - if table == "transactions" && qf.WalletAddress != "" { - query = c.buildUnionQuery(table, columns, qf) - } else { - query = c.buildStandardQuery(table, columns, qf) - } - - // Apply post-query clauses to ALL queries - query = c.addPostQueryClauses(query, qf) - - return query -} - -func (c *ClickHouseConnector) buildStandardQuery(table, columns string, qf QueryFilter) string { - tableName := c.getTableName(qf.ChainId, table) - query := fmt.Sprintf("SELECT %s FROM %s.%s", columns, c.cfg.Database, tableName) - if qf.ForceConsistentData { - query += " FINAL" - } - - whereClauses := c.buildWhereClauses(table, qf) - - // Add WHERE clause to query if there are any conditions - if len(whereClauses) > 0 { - query += " WHERE " + strings.Join(whereClauses, " AND ") - } - - return query -} - -func (c *ClickHouseConnector) buildUnionQuery(table, columns string, qf QueryFilter) string { - tableName := c.getTableName(qf.ChainId, table) - - // Build base where clauses (excluding wallet address) - baseWhereClauses := c.buildWhereClauses(table, qf) - - // Create two separate queries for from_address and to_address - fromQuery := fmt.Sprintf("SELECT %s FROM %s.%s", columns, c.cfg.Database, tableName) - if qf.ForceConsistentData { - fromQuery += " FINAL" - } - - toQuery := fmt.Sprintf("SELECT %s FROM %s.%s", columns, c.cfg.Database, tableName) - if qf.ForceConsistentData { - toQuery += " FINAL" - } - - // Add base where clauses to both queries - if len(baseWhereClauses) > 0 { - baseWhereClause := 
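executeQuery pairs Go generics with a per-row scan function so each table reuses one query loop. The same shape works against database/sql; the sketch below is an analogue under that assumption (it is not the connector's API) and adds a rows.Err() check after iteration, which the removed helper skips:

```go
// Sketch: a generic query-and-scan helper, analogous to executeQuery[T].
package main

import (
	"database/sql"
	"fmt"
)

// queryAll runs a query and converts each row with scanFunc into a typed slice.
func queryAll[T any](db *sql.DB, query string, scanFunc func(*sql.Rows) (T, error), args ...any) ([]T, error) {
	rows, err := db.Query(query, args...)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	out := []T{}
	for rows.Next() {
		item, err := scanFunc(rows)
		if err != nil {
			return nil, err
		}
		out = append(out, item)
	}
	// Surface any error the iterator hit after the last successful row.
	return out, rows.Err()
}

type block struct {
	Number uint64
	Hash   string
}

// scanBlock converts one result row into a block value.
func scanBlock(rows *sql.Rows) (block, error) {
	var b block
	err := rows.Scan(&b.Number, &b.Hash)
	return b, err
}

func main() {
	// With a real *sql.DB from sql.Open, usage would look like:
	//   blocks, err := queryAll(db, "SELECT number, hash FROM blocks WHERE chain_id = ?", scanBlock, 1)
	fmt.Println("generic scan helper sketch")
}
```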
strings.Join(baseWhereClauses, " AND ") - fromQuery += " WHERE " + baseWhereClause + " AND from_address = '" + strings.ToLower(qf.WalletAddress) + "'" - toQuery += " WHERE " + baseWhereClause + " AND to_address = '" + strings.ToLower(qf.WalletAddress) + "'" - } else { - fromQuery += " WHERE from_address = '" + strings.ToLower(qf.WalletAddress) + "'" - toQuery += " WHERE to_address = '" + strings.ToLower(qf.WalletAddress) + "'" - } - - // Apply ORDER BY to both queries for consistent results - if qf.SortBy != "" { - fromQuery += fmt.Sprintf(" ORDER BY %s %s", qf.SortBy, qf.SortOrder) - toQuery += fmt.Sprintf(" ORDER BY %s %s", qf.SortBy, qf.SortOrder) - } - - // Apply LIMIT to each individual query to avoid loading too much data - // We use a higher limit to ensure we get enough results after UNION - individualLimit := qf.Limit * 2 // Double the limit to account for potential duplicates - if qf.Page >= 0 && qf.Limit > 0 { - offset := qf.Page * qf.Limit - fromQuery += fmt.Sprintf(" LIMIT %d OFFSET %d", individualLimit, offset) - toQuery += fmt.Sprintf(" LIMIT %d OFFSET %d", individualLimit, offset) - } else if qf.Limit > 0 { - fromQuery += fmt.Sprintf(" LIMIT %d", individualLimit) - toQuery += fmt.Sprintf(" LIMIT %d", individualLimit) - } - - // Combine with UNION - unionQuery := fmt.Sprintf("(%s) UNION ALL (%s)", fromQuery, toQuery) - - return unionQuery -} - -func (c *ClickHouseConnector) addPostQueryClauses(query string, qf QueryFilter) string { - // Add GROUP BY clause if needed (for aggregations) - if len(qf.GroupBy) > 0 { - groupByClause := fmt.Sprintf(" GROUP BY %s", strings.Join(qf.GroupBy, ", ")) - // For UNION queries, we need to wrap the entire query in a subquery to apply GROUP BY - if strings.Contains(query, "UNION ALL") { - query = fmt.Sprintf("SELECT * FROM (%s) %s", query, groupByClause) - } else { - // For standard queries, just append GROUP BY - query += groupByClause - } - } - - // For UNION queries, ORDER BY and LIMIT are already applied to individual queries - // For standard queries, apply ORDER BY and LIMIT - if !strings.Contains(query, "UNION ALL") { - // Add ORDER BY clause - if qf.SortBy != "" { - query += fmt.Sprintf(" ORDER BY %s %s", qf.SortBy, qf.SortOrder) - } - - // Add limit clause - if qf.Page >= 0 && qf.Limit > 0 { - offset := qf.Page * qf.Limit - query += fmt.Sprintf(" LIMIT %d OFFSET %d", qf.Limit, offset) - } else if qf.Limit > 0 { - query += fmt.Sprintf(" LIMIT %d", qf.Limit) - } - } else { - // For UNION queries, we need to apply final LIMIT after the UNION - // This ensures we get exactly the requested number of results - if qf.Page >= 0 && qf.Limit > 0 { - offset := qf.Page * qf.Limit - query = fmt.Sprintf("SELECT * FROM (%s) LIMIT %d OFFSET %d", query, qf.Limit, offset) - } else if qf.Limit > 0 { - query = fmt.Sprintf("SELECT * FROM (%s) LIMIT %d", query, qf.Limit) - } - } - - // Add settings at the very end - // Build settings string for ClickHouse query optimization - var settings []string - if c.cfg.MaxQueryTime > 0 { - settings = append(settings, fmt.Sprintf("max_execution_time = %d", c.cfg.MaxQueryTime)) - } - if c.cfg.MaxMemoryUsage > 0 { - settings = append(settings, fmt.Sprintf("max_memory_usage = %d", c.cfg.MaxMemoryUsage)) - } - - if len(settings) > 0 { - query += " SETTINGS " + strings.Join(settings, ", ") - } - - return query -} - -func (c *ClickHouseConnector) buildWhereClauses(table string, qf QueryFilter) []string { - whereClauses := []string{} - - if qf.ChainId != nil && qf.ChainId.Sign() > 0 { - whereClauses = append(whereClauses, 
createFilterClause("chain_id", qf.ChainId.String())) - } - - blockNumbersClause := createBlockNumbersClause(qf.BlockNumbers) - if blockNumbersClause != "" { - whereClauses = append(whereClauses, blockNumbersClause) - } - - contractAddressClause := createContractAddressClause(table, qf.ContractAddress) - if contractAddressClause != "" { - whereClauses = append(whereClauses, contractAddressClause) - } - - // Skip wallet address clause for UNION queries as it's handled separately - if table != "transactions" && qf.WalletAddress != "" { - walletAddressClause := createWalletAddressClause(table, qf.WalletAddress) - if walletAddressClause != "" { - whereClauses = append(whereClauses, walletAddressClause) - } - } - - fromAddressClause := createFromAddressClause(table, qf.FromAddress) - if fromAddressClause != "" { - whereClauses = append(whereClauses, fromAddressClause) - } - - signatureClause := createSignatureClause(table, qf.Signature) - if signatureClause != "" { - whereClauses = append(whereClauses, signatureClause) - } - - // Add filter params - for key, value := range qf.FilterParams { - whereClauses = append(whereClauses, createFilterClause(key, strings.ToLower(value))) - } - - return whereClauses -} - -func createFilterClause(key, value string) string { - // if the key includes topic_0, topic_1, topic_2, topic_3, apply left padding to the value - if strings.Contains(key, "topic_") { - value = getTopicValueFormat(value) - } - - suffix := key[len(key)-3:] - switch suffix { - case "gte": - return fmt.Sprintf("%s >= '%s'", key[:len(key)-4], value) - case "lte": - return fmt.Sprintf("%s <= '%s'", key[:len(key)-4], value) - case "_lt": - return fmt.Sprintf("%s < '%s'", key[:len(key)-3], value) - case "_gt": - return fmt.Sprintf("%s > '%s'", key[:len(key)-3], value) - case "_ne": - return fmt.Sprintf("%s != '%s'", key[:len(key)-3], value) - case "_in": - return fmt.Sprintf("%s IN (%s)", key[:len(key)-3], value) - default: - return fmt.Sprintf("%s = '%s'", key, value) - } -} - -func createContractAddressClause(table, contractAddress string) string { - contractAddress = strings.ToLower(contractAddress) - // This needs to move to a query param that accept multiple addresses - if table == "logs" { - if contractAddress != "" { - return fmt.Sprintf("address = '%s'", contractAddress) - } - } else if table == "transactions" { - if contractAddress != "" { - return fmt.Sprintf("to_address = '%s'", contractAddress) - } - } - return "" -} - -func createWalletAddressClause(table, walletAddress string) string { - walletAddress = strings.ToLower(walletAddress) - if table != "transactions" || walletAddress == "" { - return "" - } - return fmt.Sprintf("(from_address = '%s' OR to_address = '%s')", walletAddress, walletAddress) -} - -func createFromAddressClause(table, fromAddress string) string { - if fromAddress == "" { - return "" - } - fromAddress = strings.ToLower(fromAddress) - if table == "transactions" { - return fmt.Sprintf("from_address = '%s'", fromAddress) - } - return "" -} - -func createBlockNumbersClause(blockNumbers []*big.Int) string { - if len(blockNumbers) > 0 { - return fmt.Sprintf("block_number IN (%s)", getBlockNumbersStringArray(blockNumbers)) - } - return "" -} - -func createSignatureClause(table, signature string) string { - if signature == "" { - return "" - } - if table == "logs" { - return fmt.Sprintf("topic_0 = '%s'", signature) - } else if table == "transactions" { - return fmt.Sprintf("function_selector = '%s'", signature) - } - return "" -} - -func getTopicValueFormat(topic string) 
string { - if topic == "" { - // if there is no indexed topic, indexer stores an empty string - // we shouldn't pad and hexify such an argument then - return "" - } - asBytes := ethereum.FromHex(topic) - // ensure the byte slice is exactly 32 bytes by left-padding with zeros - asPadded := ethereum.LeftPadBytes(asBytes, 32) - result := ethereum.BytesToHash(asPadded).Hex() - return result -} - -func scanTransaction(rows driver.Rows) (common.Transaction, error) { - var tx common.Transaction - err := rows.ScanStruct(&tx) - if err != nil { - return common.Transaction{}, fmt.Errorf("error scanning transaction: %w", err) - } - if tx.FunctionSelector == ZERO_BYTES_10 { - tx.FunctionSelector = "" - } - if tx.ToAddress == ZERO_BYTES_42 { - tx.ToAddress = "" - } - return tx, nil -} - -func scanLog(rows driver.Rows) (common.Log, error) { - var log common.Log - err := rows.ScanStruct(&log) - if err != nil { - return common.Log{}, fmt.Errorf("error scanning log: %w", err) - } - return log, nil -} - -func scanBlock(rows driver.Rows) (common.Block, error) { - var block common.Block - err := rows.ScanStruct(&block) - if err != nil { - return common.Block{}, fmt.Errorf("error scanning block: %w", err) - } - - if block.WithdrawalsRoot == ZERO_BYTES_66 { - block.WithdrawalsRoot = "" - } - - return block, nil -} - -func scanTrace(rows driver.Rows) (common.Trace, error) { - var trace common.Trace - err := rows.ScanStruct(&trace) - if err != nil { - return common.Trace{}, fmt.Errorf("error scanning trace: %w", err) - } - return trace, nil -} - -func (c *ClickHouseConnector) GetMaxBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) { - tableName := c.getTableName(chainId, "blocks") - query := fmt.Sprintf("SELECT block_number FROM %s.%s WHERE chain_id = ? ORDER BY block_number DESC LIMIT 1", c.cfg.Database, tableName) - err = c.conn.QueryRow(context.Background(), query, chainId).Scan(&maxBlockNumber) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - return maxBlockNumber, nil -} - -func (c *ClickHouseConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (maxBlockNumber *big.Int, err error) { - tableName := c.getTableName(chainId, "blocks") - query := fmt.Sprintf("SELECT block_number FROM %s.%s WHERE chain_id = ? AND block_number >= ? AND block_number <= ? ORDER BY block_number DESC LIMIT 1", c.cfg.Database, tableName) - err = c.conn.QueryRow(context.Background(), query, chainId, startBlock, endBlock).Scan(&maxBlockNumber) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - return maxBlockNumber, nil -} - -func (c *ClickHouseConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockCount *big.Int, err error) { - tableName := c.getTableName(chainId, "blocks") - query := fmt.Sprintf("SELECT COUNT(DISTINCT block_number) FROM %s.%s WHERE chain_id = ? AND block_number >= ? AND block_number <= ?", c.cfg.Database, tableName) - err = c.conn.QueryRow(context.Background(), query, chainId, startBlock, endBlock).Scan(&blockCount) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - return blockCount, nil -} - -func (c *ClickHouseConnector) getMaxBlockNumberConsistent(chainId *big.Int) (maxBlockNumber *big.Int, err error) { - tableName := c.getTableName(chainId, "blocks") - query := fmt.Sprintf("SELECT block_number FROM %s.%s WHERE chain_id = ? 
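getTopicValueFormat left-pads shorter topic arguments to a full 32-byte hash before they are compared against the stored topics. A runnable usage example of the same transformation, using the go-ethereum helpers the original imports:

```go
// Sketch: left-padding an event-topic value to a 32-byte, 0x-prefixed hash.
package main

import (
	"fmt"

	ethereum "github.com/ethereum/go-ethereum/common"
)

// topicValue mirrors the removed getTopicValueFormat helper.
func topicValue(topic string) string {
	if topic == "" {
		// Unindexed topics are stored as empty strings and stay empty.
		return ""
	}
	asBytes := ethereum.FromHex(topic)
	return ethereum.BytesToHash(ethereum.LeftPadBytes(asBytes, 32)).Hex()
}

func main() {
	// A short argument expands to the full 32-byte form: 0x00…00deadbeef.
	fmt.Println(topicValue("0xdeadbeef"))
}
```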
ORDER BY block_number DESC LIMIT 1 SETTINGS select_sequential_consistency = 1", c.cfg.Database, tableName) - err = c.conn.QueryRow(context.Background(), query, chainId).Scan(&maxBlockNumber) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - return maxBlockNumber, nil -} - -func getLimitClause(limit int) string { - if limit == 0 { - return "" - } - return fmt.Sprintf(" LIMIT %d", limit) -} - -func getBlockNumbersStringArray(blockNumbers []*big.Int) string { - blockNumbersString := "" - for _, blockNumber := range blockNumbers { - blockNumbersString += fmt.Sprintf("%s,", blockNumber.String()) - } - return blockNumbersString -} - -func (c *ClickHouseConnector) InsertStagingData(data []common.BlockData) error { - query := `INSERT INTO ` + c.cfg.Database + `.block_data (chain_id, block_number, data)` - batch, err := c.conn.PrepareBatch(context.Background(), query) - if err != nil { - return err - } - defer batch.Close() - for _, blockData := range data { - blockDataJSON, err := json.Marshal(blockData) - if err != nil { - return err - } - err = batch.Append( - blockData.Block.ChainId, - blockData.Block.Number, - blockDataJSON, - ) - if err != nil { - return err - } - } - return batch.Send() -} - -func (c *ClickHouseConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { - var query string - - if len(qf.BlockNumbers) > 0 { - query = fmt.Sprintf("SELECT data FROM %s.block_data FINAL WHERE block_number IN (%s) AND is_deleted = 0", - c.cfg.Database, getBlockNumbersStringArray(qf.BlockNumbers)) - } else if qf.StartBlock != nil && qf.EndBlock != nil { - query = fmt.Sprintf("SELECT data FROM %s.block_data FINAL WHERE block_number BETWEEN %s AND %s AND is_deleted = 0", - c.cfg.Database, qf.StartBlock.String(), qf.EndBlock.String()) - } else { - return nil, fmt.Errorf("either BlockNumbers or StartBlock/EndBlock must be provided") - } - - if qf.ChainId.Sign() != 0 { - query += fmt.Sprintf(" AND chain_id = %s", qf.ChainId.String()) - } - - query += getLimitClause(int(qf.Limit)) - - rows, err := c.conn.Query(context.Background(), query) - if err != nil { - return nil, err - } - defer rows.Close() - - blockDataList := make([]common.BlockData, 0) - for rows.Next() { - var blockDataJson string - err := rows.Scan( - &blockDataJson, - ) - if err != nil { - zLog.Error().Err(err).Msg("Error scanning block data") - return nil, err - } - blockData := common.BlockData{} - err = json.Unmarshal([]byte(blockDataJson), &blockData) - if err != nil { - return nil, err - } - blockDataList = append(blockDataList, blockData) - } - return blockDataList, nil -} - -func (c *ClickHouseConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'publish'", c.cfg.Database) - if chainId.Sign() > 0 { - query += fmt.Sprintf(" AND chain_id = %s", chainId.String()) - } - var blockNumberString string - err := c.conn.QueryRow(context.Background(), query).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (c *ClickHouseConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := fmt.Sprintf("INSERT INTO %s.cursors (chain_id, cursor_type, cursor_value) VALUES (%s, 
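getBlockNumbersStringArray appends a comma after every element, so the generated predicate reads like block_number IN (100,101,102,) with a trailing comma inside the parentheses. If the server is strict about that, strings.Join sidesteps it; a small sketch:

```go
// Sketch: building the IN-list without a trailing comma.
package main

import (
	"fmt"
	"math/big"
	"strings"
)

// blockNumbersIn renders a comma-separated IN clause for the given block numbers.
func blockNumbersIn(blockNumbers []*big.Int) string {
	parts := make([]string, len(blockNumbers))
	for i, n := range blockNumbers {
		parts[i] = n.String()
	}
	return fmt.Sprintf("block_number IN (%s)", strings.Join(parts, ","))
}

func main() {
	nums := []*big.Int{big.NewInt(100), big.NewInt(101), big.NewInt(102)}
	fmt.Println(blockNumbersIn(nums)) // block_number IN (100,101,102)
}
```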
'publish', '%s')", c.cfg.Database, chainId, blockNumber.String()) - return c.conn.Exec(context.Background(), query) -} - -func (c *ClickHouseConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'commit'", c.cfg.Database) - if chainId.Sign() > 0 { - query += fmt.Sprintf(" AND chain_id = %s", chainId.String()) - } - var blockNumberString string - err := c.conn.QueryRow(context.Background(), query).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (c *ClickHouseConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := fmt.Sprintf("INSERT INTO %s.cursors (chain_id, cursor_type, cursor_value) VALUES (%s, 'commit', '%s')", c.cfg.Database, chainId, blockNumber.String()) - return c.conn.Exec(context.Background(), query) -} - -func (c *ClickHouseConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := fmt.Sprintf("SELECT cursor_value FROM %s.cursors FINAL WHERE cursor_type = 'reorg'", c.cfg.Database) - if chainId.Sign() > 0 { - query += fmt.Sprintf(" AND chain_id = %s", chainId.String()) - } - var blockNumberString string - err := c.conn.QueryRow(context.Background(), query).Scan(&blockNumberString) - if err != nil { - return nil, err - } - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (c *ClickHouseConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := fmt.Sprintf("INSERT INTO %s.cursors (chain_id, cursor_type, cursor_value) VALUES (%s, 'reorg', '%s')", c.cfg.Database, chainId, blockNumber.String()) - err := c.conn.Exec(context.Background(), query) - return err -} - -func (c *ClickHouseConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) (blockHeaders []common.BlockHeader, err error) { - tableName := c.getTableName(chainId, "blocks") - query := fmt.Sprintf("SELECT block_number, hash, parent_hash FROM %s.%s FINAL WHERE chain_id = ? AND block_number >= ? AND block_number <= ? 
ORDER BY block_number DESC", c.cfg.Database, tableName) - - rows, err := c.conn.Query(context.Background(), query, chainId, from, to) - if err != nil { - return nil, err - } - defer rows.Close() - - for rows.Next() { - var blockHeader common.BlockHeader - err := rows.Scan(&blockHeader.Number, &blockHeader.Hash, &blockHeader.ParentHash) - if err != nil { - return nil, err - } - blockHeaders = append(blockHeaders, blockHeader) - } - return blockHeaders, nil -} - -func (c *ClickHouseConnector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { - if len(data) == 0 { - return nil, nil - } - chainId := data[0].Block.ChainId - - var fetchErr error - var fetchErrMutex sync.Mutex - var deletedDataMutex sync.Mutex - var wg sync.WaitGroup - wg.Add(4) - // Create a map to store block data that will be deleted - deletedBlockDataByNumber := make(map[string]common.BlockData) - - blockNumbers := make([]*big.Int, len(data)) - for i, blockData := range data { - blockNumbers[i] = blockData.Block.Number - } - go func() { - defer wg.Done() - blocksQueryResult, err := c.GetBlocks(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }, "*") - if err != nil { - fetchErrMutex.Lock() - fetchErr = fmt.Errorf("error fetching blocks: %v", err) - fetchErrMutex.Unlock() - } - for _, block := range blocksQueryResult.Data { - deletedDataMutex.Lock() - deletedData := deletedBlockDataByNumber[block.Number.String()] - block.Sign = -1 - deletedData.Block = block - deletedBlockDataByNumber[block.Number.String()] = deletedData - deletedDataMutex.Unlock() - } - }() - - go func() { - defer wg.Done() - logsQueryResult, err := c.GetLogs(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }, "*") - if err != nil { - fetchErrMutex.Lock() - fetchErr = fmt.Errorf("error fetching logs: %v", err) - fetchErrMutex.Unlock() - } - for _, log := range logsQueryResult.Data { - deletedDataMutex.Lock() - deletedData := deletedBlockDataByNumber[log.BlockNumber.String()] - log.Sign = -1 - deletedData.Logs = append(deletedData.Logs, log) - deletedBlockDataByNumber[log.BlockNumber.String()] = deletedData - deletedDataMutex.Unlock() - } - }() - - go func() { - defer wg.Done() - txsQueryResult, err := c.GetTransactions(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }, "*") - if err != nil { - fetchErrMutex.Lock() - fetchErr = fmt.Errorf("error fetching transactions: %v", err) - fetchErrMutex.Unlock() - } - for _, tx := range txsQueryResult.Data { - deletedDataMutex.Lock() - deletedData := deletedBlockDataByNumber[tx.BlockNumber.String()] - tx.Sign = -1 - deletedData.Transactions = append(deletedData.Transactions, tx) - deletedBlockDataByNumber[tx.BlockNumber.String()] = deletedData - deletedDataMutex.Unlock() - } - }() - - go func() { - defer wg.Done() - tracesQueryResult, err := c.GetTraces(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }, "*") - if err != nil { - fetchErrMutex.Lock() - fetchErr = fmt.Errorf("error fetching traces: %v", err) - fetchErrMutex.Unlock() - } - for _, trace := range tracesQueryResult.Data { - deletedDataMutex.Lock() - deletedData := deletedBlockDataByNumber[trace.BlockNumber.String()] - trace.Sign = -1 - deletedData.Traces = append(deletedData.Traces, trace) - deletedBlockDataByNumber[trace.BlockNumber.String()] = deletedData - deletedDataMutex.Unlock() - } - }() - - wg.Wait() - - if fetchErr != nil { - return nil, fetchErr - } 
- deletedBlockData := make([]common.BlockData, 0, len(deletedBlockDataByNumber)) - for _, deletedData := range deletedBlockDataByNumber { - deletedBlockData = append(deletedBlockData, deletedData) - data = append(data, deletedData) - } - - insertErr := c.InsertBlockData(data) - if insertErr != nil { - return nil, insertErr - } - return deletedBlockData, nil -} - -func (c *ClickHouseConnector) InsertBlockData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - chainId := data[0].Block.ChainId - tableName := c.getTableName(chainId, "inserts_null_table") - columns := []string{ - "chain_id", "block", "transactions", "logs", "traces", "sign", "insert_timestamp", - } - query := fmt.Sprintf("INSERT INTO %s.%s (%s)", c.cfg.Database, tableName, strings.Join(columns, ", ")) - for i := 0; i < len(data); i += c.cfg.MaxRowsPerInsert { - end := i + c.cfg.MaxRowsPerInsert - if end > len(data) { - end = len(data) - } - - batch, err := c.conn.PrepareBatch(context.Background(), query) - if err != nil { - return err - } - defer batch.Close() - - txsCount := 0 - logsCount := 0 - tracesCount := 0 - for _, blockData := range data[i:end] { - block := blockData.Block - - // Prepare block tuple - blockTuple := []interface{}{ - block.Number, - block.Timestamp, - block.Hash, - block.ParentHash, - block.Sha3Uncles, - block.Nonce, - block.MixHash, - block.Miner, - block.StateRoot, - block.TransactionsRoot, - block.ReceiptsRoot, - block.LogsBloom, - block.Size, - block.ExtraData, - block.Difficulty, - block.TotalDifficulty, - block.TransactionCount, - block.GasLimit, - block.GasUsed, - block.WithdrawalsRoot, - block.BaseFeePerGas, - } - - // Prepare transactions array - transactions := make([][]interface{}, len(blockData.Transactions)) - txsCount += len(blockData.Transactions) - for j, tx := range blockData.Transactions { - transactions[j] = []interface{}{ - tx.Hash, - tx.Nonce, - tx.BlockHash, - tx.BlockNumber, - tx.BlockTimestamp, - tx.TransactionIndex, - tx.FromAddress, - tx.ToAddress, - tx.Value, - tx.Gas, - tx.GasPrice, - tx.Data, - tx.FunctionSelector, - tx.MaxFeePerGas, - tx.MaxPriorityFeePerGas, - tx.MaxFeePerBlobGas, - tx.BlobVersionedHashes, - tx.TransactionType, - tx.R, - tx.S, - tx.V, - tx.AccessListJson, - tx.AuthorizationListJson, - tx.ContractAddress, - tx.GasUsed, - tx.CumulativeGasUsed, - tx.EffectiveGasPrice, - tx.BlobGasUsed, - tx.BlobGasPrice, - tx.LogsBloom, - tx.Status, - } - } - - // Prepare logs array - logs := make([][]interface{}, len(blockData.Logs)) - logsCount += len(blockData.Logs) - for j, log := range blockData.Logs { - logs[j] = []interface{}{ - log.BlockNumber, - log.BlockHash, - log.BlockTimestamp, - log.TransactionHash, - log.TransactionIndex, - log.LogIndex, - log.Address, - log.Data, - log.Topic0, - log.Topic1, - log.Topic2, - log.Topic3, - } - } - - // Prepare traces array - traces := make([][]interface{}, len(blockData.Traces)) - tracesCount += len(blockData.Traces) - for j, trace := range blockData.Traces { - traces[j] = []interface{}{ - trace.BlockNumber, - trace.BlockHash, - trace.BlockTimestamp, - trace.TransactionHash, - trace.TransactionIndex, - trace.Subtraces, - trace.TraceAddress, - trace.TraceType, - trace.CallType, - trace.Error, - trace.FromAddress, - trace.ToAddress, - trace.Gas, - trace.GasUsed, - trace.Input, - trace.Output, - trace.Value, - trace.Author, - trace.RewardType, - trace.RefundAddress, - } - } - - sign := int8(1) - if block.Sign == -1 { - sign = block.Sign - } - insertTimestamp := time.Now() - if !block.InsertTimestamp.IsZero() { 
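InsertBlockData walks its input in MaxRowsPerInsert-sized windows with an inline start/end calculation. The same chunking expressed as a small generic helper; a sketch for illustration, not part of the connector:

```go
// Sketch: splitting a slice into bounded chunks, as the insert loop does.
package main

import "fmt"

// chunks splits items into consecutive slices of at most size elements.
func chunks[T any](items []T, size int) [][]T {
	if size <= 0 {
		return [][]T{items}
	}
	var out [][]T
	for i := 0; i < len(items); i += size {
		end := i + size
		if end > len(items) {
			end = len(items)
		}
		out = append(out, items[i:end])
	}
	return out
}

func main() {
	rows := []int{1, 2, 3, 4, 5, 6, 7}
	for _, c := range chunks(rows, 3) {
		fmt.Println(c) // [1 2 3], then [4 5 6], then [7]
	}
}
```

The sub-slices share the backing array with the input, which matches how the insert loop indexes data[i:end] without copying.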
- insertTimestamp = block.InsertTimestamp - } - // Append the row to the batch - if err := batch.Append( - block.ChainId, - blockTuple, - transactions, - logs, - traces, - sign, - insertTimestamp, - ); err != nil { - return err - } - } - - if err := batch.Send(); err != nil { - // if insert errors, it can actually still succeed in the background - // so we need to check if the consistent highest block matches the batch before we return an error - var highestBlockInBatch *big.Int - for _, blockData := range data[i:end] { - if highestBlockInBatch == nil || blockData.Block.Number.Cmp(highestBlockInBatch) > 0 { - highestBlockInBatch = blockData.Block.Number - } - } - - time.Sleep(500 * time.Millisecond) - - // Check if this matches the max consistent block - maxConsistentBlock, maxBlockErr := c.getMaxBlockNumberConsistent(chainId) - if maxBlockErr != nil || maxConsistentBlock.Cmp(highestBlockInBatch) != 0 { - if maxBlockErr != nil { - zLog.Error().Err(maxBlockErr).Msgf("Error getting consistent max block number for chain %s", chainId.String()) - } - return err - } else { - zLog.Info().Err(err).Msgf("Failure while inserting block data, but insert still succeeded") - } - } - metrics.ClickHouseTransactionsInserted.Add(float64(txsCount)) - metrics.ClickHouseLogsInserted.Add(float64(logsCount)) - metrics.ClickHouseTracesInserted.Add(float64(tracesCount)) - metrics.ClickHouseMainStorageRowsInserted.Add(float64(end - i)) - metrics.ClickHouseMainStorageInsertOperations.Inc() - } - - return nil -} - -func mapClickHouseTypeToGoType(dbType string) interface{} { - // Handle LowCardinality types - if strings.HasPrefix(dbType, "LowCardinality(") { - dbType = dbType[len("LowCardinality(") : len(dbType)-1] - } - - // Handle Nullable types - isNullable := false - if strings.HasPrefix(dbType, "Nullable(") { - isNullable = true - dbType = dbType[len("Nullable(") : len(dbType)-1] - } - - // Handle Array types - if strings.HasPrefix(dbType, "Array(") { - elementType := dbType[len("Array(") : len(dbType)-1] - // For arrays, we'll use slices of pointers to the element type - switch elementType { - case "String", "FixedString": - return new([]*string) - case "Int8", "Int16", "Int32", "Int64": - return new([]*int64) - case "UInt8", "UInt16", "UInt32", "UInt64": - return new([]*uint64) - case "Float32", "Float64": - return new([]*float64) - case "Decimal", "Decimal32", "Decimal64", "Decimal128", "Decimal256": - return new([]*big.Float) - // Add more cases as needed - default: - return new([]interface{}) - } - } - - // Handle parameterized types by extracting the base type - baseType := dbType - if idx := strings.Index(dbType, "("); idx != -1 { - baseType = dbType[:idx] - } - - // Map basic data types - switch baseType { - // Signed integers - case "Int8": - if isNullable { - return new(*int8) - } - return new(int8) - case "Int16": - if isNullable { - return new(*int16) - } - return new(int16) - case "Int32": - if isNullable { - return new(*int32) - } - return new(int32) - case "Int64": - if isNullable { - return new(*int64) - } - return new(int64) - // Unsigned integers - case "UInt8": - if isNullable { - return new(*uint8) - } - return new(uint8) - case "UInt16": - if isNullable { - return new(*uint16) - } - return new(uint16) - case "UInt32": - if isNullable { - return new(*uint32) - } - return new(uint32) - case "UInt64": - if isNullable { - return new(*uint64) - } - return new(uint64) - // Floating-point numbers - case "Float32": - if isNullable { - return new(*float32) - } - return new(float32) - case "Float64": - 
if isNullable { - return new(*float64) - } - return new(float64) - // Decimal types - case "Decimal", "Decimal32", "Decimal64", "Decimal128", "Decimal256": - if isNullable { - return new(*big.Float) - } - return new(big.Float) - // String types - case "String", "FixedString", "UUID", "IPv4", "IPv6": - if isNullable { - return new(*string) - } - return new(string) - // Enums - case "Enum8", "Enum16": - if isNullable { - return new(*string) - } - return new(string) - // Date and time types - case "Date", "Date32", "DateTime", "DateTime64": - if isNullable { - return new(*time.Time) - } - return new(time.Time) - // Big integers - case "Int128", "UInt128", "Int256", "UInt256": - if isNullable { - return new(*big.Int) - } - return new(big.Int) - default: - // For unknown types, use interface{} - return new(interface{}) - } -} - -type BigInt struct { - big.Int -} - -func (b BigInt) MarshalJSON() ([]byte, error) { - return []byte(`"` + b.String() + `"`), nil -} - -func getUnderlyingValue(valuePtr interface{}) interface{} { - v := reflect.ValueOf(valuePtr) - - // Handle nil values - if !v.IsValid() { - return nil - } - - // Handle pointers and interfaces - for { - if v.Kind() == reflect.Ptr || v.Kind() == reflect.Interface { - if v.IsNil() { - return nil - } - v = v.Elem() - continue - } - break - } - - return v.Interface() -} - -func (c *ClickHouseConnector) getChainSpecificFields(chainId *big.Int, entityType string, defaultFields []string) []string { - if c.cfg.ChainBasedConfig == nil { - return defaultFields - } - - chainFields, exists := c.cfg.ChainBasedConfig[chainId.String()] - if !exists { - return defaultFields - } - - config, exists := chainFields[entityType] - if !exists { - return defaultFields - } - - if len(config.DefaultSelectFields) > 0 { - return config.DefaultSelectFields - } - - return defaultFields -} - -func (c *ClickHouseConnector) getTableName(chainId *big.Int, defaultTable string) string { - if c.cfg.ChainBasedConfig == nil { - return defaultTable - } - - chainFields, exists := c.cfg.ChainBasedConfig[chainId.String()] - if !exists { - return defaultTable - } - - config, exists := chainFields[defaultTable] - if !exists { - return defaultTable - } - - if len(config.TableName) > 0 { - return config.TableName - } - - return defaultTable -} - -func (c *ClickHouseConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - columns := "token_type, chain_id, token_address, from_address, to_address, block_number, block_timestamp, transaction_hash, token_id, amount, log_index, sign, insert_timestamp" - if len(fields) > 0 { - columns = strings.Join(fields, ", ") - } - query := fmt.Sprintf("SELECT %s FROM %s.token_transfers WHERE chain_id = ?", columns, c.cfg.Database) - - if len(qf.TokenTypes) > 0 { - tokenTypesStr := "" - tokenTypesLen := len(qf.TokenTypes) - for i := 0; i < tokenTypesLen-1; i++ { - tokenTypesStr += fmt.Sprintf("'%s',", qf.TokenTypes[i]) - } - tokenTypesStr += fmt.Sprintf("'%s'", qf.TokenTypes[tokenTypesLen-1]) - query += fmt.Sprintf(" AND token_type in (%s)", tokenTypesStr) - } - - if qf.WalletAddress != "" { - query += fmt.Sprintf(" AND (from_address = '%s' OR to_address = '%s')", qf.WalletAddress, qf.WalletAddress) - } - if qf.TokenAddress != "" { - query += fmt.Sprintf(" AND token_address = '%s'", qf.TokenAddress) - } - if qf.TransactionHash != "" { - query += fmt.Sprintf(" AND transaction_hash = '%s'", qf.TransactionHash) - } - - if len(qf.TokenIds) > 0 { - tokenIdsStr := "" - tokenIdsLen := 
len(qf.TokenIds) - for i := 0; i < tokenIdsLen-1; i++ { - tokenIdsStr += fmt.Sprintf("%s,", qf.TokenIds[i].String()) - } - tokenIdsStr += qf.TokenIds[tokenIdsLen-1].String() - query += fmt.Sprintf(" AND token_id in (%s)", tokenIdsStr) - } - - if qf.StartBlockNumber != nil { - query += fmt.Sprintf(" AND block_number >= %s", qf.StartBlockNumber.String()) - } - if qf.EndBlockNumber != nil { - query += fmt.Sprintf(" AND block_number <= %s", qf.EndBlockNumber.String()) - } - - if len(qf.GroupBy) > 0 { - query += fmt.Sprintf(" GROUP BY %s", strings.Join(qf.GroupBy, ", ")) - } - - // Add ORDER BY clause - if qf.SortBy != "" { - query += fmt.Sprintf(" ORDER BY %s %s", qf.SortBy, qf.SortOrder) - } - - // Add limit clause - if qf.Page >= 0 && qf.Limit > 0 { - offset := qf.Page * qf.Limit - query += fmt.Sprintf(" LIMIT %d OFFSET %d", qf.Limit, offset) - } else if qf.Limit > 0 { - query += fmt.Sprintf(" LIMIT %d", qf.Limit) - } - rows, err := c.conn.Query(context.Background(), query, qf.ChainId) - if err != nil { - return QueryResult[common.TokenTransfer]{}, err - } - defer rows.Close() - - queryResult := QueryResult[common.TokenTransfer]{ - Data: []common.TokenTransfer{}, - } - - for rows.Next() { - var tt common.TokenTransfer - err := rows.ScanStruct(&tt) - if err != nil { - return QueryResult[common.TokenTransfer]{}, err - } - queryResult.Data = append(queryResult.Data, tt) - } - - return queryResult, nil -} - -func (c *ClickHouseConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - columns := "chain_id, token_type, address, owner, token_id, balance" - if len(fields) > 0 { - columns = strings.Join(fields, ", ") - } - query := fmt.Sprintf("SELECT %s FROM %s.token_balances WHERE chain_id = ?", columns, c.cfg.Database) - - if len(qf.TokenTypes) > 0 { - tokenTypesStr := "" - tokenTypesLen := len(qf.TokenTypes) - for i := 0; i < tokenTypesLen-1; i++ { - tokenTypesStr += fmt.Sprintf("'%s',", qf.TokenTypes[i]) - } - tokenTypesStr += fmt.Sprintf("'%s'", qf.TokenTypes[tokenTypesLen-1]) - query += fmt.Sprintf(" AND token_type in (%s)", tokenTypesStr) - } - - if qf.Owner != "" { - query += fmt.Sprintf(" AND owner = '%s'", qf.Owner) - } - if qf.TokenAddress != "" { - query += fmt.Sprintf(" AND address = '%s'", qf.TokenAddress) - } - - if len(qf.TokenIds) > 0 { - tokenIdsStr := "" - tokenIdsLen := len(qf.TokenIds) - for i := 0; i < tokenIdsLen-1; i++ { - tokenIdsStr += fmt.Sprintf("%s,", qf.TokenIds[i].String()) - } - tokenIdsStr += qf.TokenIds[tokenIdsLen-1].String() - query += fmt.Sprintf(" AND token_id in (%s)", tokenIdsStr) - } - - isBalanceAggregated := false - for _, field := range fields { - if strings.Contains(field, "balance") && strings.TrimSpace(field) != "balance" { - isBalanceAggregated = true - break - } - } - balanceCondition := ">=" - if qf.ZeroBalance { - balanceCondition = ">" - } - if !isBalanceAggregated { - query += fmt.Sprintf(" AND balance %s 0", balanceCondition) - } - - if len(qf.GroupBy) > 0 { - query += fmt.Sprintf(" GROUP BY %s", strings.Join(qf.GroupBy, ", ")) - - if isBalanceAggregated { - query += fmt.Sprintf(" HAVING balance %s 0", balanceCondition) - } - } - - // Add ORDER BY clause - if qf.SortBy != "" { - query += fmt.Sprintf(" ORDER BY %s %s", qf.SortBy, qf.SortOrder) - } - - // Add limit clause - if qf.Page >= 0 && qf.Limit > 0 { - offset := qf.Page * qf.Limit - query += fmt.Sprintf(" LIMIT %d OFFSET %d", qf.Limit, offset) - } else if qf.Limit > 0 { - query += fmt.Sprintf(" LIMIT %d", qf.Limit) - } - - rows, err 
:= c.conn.Query(context.Background(), query, qf.ChainId) - if err != nil { - return QueryResult[common.TokenBalance]{}, err - } - defer rows.Close() - - queryResult := QueryResult[common.TokenBalance]{ - Data: []common.TokenBalance{}, - } - - for rows.Next() { - var tb common.TokenBalance - err := rows.ScanStruct(&tb) - if err != nil { - return QueryResult[common.TokenBalance]{}, err - } - queryResult.Data = append(queryResult.Data, tb) - } - - return queryResult, nil -} - -func (c *ClickHouseConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blocks []common.BlockData, err error) { - if startBlock == nil || endBlock == nil { - return nil, fmt.Errorf("start block and end block must not be nil") - } - - if startBlock.Cmp(endBlock) > 0 { - return nil, fmt.Errorf("start block must be less than or equal to end block") - } - - blockNumbers := make([]*big.Int, 0) - for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { - blockNumbers = append(blockNumbers, new(big.Int).Set(i)) - } - // Get blocks, logs and transactions concurrently - type blockResult struct { - blocks []common.Block - err error - } - - type logResult struct { - logMap map[string][]common.Log // blockNumber -> logs - err error - } - - type txResult struct { - txMap map[string][]common.Transaction // blockNumber -> transactions - err error - } - - blocksChan := make(chan blockResult) - logsChan := make(chan logResult) - txsChan := make(chan txResult) - - // Launch goroutines for concurrent fetching - go func() { - blocksResult, err := c.GetBlocks(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }, "chain_id", "block_number", "transactions_root", "receipts_root", "logs_bloom", "transaction_count") - blocksChan <- blockResult{blocks: blocksResult.Data, err: err} - }() - - go func() { - logsResult, err := c.GetLogs(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }, "chain_id", "block_number", "address", "log_index", "topic_0", "topic_1", "topic_2", "topic_3") - if err != nil { - logsChan <- logResult{err: err} - return - } - - // Pre-organize logs by block number - logMap := make(map[string][]common.Log) - for _, log := range logsResult.Data { - blockNum := log.BlockNumber.String() - logMap[blockNum] = append(logMap[blockNum], log) - } - logsChan <- logResult{logMap: logMap} - }() - - go func() { - transactionsResult, err := c.GetTransactions(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }, "chain_id", "block_number", "nonce", "transaction_index", "to_address", "value", "gas", "gas_price", "data", "max_fee_per_gas", "max_priority_fee_per_gas", "max_fee_per_blob_gas", "blob_versioned_hashes", "transaction_type", "r", "s", "v", "access_list", "authorization_list", "blob_gas_used", "blob_gas_price") - if err != nil { - txsChan <- txResult{err: err} - return - } - - // Pre-organize transactions by block number - txMap := make(map[string][]common.Transaction) - for _, tx := range transactionsResult.Data { - blockNum := tx.BlockNumber.String() - txMap[blockNum] = append(txMap[blockNum], tx) - } - txsChan <- txResult{txMap: txMap} - }() - - // Wait for all results - blocksResult := <-blocksChan - logsResult := <-logsChan - txsResult := <-txsChan - - // Check for errors - if blocksResult.err != nil { - return nil, fmt.Errorf("error fetching blocks: %v", blocksResult.err) - } - if logsResult.err != nil { - return nil, 
fmt.Errorf("error fetching logs: %v", logsResult.err) - } - if txsResult.err != nil { - return nil, fmt.Errorf("error fetching transactions: %v", txsResult.err) - } - - // Build BlockData slice - blockData := make([]common.BlockData, len(blocksResult.blocks)) - - // Build BlockData for each block - for i, block := range blocksResult.blocks { - blockNum := block.Number.String() - blockData[i] = common.BlockData{ - Block: block, - Logs: logsResult.logMap[blockNum], - Transactions: txsResult.txMap[blockNum], - } - } - - return blockData, nil -} - -func (c *ClickHouseConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockNumbers []*big.Int, err error) { - tableName := c.getTableName(chainId, "blocks") - query := fmt.Sprintf(` - WITH sequence AS ( - SELECT - {startBlock:UInt256} + number AS expected_block_number - FROM - numbers(toUInt64({endBlock:UInt256} - {startBlock:UInt256} + 1)) - ), - existing_blocks AS ( - SELECT DISTINCT - block_number - FROM - %s FINAL - WHERE - chain_id = {chainId:UInt256} - AND block_number >= {startBlock:UInt256} - AND block_number <= {endBlock:UInt256} - ) - SELECT - s.expected_block_number AS missing_block_number - FROM - sequence s - LEFT JOIN - existing_blocks e ON s.expected_block_number = e.block_number - WHERE - e.block_number = 0 - ORDER BY - missing_block_number - `, tableName) - rows, err := c.conn.Query(context.Background(), query, clickhouse.Named("chainId", chainId.String()), clickhouse.Named("startBlock", startBlock.String()), clickhouse.Named("endBlock", endBlock.String())) - if err != nil { - return nil, err - } - defer rows.Close() - - for rows.Next() { - var blockNumber *big.Int - err := rows.Scan(&blockNumber) - if err != nil { - return nil, err - } - blockNumbers = append(blockNumbers, blockNumber) - } - return blockNumbers, nil -} - -func (c *ClickHouseConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) (blocks []common.BlockData, err error) { - // Get blocks, logs and transactions concurrently - type blockResult struct { - blocks []common.Block - err error - } - - type logResult struct { - logMap map[string][]common.Log // blockNumber -> logs - err error - } - - type txResult struct { - txMap map[string][]common.Transaction // blockNumber -> transactions - err error - } - - type traceResult struct { - traceMap map[string][]common.Trace // blockNumber -> traces - err error - } - - blocksChan := make(chan blockResult) - logsChan := make(chan logResult) - txsChan := make(chan txResult) - tracesChan := make(chan traceResult) - - // Launch goroutines for concurrent fetching - go func() { - blocksResult, err := c.GetBlocks(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }) - blocksChan <- blockResult{blocks: blocksResult.Data, err: err} - }() - - go func() { - logsResult, err := c.GetLogs(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }) - if err != nil { - logsChan <- logResult{err: err} - return - } - - // Pre-organize logs by block number - logMap := make(map[string][]common.Log) - for _, log := range logsResult.Data { - blockNum := log.BlockNumber.String() - logMap[blockNum] = append(logMap[blockNum], log) - } - logsChan <- logResult{logMap: logMap} - }() - - go func() { - transactionsResult, err := c.GetTransactions(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }) - if err != nil { - txsChan <- txResult{err: err} - return - } - - // 
Pre-organize transactions by block number - txMap := make(map[string][]common.Transaction) - for _, tx := range transactionsResult.Data { - blockNum := tx.BlockNumber.String() - txMap[blockNum] = append(txMap[blockNum], tx) - } - txsChan <- txResult{txMap: txMap} - }() - - go func() { - tracesResult, err := c.GetTraces(QueryFilter{ - ChainId: chainId, - BlockNumbers: blockNumbers, - ForceConsistentData: true, - }) - if err != nil { - tracesChan <- traceResult{err: err} - return - } - - traceMap := make(map[string][]common.Trace) - for _, trace := range tracesResult.Data { - blockNum := trace.BlockNumber.String() - traceMap[blockNum] = append(traceMap[blockNum], trace) - } - tracesChan <- traceResult{traceMap: traceMap} - }() - - // Wait for all results - blocksResult := <-blocksChan - logsResult := <-logsChan - txsResult := <-txsChan - tracesResult := <-tracesChan - - // Check for errors - if blocksResult.err != nil { - return nil, fmt.Errorf("error fetching blocks: %v", blocksResult.err) - } - if logsResult.err != nil { - return nil, fmt.Errorf("error fetching logs: %v", logsResult.err) - } - if txsResult.err != nil { - return nil, fmt.Errorf("error fetching transactions: %v", txsResult.err) - } - if tracesResult.err != nil { - return nil, fmt.Errorf("error fetching traces: %v", tracesResult.err) - } - - // Build BlockData slice - blockData := make([]common.BlockData, len(blocksResult.blocks)) - - // Build BlockData for each block - for i, block := range blocksResult.blocks { - blockNum := block.Number.String() - blockData[i] = common.BlockData{ - Block: block, - Logs: logsResult.logMap[blockNum], - Transactions: txsResult.txMap[blockNum], - Traces: tracesResult.traceMap[blockNum], - } - } - - return blockData, nil -} - -func (c *ClickHouseConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - query := fmt.Sprintf(` - INSERT INTO %s.block_data (chain_id, block_number, is_deleted) - SELECT chain_id, block_number, 1 - FROM %s.block_data - WHERE chain_id = ? AND block_number <= ? AND is_deleted = 0 - GROUP BY chain_id, block_number - `, c.cfg.Database, c.cfg.Database) - return c.conn.Exec(context.Background(), query, chainId, blockNumber) -} - -// GetStagingDataBlockRange returns the minimum and maximum block numbers stored for a given chain -func (c *ClickHouseConnector) GetStagingDataBlockRange(chainId *big.Int) (*big.Int, *big.Int, error) { - query := fmt.Sprintf(` - SELECT MIN(block_number) AS min_block, MAX(block_number) AS max_block - FROM %s.block_data FINAL - WHERE chain_id = ? 
AND is_deleted = 0 - `, c.cfg.Database) - - var minBlock, maxBlock *big.Int - err := c.conn.QueryRow(context.Background(), query, chainId).Scan(&minBlock, &maxBlock) - if err != nil { - if err == sql.ErrNoRows { - return nil, nil, nil - } - return nil, nil, err - } - - // If either min or max is nil (no data), return nil for both - if minBlock == nil || maxBlock == nil { - return nil, nil, nil - } - - return minBlock, maxBlock, nil -} - -// Helper function to test query generation -func (c *ClickHouseConnector) TestQueryGeneration(table, columns string, qf QueryFilter) string { - return c.buildQuery(table, columns, qf) -} diff --git a/internal/storage/clickhouse_connector_test.go b/internal/storage/clickhouse_connector_test.go deleted file mode 100644 index fc17ced7..00000000 --- a/internal/storage/clickhouse_connector_test.go +++ /dev/null @@ -1,277 +0,0 @@ -package storage - -import ( - "math/big" - "reflect" - "strings" - "testing" - "time" - - config "github.com/thirdweb-dev/indexer/configs" -) - -// TestMapClickHouseTypeToGoType tests the mapClickHouseTypeToGoType function -func TestMapClickHouseTypeToGoType(t *testing.T) { - testCases := []struct { - dbType string - expectedType interface{} - }{ - // Signed integers - {"Int8", int8(0)}, - {"Nullable(Int8)", (**int8)(nil)}, - {"Int16", int16(0)}, - {"Nullable(Int16)", (**int16)(nil)}, - {"Int32", int32(0)}, - {"Nullable(Int32)", (**int32)(nil)}, - {"Int64", int64(0)}, - {"Nullable(Int64)", (**int64)(nil)}, - // Unsigned integers - {"UInt8", uint8(0)}, - {"Nullable(UInt8)", (**uint8)(nil)}, - {"UInt16", uint16(0)}, - {"Nullable(UInt16)", (**uint16)(nil)}, - {"UInt32", uint32(0)}, - {"Nullable(UInt32)", (**uint32)(nil)}, - {"UInt64", uint64(0)}, - {"Nullable(UInt64)", (**uint64)(nil)}, - // Big integers - {"Int128", big.NewInt(0)}, - {"Nullable(Int128)", (**big.Int)(nil)}, - {"UInt128", big.NewInt(0)}, - {"Nullable(UInt128)", (**big.Int)(nil)}, - {"Int256", big.NewInt(0)}, - {"Nullable(Int256)", (**big.Int)(nil)}, - {"UInt256", big.NewInt(0)}, - {"Nullable(UInt256)", (**big.Int)(nil)}, - // Floating-point numbers - {"Float32", float32(0)}, - {"Nullable(Float32)", (**float32)(nil)}, - {"Float64", float64(0)}, - {"Nullable(Float64)", (**float64)(nil)}, - // Decimal types - {"Decimal", big.NewFloat(0)}, - {"Nullable(Decimal)", (**big.Float)(nil)}, - {"Decimal32", big.NewFloat(0)}, - {"Nullable(Decimal32)", (**big.Float)(nil)}, - {"Decimal64", big.NewFloat(0)}, - {"Nullable(Decimal64)", (**big.Float)(nil)}, - {"Decimal128", big.NewFloat(0)}, - {"Nullable(Decimal128)", (**big.Float)(nil)}, - {"Decimal256", big.NewFloat(0)}, - {"Nullable(Decimal256)", (**big.Float)(nil)}, - // String types - {"String", ""}, - {"Nullable(String)", (**string)(nil)}, - {"FixedString(42)", ""}, - {"Nullable(FixedString(42))", (**string)(nil)}, - {"UUID", ""}, - {"Nullable(UUID)", (**string)(nil)}, - {"IPv4", ""}, - {"Nullable(IPv4)", (**string)(nil)}, - {"IPv6", ""}, - {"Nullable(IPv6)", (**string)(nil)}, - // Date and time types - {"Date", time.Time{}}, - {"Nullable(Date)", (**time.Time)(nil)}, - {"DateTime", time.Time{}}, - {"Nullable(DateTime)", (**time.Time)(nil)}, - {"DateTime64", time.Time{}}, - {"Nullable(DateTime64)", (**time.Time)(nil)}, - // Enums - {"Enum8('a' = 1, 'b' = 2)", ""}, - {"Nullable(Enum8('a' = 1, 'b' = 2))", (**string)(nil)}, - {"Enum16('a' = 1, 'b' = 2)", ""}, - {"Nullable(Enum16('a' = 1, 'b' = 2))", (**string)(nil)}, - // Arrays - {"Array(Int32)", &[]*int64{}}, - {"Array(String)", &[]*string{}}, - {"Array(Float64)", &[]*float64{}}, - // 
LowCardinality - {"LowCardinality(String)", ""}, - {"LowCardinality(Nullable(String))", (**string)(nil)}, - // Unknown type - {"UnknownType", new(interface{})}, - {"Nullable(UnknownType)", new(interface{})}, - } - - for _, tc := range testCases { - t.Run(tc.dbType, func(t *testing.T) { - result := mapClickHouseTypeToGoType(tc.dbType) - - expectedType := reflect.TypeOf(tc.expectedType) - resultType := reflect.TypeOf(result) - - // Handle pointers - if expectedType.Kind() == reflect.Ptr { - if resultType.Kind() != reflect.Ptr { - t.Errorf("Expected pointer type for dbType %s, got %s", tc.dbType, resultType.Kind()) - return - } - expectedElemType := expectedType.Elem() - resultElemType := resultType.Elem() - if expectedElemType.Kind() == reflect.Ptr { - // Expected pointer to pointer - if resultElemType.Kind() != reflect.Ptr { - t.Errorf("Expected pointer to pointer for dbType %s, got %s", tc.dbType, resultElemType.Kind()) - return - } - expectedElemType = expectedElemType.Elem() - resultElemType = resultElemType.Elem() - } - if expectedElemType != resultElemType { - t.Errorf("Type mismatch for dbType %s: expected %s, got %s", tc.dbType, expectedElemType, resultElemType) - } - } else { - // Non-pointer types - if resultType.Kind() != reflect.Ptr { - t.Errorf("Expected pointer type for dbType %s, got %s", tc.dbType, resultType.Kind()) - return - } - resultElemType := resultType.Elem() - if expectedType != resultElemType { - t.Errorf("Type mismatch for dbType %s: expected %s, got %s", tc.dbType, expectedType, resultElemType) - } - } - }) - } -} - -// TestUnionQueryLogic tests the UNION query logic for wallet addresses in transactions -func TestUnionQueryLogic(t *testing.T) { - // Create a mock config with valid connection details - cfg := &config.ClickhouseConfig{ - Database: "default", - Host: "localhost", - Port: 9000, - Username: "default", - Password: "", - MaxQueryTime: 30, - } - - // Create connector - connector, err := NewClickHouseConnector(cfg) - if err != nil { - // Skip test if we can't connect to ClickHouse (likely in CI environment) - t.Skipf("Skipping test - cannot connect to ClickHouse: %v", err) - } - - // Test case 1: Standard query without wallet address (should not use UNION) - t.Run("Standard query without wallet address", func(t *testing.T) { - qf := QueryFilter{ - ChainId: big.NewInt(8453), - Limit: 5, - SortBy: "block_number", - SortOrder: "DESC", - } - - query := connector.TestQueryGeneration("transactions", "*", qf) - - // Should not contain UNION ALL - if strings.Contains(query, "UNION ALL") { - t.Errorf("Standard query should not contain UNION ALL: %s", query) - } - - // Should contain standard WHERE clause - if !strings.Contains(query, "WHERE") { - t.Errorf("Query should contain WHERE clause: %s", query) - } - }) - - // Test case 2: UNION query with wallet address - t.Run("UNION query with wallet address", func(t *testing.T) { - qf := QueryFilter{ - ChainId: big.NewInt(8453), - WalletAddress: "0x0b230949b38fa651aefffcfa5e664554df8ae900", - Limit: 5, - SortBy: "block_number", - SortOrder: "DESC", - } - - query := connector.TestQueryGeneration("transactions", "*", qf) - - // Should contain UNION ALL - if !strings.Contains(query, "UNION ALL") { - t.Errorf("Query should contain UNION ALL: %s", query) - } - - // Should contain from_address and to_address conditions - if !strings.Contains(query, "from_address = '0x0b230949b38fa651aefffcfa5e664554df8ae900'") { - t.Errorf("Query should contain from_address condition: %s", query) - } - - if !strings.Contains(query, 
"to_address = '0x0b230949b38fa651aefffcfa5e664554df8ae900'") { - t.Errorf("Query should contain to_address condition: %s", query) - } - - // Should have proper ORDER BY and LIMIT at the end - if !strings.Contains(query, "ORDER BY block_number DESC") { - t.Errorf("Query should contain ORDER BY clause: %s", query) - } - - if !strings.Contains(query, "LIMIT 5") { - t.Errorf("Query should contain LIMIT clause: %s", query) - } - - // Should have SETTINGS at the very end - if !strings.Contains(query, "SETTINGS max_execution_time = 30") { - t.Errorf("Query should contain SETTINGS clause: %s", query) - } - }) - - // Test case 3: Standard query for logs table (should not use UNION) - t.Run("Standard query for logs table", func(t *testing.T) { - qf := QueryFilter{ - ChainId: big.NewInt(8453), - WalletAddress: "0x0b230949b38fa651aefffcfa5e664554df8ae900", - Limit: 5, - SortBy: "block_number", - SortOrder: "DESC", - } - - query := connector.TestQueryGeneration("logs", "*", qf) - - // Should not contain UNION ALL (logs table doesn't use UNION) - if strings.Contains(query, "UNION ALL") { - t.Errorf("Logs query should not contain UNION ALL: %s", query) - } - - // Logs table doesn't have wallet address clauses since it doesn't have from_address/to_address fields - // So it should just have the basic WHERE clause without wallet address - if !strings.Contains(query, "WHERE") { - t.Errorf("Logs query should contain WHERE clause: %s", query) - } - - // Should not contain wallet address conditions since logs don't have those fields - if strings.Contains(query, "from_address") || strings.Contains(query, "to_address") { - t.Errorf("Logs query should not contain address conditions: %s", query) - } - }) - - // Test case 4: UNION query with GROUP BY - t.Run("UNION query with GROUP BY", func(t *testing.T) { - qf := QueryFilter{ - ChainId: big.NewInt(8453), - WalletAddress: "0x0b230949b38fa651aefffcfa5e664554df8ae900", - GroupBy: []string{"block_number"}, - Limit: 5, - SortBy: "block_number", - SortOrder: "DESC", - } - - query := connector.TestQueryGeneration("transactions", "block_number, COUNT(*) as count", qf) - - // Should contain UNION ALL - if !strings.Contains(query, "UNION ALL") { - t.Errorf("Query should contain UNION ALL: %s", query) - } - - // Should contain GROUP BY wrapped in subquery - if !strings.Contains(query, "SELECT * FROM (") { - t.Errorf("Query should wrap UNION in subquery for GROUP BY: %s", query) - } - - if !strings.Contains(query, "GROUP BY block_number") { - t.Errorf("Query should contain GROUP BY clause: %s", query) - } - }) -} diff --git a/internal/storage/connector.go b/internal/storage/connector.go deleted file mode 100644 index 21bde33a..00000000 --- a/internal/storage/connector.go +++ /dev/null @@ -1,377 +0,0 @@ -package storage - -import ( - "fmt" - "math/big" - - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -type QueryFilter struct { - ChainId *big.Int - BlockNumbers []*big.Int - StartBlock *big.Int - EndBlock *big.Int - FilterParams map[string]string - GroupBy []string - SortBy string - SortOrder string - Page int - Limit int - Offset int - Aggregates []string // e.g., ["COUNT(*) AS count", "SUM(amount) AS total_amount"] - FromAddress string - ContractAddress string - WalletAddress string - Signature string - ForceConsistentData bool -} - -type TransfersQueryFilter struct { - ChainId *big.Int - TokenTypes []string - TokenAddress string - WalletAddress string - TokenIds []*big.Int - TransactionHash string - StartBlockNumber 
*big.Int - EndBlockNumber *big.Int - GroupBy []string - SortBy string - SortOrder string // "ASC" or "DESC" - Page int - Limit int - Offset int -} - -type BalancesQueryFilter struct { - ChainId *big.Int - TokenTypes []string - TokenAddress string - Owner string - TokenIds []*big.Int - ZeroBalance bool - GroupBy []string - SortBy string - SortOrder string - Page int - Limit int - Offset int -} - -type QueryResult[T any] struct { - // TODO: findout how to only allow Log/transaction arrays or split the result - Data []T `json:"data"` - Aggregates []map[string]interface{} `json:"aggregates"` -} - -type IStorage struct { - OrchestratorStorage IOrchestratorStorage - MainStorage IMainStorage - StagingStorage IStagingStorage -} - -// Close closes all storage connections -func (s *IStorage) Close() error { - var errs []error - - // Close each storage that implements Closer interface - if err := s.OrchestratorStorage.Close(); err != nil { - errs = append(errs, fmt.Errorf("failed to close orchestrator storage: %w", err)) - } - - if err := s.MainStorage.Close(); err != nil { - errs = append(errs, fmt.Errorf("failed to close main storage: %w", err)) - } - - if err := s.StagingStorage.Close(); err != nil { - errs = append(errs, fmt.Errorf("failed to close staging storage: %w", err)) - } - - if len(errs) > 0 { - return fmt.Errorf("errors closing storage: %v", errs) - } - - return nil -} - -// The orchestartor storage is a persisted key/value store -type IOrchestratorStorage interface { - GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) - SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error - GetLastPublishedBlockNumber(chainId *big.Int) (blockNumber *big.Int, err error) - SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error - GetLastCommittedBlockNumber(chainId *big.Int) (blockNumber *big.Int, err error) - SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error - - Close() error -} - -// The staging storage is a emphemeral block data store -type IStagingStorage interface { - // Staging block data - InsertStagingData(data []common.BlockData) error - GetStagingData(qf QueryFilter) (data []common.BlockData, err error) - GetStagingDataBlockRange(chainId *big.Int) (minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) - - DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error - - Close() error -} - -type IMainStorage interface { - InsertBlockData(data []common.BlockData) error - ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) - - GetBlocks(qf QueryFilter, fields ...string) (blocks QueryResult[common.Block], err error) - GetTransactions(qf QueryFilter, fields ...string) (transactions QueryResult[common.Transaction], err error) - GetLogs(qf QueryFilter, fields ...string) (logs QueryResult[common.Log], err error) - GetTraces(qf QueryFilter, fields ...string) (traces QueryResult[common.Trace], err error) - GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) - GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) - GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) - - GetMaxBlockNumber(chainId *big.Int) (maxBlockNumber *big.Int, err error) - GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (maxBlockNumber *big.Int, err error) - GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockCount *big.Int, err 
error) - - /** - * Get block headers ordered from latest to oldest. - */ - GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) (blockHeaders []common.BlockHeader, err error) - /** - * Gets only the data required for validation. - */ - GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blocks []common.BlockData, err error) - /** - * Finds missing block numbers in a range. Block numbers should be sequential. - */ - FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (blockNumbers []*big.Int, err error) - /** - * Gets full block data with transactions, logs and traces. - */ - GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) (blocks []common.BlockData, err error) - - Close() error -} - -func NewStorageConnector(cfg *config.StorageConfig) (IStorage, error) { - var storage IStorage - var err error - - storage.OrchestratorStorage, err = NewOrchestratorConnector(&cfg.Orchestrator) - if err != nil { - return IStorage{}, fmt.Errorf("failed to create orchestrator storage: %w", err) - } - - storage.StagingStorage, err = NewStagingConnector(&cfg.Staging) - if err != nil { - return IStorage{}, fmt.Errorf("failed to create staging storage: %w", err) - } - - storage.MainStorage, err = NewMainConnector(&cfg.Main, &storage.OrchestratorStorage) - if err != nil { - return IStorage{}, fmt.Errorf("failed to create main storage: %w", err) - } - - return storage, nil -} - -func NewOrchestratorConnector(cfg *config.StorageOrchestratorConfig) (IOrchestratorStorage, error) { - var conn interface{} - var err error - - // Default to "auto" if Type is not specified - storageType := cfg.Type - if storageType == "" { - storageType = "auto" - } - - // Handle explicit type selection - if storageType != "auto" { - switch storageType { - case "redis": - if cfg.Redis == nil { - return nil, fmt.Errorf("redis storage type specified but redis config is nil") - } - conn, err = NewRedisConnector(cfg.Redis) - case "postgres": - if cfg.Postgres == nil { - return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") - } - conn, err = NewPostgresConnector(cfg.Postgres) - case "clickhouse": - if cfg.Clickhouse == nil { - return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") - } - conn, err = NewClickHouseConnector(cfg.Clickhouse) - case "pebble__experimental": - if cfg.Pebble == nil { - return nil, fmt.Errorf("pebble storage type specified but pebble config is nil") - } - conn, err = NewPebbleConnector(cfg.Pebble) - case "badger": - if cfg.Badger == nil { - return nil, fmt.Errorf("badger storage type specified but badger config is nil") - } - conn, err = NewBadgerConnector(cfg.Badger) - default: - return nil, fmt.Errorf("unknown storage type: %s", storageType) - } - } else { - // Auto mode: use the first non-nil config (existing behavior) - if cfg.Redis != nil { - conn, err = NewRedisConnector(cfg.Redis) - } else if cfg.Postgres != nil { - conn, err = NewPostgresConnector(cfg.Postgres) - } else if cfg.Clickhouse != nil { - conn, err = NewClickHouseConnector(cfg.Clickhouse) - } else if cfg.Badger != nil { - conn, err = NewBadgerConnector(cfg.Badger) - } else { - return nil, fmt.Errorf("no storage driver configured") - } - } - - if err != nil { - return nil, err - } - - typedConn, ok := conn.(IOrchestratorStorage) - if !ok { - return nil, fmt.Errorf("connector does not implement the required interface") - } - - return typedConn, nil -} - -func NewStagingConnector(cfg 
*config.StorageStagingConfig) (IStagingStorage, error) { - var conn interface{} - var err error - - // Default to "auto" if Type is not specified - storageType := cfg.Type - if storageType == "" { - storageType = "auto" - } - - // Handle explicit type selection - if storageType != "auto" { - switch storageType { - case "postgres": - if cfg.Postgres == nil { - return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") - } - conn, err = NewPostgresConnector(cfg.Postgres) - case "clickhouse": - if cfg.Clickhouse == nil { - return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") - } - conn, err = NewClickHouseConnector(cfg.Clickhouse) - case "pebble__experimental": - if cfg.Pebble == nil { - return nil, fmt.Errorf("pebble storage type specified but pebble config is nil") - } - conn, err = NewPebbleConnector(cfg.Pebble) - case "badger": - if cfg.Badger == nil { - return nil, fmt.Errorf("badger storage type specified but badger config is nil") - } - conn, err = NewBadgerConnector(cfg.Badger) - default: - return nil, fmt.Errorf("unknown storage type: %s", storageType) - } - } else { - // Auto mode: use the first non-nil config (existing behavior) - if cfg.Postgres != nil { - conn, err = NewPostgresConnector(cfg.Postgres) - } else if cfg.Clickhouse != nil { - conn, err = NewClickHouseConnector(cfg.Clickhouse) - } else if cfg.Badger != nil { - conn, err = NewBadgerConnector(cfg.Badger) - } else { - return nil, fmt.Errorf("no storage driver configured") - } - } - - if err != nil { - return nil, err - } - - typedConn, ok := conn.(IStagingStorage) - if !ok { - return nil, fmt.Errorf("connector does not implement the required interface") - } - - return typedConn, nil -} - -func NewMainConnector(cfg *config.StorageMainConfig, orchestratorStorage *IOrchestratorStorage) (IMainStorage, error) { - var conn interface{} - var err error - - // Default to "auto" if Type is not specified - storageType := cfg.Type - if storageType == "" { - storageType = "auto" - } - - // Handle explicit type selection - if storageType != "auto" { - switch storageType { - case "kafka": - if cfg.Kafka == nil { - return nil, fmt.Errorf("kafka storage type specified but kafka config is nil") - } - if orchestratorStorage == nil { - return nil, fmt.Errorf("orchestrator storage must be provided for kafka main storage") - } - conn, err = NewKafkaConnector(cfg.Kafka, orchestratorStorage) - case "s3": - if cfg.S3 == nil { - return nil, fmt.Errorf("s3 storage type specified but s3 config is nil") - } - conn, err = NewS3Connector(cfg.S3) - case "postgres": - if cfg.Postgres == nil { - return nil, fmt.Errorf("postgres storage type specified but postgres config is nil") - } - conn, err = NewPostgresConnector(cfg.Postgres) - case "clickhouse": - if cfg.Clickhouse == nil { - return nil, fmt.Errorf("clickhouse storage type specified but clickhouse config is nil") - } - conn, err = NewClickHouseConnector(cfg.Clickhouse) - default: - return nil, fmt.Errorf("unknown storage type: %s", storageType) - } - } else { - // Auto mode: use the first non-nil config (existing behavior) - if cfg.Kafka != nil { - if orchestratorStorage == nil { - return nil, fmt.Errorf("orchestrator storage must be provided for kafka main storage") - } - conn, err = NewKafkaConnector(cfg.Kafka, orchestratorStorage) - } else if cfg.S3 != nil { - conn, err = NewS3Connector(cfg.S3) - } else if cfg.Postgres != nil { - conn, err = NewPostgresConnector(cfg.Postgres) - } else if cfg.Clickhouse != nil { - conn, err = 
NewClickHouseConnector(cfg.Clickhouse) - } else { - return nil, fmt.Errorf("no storage driver configured") - } - } - - if err != nil { - return nil, err - } - - typedConn, ok := conn.(IMainStorage) - if !ok { - return nil, fmt.Errorf("connector does not implement the required interface") - } - - return typedConn, nil -} diff --git a/internal/storage/kafka.go b/internal/storage/kafka.go deleted file mode 100644 index c9571aee..00000000 --- a/internal/storage/kafka.go +++ /dev/null @@ -1,137 +0,0 @@ -package storage - -import ( - "fmt" - "math/big" - - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -// KafkaConnector uses Redis for metadata storage and Kafka for block data delivery -type KafkaConnector struct { - cfg *config.KafkaConfig - kafkaPublisher *KafkaPublisher - orchestratorStorage IOrchestratorStorage -} - -func NewKafkaConnector(cfg *config.KafkaConfig, orchestratorStorage *IOrchestratorStorage) (*KafkaConnector, error) { - // Initialize Kafka publisher - kafkaPublisher, err := NewKafkaPublisher(cfg) - if err != nil { - return nil, err - } - - if orchestratorStorage == nil { - return nil, fmt.Errorf("orchestrator storage must be provided for kafka connector") - } - - return &KafkaConnector{ - cfg: cfg, - kafkaPublisher: kafkaPublisher, - orchestratorStorage: *orchestratorStorage, - }, nil -} - -// InsertBlockData publishes block data to Kafka instead of storing in database -func (kr *KafkaConnector) InsertBlockData(data []*common.BlockData) error { - if len(data) == 0 { - return nil - } - - // Publish to Kafka - if err := kr.kafkaPublisher.PublishBlockData(data); err != nil { - return fmt.Errorf("failed to publish block data to kafka: %w", err) - } - log.Debug(). - Int("blocks", len(data)). 
- Msg("Published block data to Kafka") - - chainId := data[0].Block.ChainId - maxBlockNumber := data[len(data)-1].Block.Number - if err := kr.orchestratorStorage.SetLastCommittedBlockNumber(chainId, maxBlockNumber); err != nil { - return fmt.Errorf("failed to update last committed block number in orchestrator storage: %w", err) - } - - return nil -} - -// ReplaceBlockData handles reorg by publishing both old and new data to Kafka -func (kr *KafkaConnector) ReplaceBlockData(data []*common.BlockData) ([]common.BlockData, error) { - if len(data) == 0 { - return nil, nil - } - - oldBlocks := []common.BlockData{} - - // TODO: We need to fetch the old blocks from the primary data store - if err := kr.kafkaPublisher.PublishReorg(data, data); err != nil { - return nil, fmt.Errorf("failed to publish reorg blocks to kafka: %w", err) - } - - // save cursor - return oldBlocks, nil -} - -func (kr *KafkaConnector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - return kr.orchestratorStorage.GetLastCommittedBlockNumber(chainId) -} - -func (kr *KafkaConnector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - return QueryResult[common.TokenBalance]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - return QueryResult[common.TokenTransfer]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - return nil, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -// Query methods return errors as this is a write-only connector for streaming -func (kr *KafkaConnector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - return QueryResult[common.Block]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for 
streaming") -} - -func (kr *KafkaConnector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - return QueryResult[common.Transaction]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - return QueryResult[common.Log]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - return QueryResult[common.Trace]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -func (kr *KafkaConnector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - return QueryResult[interface{}]{}, fmt.Errorf("query operations are not supported with Kafka connector - this is a write-only connector for streaming") -} - -// Close closes the Redis connection -func (kr *KafkaConnector) Close() error { - return kr.kafkaPublisher.Close() -} diff --git a/internal/storage/pebble.go b/internal/storage/pebble.go deleted file mode 100644 index 8f8ca8f5..00000000 --- a/internal/storage/pebble.go +++ /dev/null @@ -1,698 +0,0 @@ -package storage - -import ( - "bytes" - "encoding/gob" - "fmt" - "math/big" - "os" - "path/filepath" - "sort" - "strings" - "sync" - "time" - - "github.com/cockroachdb/pebble" - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -type PebbleConnector struct { - db *pebble.DB - gcTicker *time.Ticker - stopGC chan struct{} - closeOnce sync.Once // Ensure Close() is only executed once - - // Configuration - stagingDataTTL time.Duration // TTL for staging data entries - gcInterval time.Duration // Interval for running garbage collection - cacheRefreshInterval time.Duration // Interval for refreshing range cache - cacheStalenessTimeout time.Duration // Timeout before considering cache entry stale - - // In-memory block range cache - // NOTE: Staging data has a TTL. The cache is refreshed periodically - // to detect expired entries and update min/max ranges accordingly. - // Pebble doesn't provide expiry notifications, so we rely on periodic scanning. 
- rangeCache map[string]*pebbleBlockRange // chainId -> range - rangeCacheMu sync.RWMutex - rangeUpdateChan chan string // channel for triggering background updates - stopRangeUpdate chan struct{} - - // TTL tracking - since Pebble doesn't have built-in TTL support - ttlTracker map[string]time.Time - ttlTrackerMu sync.RWMutex -} - -type pebbleBlockRange struct { - min *big.Int - max *big.Int - lastUpdated time.Time -} - -func NewPebbleConnector(cfg *config.PebbleConfig) (*PebbleConnector, error) { - path := cfg.Path - if path == "" { - path = filepath.Join(os.TempDir(), "insight-staging-pebble") - } - - // Configure Pebble options for optimal performance - cache := pebble.NewCache(256 << 20) // 256MB total cache (index + block) - defer cache.Unref() - - opts := &pebble.Options{ - // Memory and caching - MemTableSize: 128 << 20, // 128MB per memtable - MemTableStopWritesThreshold: 4, // 512MB total - L0CompactionThreshold: 8, - L0StopWritesThreshold: 24, - - // Compaction settings - MaxConcurrentCompactions: func() int { return 1 }, - - // Cache sizes - Cache: cache, - - // File sizes - Levels: make([]pebble.LevelOptions, 7), - - DisableWAL: false, - } - - // Configure level-specific options - for i := range opts.Levels { - opts.Levels[i] = pebble.LevelOptions{ - BlockSize: 128 << 10, // 128KB blocks - IndexBlockSize: 256 << 10, // 256KB index blocks - FilterPolicy: nil, // Disable bloom filters for ephemeral cache (save memory) - } - if i == 0 { - // L0 gets special treatment - opts.Levels[i].TargetFileSize = 128 << 20 // 128MB - opts.Levels[i].Compression = pebble.SnappyCompression - } else { - // Other levels grow exponentially - opts.Levels[i].TargetFileSize = min( - opts.Levels[i-1].TargetFileSize*2, - 1<<30, // 2GB cap - ) - opts.Levels[i].Compression = pebble.ZstdCompression - } - } - - // Disable Pebble's verbose logging - opts.Logger = nil - - db, err := pebble.Open(path, opts) - if err != nil { - return nil, fmt.Errorf("failed to open pebble db: %w", err) - } - - pc := &PebbleConnector{ - db: db, - stopGC: make(chan struct{}), - rangeCache: make(map[string]*pebbleBlockRange), - rangeUpdateChan: make(chan string, 5), - stopRangeUpdate: make(chan struct{}), - stagingDataTTL: 10 * time.Minute, - gcInterval: 60 * time.Second, - cacheRefreshInterval: 60 * time.Second, - cacheStalenessTimeout: 120 * time.Second, - ttlTracker: make(map[string]time.Time), - } - - // Start GC routine for TTL management - pc.gcTicker = time.NewTicker(pc.gcInterval) - go pc.runGC() - - // Start range cache update routine - go pc.runRangeCacheUpdater() - - return pc, nil -} - -func (pc *PebbleConnector) runGC() { - for { - select { - case <-pc.gcTicker.C: - pc.cleanExpiredEntries() - case <-pc.stopGC: - return - } - } -} - -// cleanExpiredEntries removes entries that have exceeded their TTL -func (pc *PebbleConnector) cleanExpiredEntries() { - pc.ttlTrackerMu.Lock() - now := time.Now() - var expiredKeys []string - for key, expiresAt := range pc.ttlTracker { - if now.After(expiresAt) { - expiredKeys = append(expiredKeys, key) - } - } - pc.ttlTrackerMu.Unlock() - - if len(expiredKeys) == 0 { - return - } - - // Delete expired entries - batch := pc.db.NewBatch() - defer batch.Close() - - for _, key := range expiredKeys { - if err := batch.Delete([]byte(key), nil); err != nil { - log.Debug().Err(err).Str("key", key).Msg("Failed to delete expired key") - continue - } - } - - if err := batch.Commit(pebble.Sync); err != nil { - log.Debug().Err(err).Msg("Failed to commit TTL cleanup batch") - return - } - - // Remove 
from tracker - pc.ttlTrackerMu.Lock() - for _, key := range expiredKeys { - delete(pc.ttlTracker, key) - } - pc.ttlTrackerMu.Unlock() - - // Trigger range cache updates for affected chains - affectedChains := make(map[string]bool) - for _, key := range expiredKeys { - if strings.HasPrefix(key, "blockdata:") { - parts := strings.Split(key, ":") - if len(parts) >= 2 { - affectedChains[parts[1]] = true - } - } - } - - for chainId := range affectedChains { - select { - case pc.rangeUpdateChan <- chainId: - default: - } - } -} - -// runRangeCacheUpdater runs in the background to validate cache entries -func (pc *PebbleConnector) runRangeCacheUpdater() { - ticker := time.NewTicker(pc.cacheRefreshInterval) - defer ticker.Stop() - - for { - select { - case chainIdStr := <-pc.rangeUpdateChan: - pc.updateRangeForChain(chainIdStr) - - case <-ticker.C: - pc.refreshStaleRanges() - - case <-pc.stopRangeUpdate: - return - } - } -} - -func (pc *PebbleConnector) updateRangeForChain(chainIdStr string) { - chainId, ok := new(big.Int).SetString(chainIdStr, 10) - if !ok { - return - } - - // Scan the actual data to find min/max - var minBlock, maxBlock *big.Int - prefix := pebbleBlockKeyRange(chainId) - - iter, err := pc.db.NewIter(&pebble.IterOptions{ - LowerBound: []byte(prefix), - UpperBound: append([]byte(prefix), 0xff), // End of prefix range - }) - if err != nil { - log.Error().Err(err).Str("chainId", chainIdStr).Msg("Failed to create iterator") - return - } - defer iter.Close() - - for iter.First(); iter.Valid(); iter.Next() { - key := string(iter.Key()) - parts := strings.Split(key, ":") - if len(parts) != 3 { - continue - } - - blockNum, ok := new(big.Int).SetString(parts[2], 10) - if !ok { - continue - } - - if minBlock == nil || blockNum.Cmp(minBlock) < 0 { - minBlock = blockNum - } - if maxBlock == nil || blockNum.Cmp(maxBlock) > 0 { - maxBlock = blockNum - } - } - - if err := iter.Error(); err != nil { - log.Error().Err(err).Str("chainId", chainIdStr).Msg("Iterator error during range update") - return - } - - // Update cache - pc.rangeCacheMu.Lock() - if minBlock != nil && maxBlock != nil { - pc.rangeCache[chainIdStr] = &pebbleBlockRange{ - min: minBlock, - max: maxBlock, - lastUpdated: time.Now(), - } - } else { - // No data, remove from cache - delete(pc.rangeCache, chainIdStr) - } - pc.rangeCacheMu.Unlock() -} - -func (pc *PebbleConnector) refreshStaleRanges() { - pc.rangeCacheMu.RLock() - staleChains := []string{} - now := time.Now() - for chainId, r := range pc.rangeCache { - if now.Sub(r.lastUpdated) > pc.cacheStalenessTimeout { - staleChains = append(staleChains, chainId) - } - } - pc.rangeCacheMu.RUnlock() - - // Update stale entries - for _, chainId := range staleChains { - select { - case pc.rangeUpdateChan <- chainId: - // Queued for update - default: - // Channel full, skip this update - } - } -} - -func (pc *PebbleConnector) Close() error { - var closeErr error - pc.closeOnce.Do(func() { - if pc.gcTicker != nil { - pc.gcTicker.Stop() - close(pc.stopGC) - } - select { - case <-pc.stopRangeUpdate: - default: - close(pc.stopRangeUpdate) - } - closeErr = pc.db.Close() - }) - return closeErr -} - -// Key construction helpers for Pebble -func pebbleBlockKey(chainId *big.Int, blockNumber *big.Int) []byte { - return fmt.Appendf(nil, "blockdata:%s:%s", chainId.String(), blockNumber.String()) -} - -func pebbleBlockKeyRange(chainId *big.Int) []byte { - return fmt.Appendf(nil, "blockdata:%s:", chainId.String()) -} - -func pebbleLastReorgKey(chainId *big.Int) []byte { - return fmt.Appendf(nil, 
"reorg:%s", chainId.String()) -} - -func pebbleLastPublishedKey(chainId *big.Int) []byte { - return fmt.Appendf(nil, "publish:%s", chainId.String()) -} - -func pebbleLastCommittedKey(chainId *big.Int) []byte { - return fmt.Appendf(nil, "commit:%s", chainId.String()) -} - -func (pc *PebbleConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - val, closer, err := pc.db.Get(pebbleLastReorgKey(chainId)) - if err == pebble.ErrNotFound { - return big.NewInt(0), nil - } - if err != nil { - return nil, err - } - defer closer.Close() - - blockNumber := new(big.Int).SetBytes(val) - return blockNumber, nil -} - -func (pc *PebbleConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - return pc.db.Set(pebbleLastReorgKey(chainId), blockNumber.Bytes(), pebble.Sync) -} - -// IStagingStorage implementation -func (pc *PebbleConnector) InsertStagingData(data []common.BlockData) error { - // Track min/max blocks per chain for cache update - chainRanges := make(map[string]struct { - min *big.Int - max *big.Int - }) - - batch := pc.db.NewBatch() - defer batch.Close() - - now := time.Now() - expiresAt := now.Add(pc.stagingDataTTL) - - // Insert block data and track ranges - for _, blockData := range data { - key := pebbleBlockKey(blockData.Block.ChainId, blockData.Block.Number) - - var buf bytes.Buffer - if err := gob.NewEncoder(&buf).Encode(blockData); err != nil { - return err - } - - // Store with TTL tracking - if err := batch.Set(key, buf.Bytes(), nil); err != nil { - return err - } - - // Track TTL - pc.ttlTrackerMu.Lock() - pc.ttlTracker[string(key)] = expiresAt - pc.ttlTrackerMu.Unlock() - - // Track min/max for this chain - chainStr := blockData.Block.ChainId.String() - if r, exists := chainRanges[chainStr]; exists { - if blockData.Block.Number.Cmp(r.min) < 0 { - chainRanges[chainStr] = struct { - min *big.Int - max *big.Int - }{blockData.Block.Number, r.max} - } - if blockData.Block.Number.Cmp(r.max) > 0 { - chainRanges[chainStr] = struct { - min *big.Int - max *big.Int - }{r.min, blockData.Block.Number} - } - } else { - chainRanges[chainStr] = struct { - min *big.Int - max *big.Int - }{blockData.Block.Number, blockData.Block.Number} - } - } - - if err := batch.Commit(pebble.Sync); err != nil { - return err - } - - // Update in-memory cache - pc.rangeCacheMu.Lock() - defer pc.rangeCacheMu.Unlock() - - for chainStr, newRange := range chainRanges { - existing, exists := pc.rangeCache[chainStr] - if exists { - // Update existing range - if newRange.min.Cmp(existing.min) < 0 { - existing.min = newRange.min - } - if newRange.max.Cmp(existing.max) > 0 { - existing.max = newRange.max - } - existing.lastUpdated = time.Now() - } else { - // Create new range entry - pc.rangeCache[chainStr] = &pebbleBlockRange{ - min: newRange.min, - max: newRange.max, - lastUpdated: time.Now(), - } - // Trigger background update to ensure accuracy - select { - case pc.rangeUpdateChan <- chainStr: - default: - // Channel full, will be updated in next periodic scan - } - } - } - - return nil -} - -func (pc *PebbleConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { - var results []common.BlockData - - if len(qf.BlockNumbers) > 0 { - // Fetch specific blocks - for _, blockNum := range qf.BlockNumbers { - key := pebbleBlockKey(qf.ChainId, blockNum) - val, closer, err := pc.db.Get(key) - if err == pebble.ErrNotFound { - continue - } - if err != nil { - return nil, err - } - - var blockData common.BlockData - if err := 
gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - closer.Close() - return nil, err - } - closer.Close() - - results = append(results, blockData) - } - return results, nil - } - - // Range query - prefix := pebbleBlockKeyRange(qf.ChainId) - - iter, err := pc.db.NewIter(&pebble.IterOptions{ - LowerBound: []byte(prefix), - UpperBound: append([]byte(prefix), 0xff), - }) - if err != nil { - return nil, err - } - defer iter.Close() - - count := 0 - for iter.First(); iter.Valid(); iter.Next() { - if qf.Offset > 0 && count < qf.Offset { - count++ - continue - } - - val, err := iter.ValueAndErr() - if err != nil { - return nil, err - } - - var blockData common.BlockData - if err := gob.NewDecoder(bytes.NewReader(val)).Decode(&blockData); err != nil { - log.Debug().Err(err).Msg("Failed to decode block data") - continue - } - - // Apply filters - if qf.StartBlock != nil && blockData.Block.Number.Cmp(qf.StartBlock) < 0 { - continue - } - if qf.EndBlock != nil && blockData.Block.Number.Cmp(qf.EndBlock) > 0 { - continue - } - - results = append(results, blockData) - - count++ - if qf.Limit > 0 && len(results) >= qf.Limit { - break - } - } - - if err := iter.Error(); err != nil { - return nil, err - } - - // Sort by block number - sort.Slice(results, func(i, j int) bool { - return results[i].Block.Number.Cmp(results[j].Block.Number) < 0 - }) - - return results, nil -} - -func (pc *PebbleConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - val, closer, err := pc.db.Get(pebbleLastPublishedKey(chainId)) - if err == pebble.ErrNotFound { - return big.NewInt(0), nil - } - if err != nil { - return nil, err - } - defer closer.Close() - - blockNumber := new(big.Int).SetBytes(val) - return blockNumber, nil -} - -func (pc *PebbleConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - return pc.db.Set(pebbleLastPublishedKey(chainId), blockNumber.Bytes(), pebble.Sync) -} - -func (pc *PebbleConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { - val, closer, err := pc.db.Get(pebbleLastCommittedKey(chainId)) - if err == pebble.ErrNotFound { - return big.NewInt(0), nil - } - if err != nil { - return nil, err - } - defer closer.Close() - - blockNumber := new(big.Int).SetBytes(val) - return blockNumber, nil -} - -func (pc *PebbleConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - return pc.db.Set(pebbleLastCommittedKey(chainId), blockNumber.Bytes(), pebble.Sync) -} - -func (pc *PebbleConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - prefix := pebbleBlockKeyRange(chainId) - var deletedSome bool - - batch := pc.db.NewBatch() - defer batch.Close() - - iter, err := pc.db.NewIter(&pebble.IterOptions{ - LowerBound: []byte(prefix), - UpperBound: append([]byte(prefix), 0xff), - }) - if err != nil { - return err - } - defer iter.Close() - - var keysToDelete [][]byte - - for iter.First(); iter.Valid(); iter.Next() { - key := iter.Key() - keyStr := string(key) - parts := strings.Split(keyStr, ":") - if len(parts) != 3 { - continue - } - - blockNum, ok := new(big.Int).SetString(parts[2], 10) - if !ok { - continue - } - - if blockNum.Cmp(blockNumber) <= 0 { - // Make a copy of the key - keyCopy := make([]byte, len(key)) - copy(keyCopy, key) - keysToDelete = append(keysToDelete, keyCopy) - } - } - - if err := iter.Error(); err != nil { - return err - } - - // Delete the keys - for _, key := range keysToDelete { - if err := batch.Delete(key, nil); err != nil { 
- return err - } - - // Remove from TTL tracker - pc.ttlTrackerMu.Lock() - delete(pc.ttlTracker, string(key)) - pc.ttlTrackerMu.Unlock() - - deletedSome = true - } - - if err := batch.Commit(pebble.Sync); err != nil { - return err - } - - // Update cache if we deleted something - if deletedSome { - chainStr := chainId.String() - pc.rangeCacheMu.Lock() - if entry, exists := pc.rangeCache[chainStr]; exists { - // Check if we need to update min - if entry.min.Cmp(blockNumber) <= 0 { - // The new minimum must be blockNumber + 1 or higher - newMin := new(big.Int).Add(blockNumber, big.NewInt(1)) - // Only update if the new min is still <= max - if newMin.Cmp(entry.max) <= 0 { - entry.min = newMin - entry.lastUpdated = time.Now() - } else { - // No blocks remaining, remove from cache - delete(pc.rangeCache, chainStr) - } - } - } - pc.rangeCacheMu.Unlock() - - // Trigger background update to ensure accuracy - select { - case pc.rangeUpdateChan <- chainStr: - default: - // Channel full, will be updated in next periodic scan - } - } - - return nil -} - -// GetStagingDataBlockRange returns the minimum and maximum block numbers stored for a given chain -func (pc *PebbleConnector) GetStagingDataBlockRange(chainId *big.Int) (*big.Int, *big.Int, error) { - chainStr := chainId.String() - - // Check cache - pc.rangeCacheMu.RLock() - if entry, exists := pc.rangeCache[chainStr]; exists { - // Always return cached values - they're updated live during insert/delete - min := new(big.Int).Set(entry.min) - max := new(big.Int).Set(entry.max) - pc.rangeCacheMu.RUnlock() - return min, max, nil - } - pc.rangeCacheMu.RUnlock() - - // Cache miss - do synchronous update to populate cache - pc.updateRangeForChain(chainStr) - - // Return newly cached value - pc.rangeCacheMu.RLock() - defer pc.rangeCacheMu.RUnlock() - - if entry, exists := pc.rangeCache[chainStr]; exists { - min := new(big.Int).Set(entry.min) - max := new(big.Int).Set(entry.max) - return min, max, nil - } - - // No data found - return nil, nil, nil -} diff --git a/internal/storage/postgres.go b/internal/storage/postgres.go deleted file mode 100644 index ea4f9a32..00000000 --- a/internal/storage/postgres.go +++ /dev/null @@ -1,295 +0,0 @@ -package storage - -import ( - "database/sql" - "encoding/json" - "fmt" - "math/big" - "strings" - "time" - - _ "github.com/lib/pq" - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -type PostgresConnector struct { - db *sql.DB - cfg *config.PostgresConfig -} - -func NewPostgresConnector(cfg *config.PostgresConfig) (*PostgresConnector, error) { - connStr := fmt.Sprintf("host=%s port=%d user=%s password=%s dbname=%s", - cfg.Host, cfg.Port, cfg.Username, cfg.Password, cfg.Database) - - // Default to "require" for security if SSL mode not specified - sslMode := cfg.SSLMode - if sslMode == "" { - sslMode = "require" - log.Info().Msg("No SSL mode specified, defaulting to 'require' for secure connection") - } - connStr += fmt.Sprintf(" sslmode=%s", sslMode) - - if cfg.ConnectTimeout > 0 { - connStr += fmt.Sprintf(" connect_timeout=%d", cfg.ConnectTimeout) - } - - db, err := sql.Open("postgres", connStr) - if err != nil { - return nil, fmt.Errorf("failed to connect to postgres: %w", err) - } - - db.SetMaxOpenConns(cfg.MaxOpenConns) - db.SetMaxIdleConns(cfg.MaxIdleConns) - - if cfg.MaxConnLifetime > 0 { - db.SetConnMaxLifetime(time.Duration(cfg.MaxConnLifetime) * time.Second) - } - - if err := db.Ping(); err != nil { - return nil, 
fmt.Errorf("failed to ping postgres: %w", err) - } - - return &PostgresConnector{ - db: db, - cfg: cfg, - }, nil -} - -func (p *PostgresConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors - WHERE cursor_type = 'reorg' AND chain_id = $1` - - var blockNumberString string - err := p.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - - return blockNumber, nil -} - -func (p *PostgresConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'reorg', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - - _, err := p.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -// Staging Storage Implementation - -func (p *PostgresConnector) InsertStagingData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - // Build multi-row INSERT without transaction for better performance - valueStrings := make([]string, 0, len(data)) - valueArgs := make([]interface{}, 0, len(data)*3) - - for i, blockData := range data { - blockDataJSON, err := json.Marshal(blockData) - if err != nil { - return err - } - - valueStrings = append(valueStrings, fmt.Sprintf("($%d, $%d, $%d)", - i*3+1, i*3+2, i*3+3)) - valueArgs = append(valueArgs, - blockData.Block.ChainId.String(), - blockData.Block.Number.String(), - string(blockDataJSON), - ) - } - - query := fmt.Sprintf(`INSERT INTO block_data (chain_id, block_number, data) - VALUES %s - ON CONFLICT (chain_id, block_number) - DO UPDATE SET data = EXCLUDED.data, updated_at = NOW()`, strings.Join(valueStrings, ",")) - - _, err := p.db.Exec(query, valueArgs...) - return err -} - -func (p *PostgresConnector) GetStagingData(qf QueryFilter) ([]common.BlockData, error) { - // No need to check is_deleted since we're using hard deletes for staging data - query := `SELECT data FROM block_data WHERE 1=1` - - args := []interface{}{} - argCount := 0 - - if qf.ChainId != nil && qf.ChainId.Sign() > 0 { - argCount++ - query += fmt.Sprintf(" AND chain_id = $%d", argCount) - args = append(args, qf.ChainId.String()) - } - - if len(qf.BlockNumbers) > 0 { - placeholders := make([]string, len(qf.BlockNumbers)) - for i, bn := range qf.BlockNumbers { - argCount++ - placeholders[i] = fmt.Sprintf("$%d", argCount) - args = append(args, bn.String()) - } - query += fmt.Sprintf(" AND block_number IN (%s)", strings.Join(placeholders, ",")) - } else if qf.StartBlock != nil && qf.EndBlock != nil { - argCount++ - query += fmt.Sprintf(" AND block_number BETWEEN $%d AND $%d", argCount, argCount+1) - args = append(args, qf.StartBlock.String(), qf.EndBlock.String()) - argCount++ // Increment once more since we used two args - } - - query += " ORDER BY block_number ASC" - - if qf.Limit > 0 { - argCount++ - query += fmt.Sprintf(" LIMIT $%d", argCount) - args = append(args, qf.Limit) - } - - rows, err := p.db.Query(query, args...) 
- if err != nil { - return nil, err - } - defer func() { - if err := rows.Close(); err != nil { - log.Error().Err(err).Msg("Failed to close rows in GetStagingData") - } - }() - - // Initialize as empty slice to match ClickHouse behavior - blockDataList := make([]common.BlockData, 0) - for rows.Next() { - var blockDataJson string - if err := rows.Scan(&blockDataJson); err != nil { - return nil, fmt.Errorf("error scanning block data: %w", err) - } - - var blockData common.BlockData - if err := json.Unmarshal([]byte(blockDataJson), &blockData); err != nil { - return nil, err - } - - blockDataList = append(blockDataList, blockData) - } - - return blockDataList, rows.Err() -} - -func (p *PostgresConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'publish' AND chain_id = $1` - - var blockNumberString string - err := p.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (p *PostgresConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'publish', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - - _, err := p.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -func (p *PostgresConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { - query := `SELECT cursor_value FROM cursors WHERE cursor_type = 'commit' AND chain_id = $1` - - var blockNumberString string - err := p.db.QueryRow(query, chainId.String()).Scan(&blockNumberString) - if err != nil { - if err == sql.ErrNoRows { - return big.NewInt(0), nil - } - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(blockNumberString, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", blockNumberString) - } - return blockNumber, nil -} - -func (p *PostgresConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - query := `INSERT INTO cursors (chain_id, cursor_type, cursor_value) - VALUES ($1, 'commit', $2) - ON CONFLICT (chain_id, cursor_type) - DO UPDATE SET cursor_value = EXCLUDED.cursor_value, updated_at = NOW()` - - _, err := p.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -func (p *PostgresConnector) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - query := `DELETE FROM block_data - WHERE ctid IN ( - SELECT ctid - FROM block_data - WHERE chain_id = $1 - AND block_number <= $2 - FOR UPDATE SKIP LOCKED - )` - _, err := p.db.Exec(query, chainId.String(), blockNumber.String()) - return err -} - -// GetStagingDataBlockRange returns the minimum and maximum block numbers stored for a given chain -func (p *PostgresConnector) GetStagingDataBlockRange(chainId *big.Int) (*big.Int, *big.Int, error) { - query := `SELECT MIN(block_number), MAX(block_number) - FROM block_data - WHERE chain_id = $1` - - var minStr, maxStr sql.NullString - err := p.db.QueryRow(query, chainId.String()).Scan(&minStr, &maxStr) - if err != nil { - if err == sql.ErrNoRows { - return nil, nil, nil - } - return nil, nil, err - } - - // If either min or 
max is NULL (no data), return nil for both - if !minStr.Valid || !maxStr.Valid { - return nil, nil, nil - } - - minBlock, ok := new(big.Int).SetString(minStr.String, 10) - if !ok { - return nil, nil, fmt.Errorf("failed to parse min block number: %s", minStr.String) - } - - maxBlock, ok := new(big.Int).SetString(maxStr.String, 10) - if !ok { - return nil, nil, fmt.Errorf("failed to parse max block number: %s", maxStr.String) - } - - return minBlock, maxBlock, nil -} - -// Close closes the database connection -func (p *PostgresConnector) Close() error { - return p.db.Close() -} diff --git a/internal/storage/postgres_connector_test.go b/internal/storage/postgres_connector_test.go deleted file mode 100644 index 103245b6..00000000 --- a/internal/storage/postgres_connector_test.go +++ /dev/null @@ -1,124 +0,0 @@ -package storage - -import ( - "math/big" - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -func TestPostgresConnector_Cursors(t *testing.T) { - // Skip if no Postgres is available - t.Skip("Skipping Postgres tests - requires running Postgres instance") - - cfg := &config.PostgresConfig{ - Host: "localhost", - Port: 5432, - Username: "test", - Password: "test", - Database: "test_orchestrator", - SSLMode: "disable", - MaxOpenConns: 10, - MaxIdleConns: 5, - } - - conn, err := NewPostgresConnector(cfg) - require.NoError(t, err) - defer conn.Close() - - chainId := big.NewInt(1) - blockNumber := big.NewInt(67890) - - // Test SetLastReorgCheckedBlockNumber - err = conn.SetLastReorgCheckedBlockNumber(chainId, blockNumber) - assert.NoError(t, err) - - // Test GetLastReorgCheckedBlockNumber - retrievedBlockNumber, err := conn.GetLastReorgCheckedBlockNumber(chainId) - assert.NoError(t, err) - assert.Equal(t, blockNumber, retrievedBlockNumber) - - // Test update - newBlockNumber := big.NewInt(67891) - err = conn.SetLastReorgCheckedBlockNumber(chainId, newBlockNumber) - assert.NoError(t, err) - - retrievedBlockNumber, err = conn.GetLastReorgCheckedBlockNumber(chainId) - assert.NoError(t, err) - assert.Equal(t, newBlockNumber, retrievedBlockNumber) -} - -func TestPostgresConnector_StagingData(t *testing.T) { - // Skip if no Postgres is available - t.Skip("Skipping Postgres tests - requires running Postgres instance") - - cfg := &config.PostgresConfig{ - Host: "localhost", - Port: 5432, - Username: "test", - Password: "test", - Database: "test_staging", - SSLMode: "disable", - MaxOpenConns: 10, - MaxIdleConns: 5, - } - - conn, err := NewPostgresConnector(cfg) - require.NoError(t, err) - defer conn.Close() - - // Create test block data - blockData := []common.BlockData{ - { - Block: common.Block{ - ChainId: big.NewInt(1), - Number: big.NewInt(100), - Hash: "0xabc123", - }, - Transactions: []common.Transaction{}, - Logs: []common.Log{}, - }, - { - Block: common.Block{ - ChainId: big.NewInt(1), - Number: big.NewInt(101), - Hash: "0xdef456", - }, - Transactions: []common.Transaction{}, - Logs: []common.Log{}, - }, - } - - // Test InsertStagingData - err = conn.InsertStagingData(blockData) - assert.NoError(t, err) - - // Test GetStagingData - qf := QueryFilter{ - ChainId: big.NewInt(1), - BlockNumbers: []*big.Int{big.NewInt(100), big.NewInt(101)}, - } - - retrievedData, err := conn.GetStagingData(qf) - assert.NoError(t, err) - assert.Len(t, retrievedData, 2) - - // Test GetStagingData with StartBlock and EndBlock - rangeQf := QueryFilter{ - ChainId: 
big.NewInt(1), - StartBlock: big.NewInt(100), - EndBlock: big.NewInt(101), - } - - retrievedDataRange, err := conn.GetStagingData(rangeQf) - assert.NoError(t, err) - assert.Len(t, retrievedDataRange, 2) - - retrievedData, err = conn.GetStagingData(qf) - assert.NoError(t, err) - assert.Len(t, retrievedData, 1) - assert.Equal(t, big.NewInt(101), retrievedData[0].Block.Number) -} diff --git a/internal/storage/redis.go b/internal/storage/redis.go deleted file mode 100644 index bb718105..00000000 --- a/internal/storage/redis.go +++ /dev/null @@ -1,134 +0,0 @@ -package storage - -import ( - "context" - "crypto/tls" - "fmt" - "math/big" - "time" - - "github.com/redis/go-redis/v9" - config "github.com/thirdweb-dev/indexer/configs" -) - -// Redis key namespace constants for better organization and maintainability -const ( - // Cursor keys for tracking positions - KeyCursorReorg = "cursor:reorg" // String: cursor:reorg:{chainId} - KeyCursorPublish = "cursor:publish" // String: cursor:publish:{chainId} - KeyCursorCommit = "cursor:commit" // String: cursor:commit:{chainId} -) - -// RedisConnector uses Redis for metadata storage -type RedisConnector struct { - redisClient *redis.Client - cfg *config.RedisConfig -} - -func NewRedisConnector(cfg *config.RedisConfig) (*RedisConnector, error) { - // Connect to Redis - var tlsConfig *tls.Config - if cfg.EnableTLS { - tlsConfig = &tls.Config{ - MinVersion: tls.VersionTLS12, // Ensure a secure TLS version - } - } - - redisClient := redis.NewClient(&redis.Options{ - Addr: fmt.Sprintf("%s:%d", cfg.Host, cfg.Port), - Password: cfg.Password, - DB: cfg.DB, - TLSConfig: tlsConfig, - }) - - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - - if err := redisClient.Ping(ctx).Err(); err != nil { - return nil, fmt.Errorf("failed to connect to redis: %w", err) - } - - return &RedisConnector{ - redisClient: redisClient, - cfg: cfg, - }, nil -} - -// Orchestrator Storage Implementation -func (kr *RedisConnector) GetLastReorgCheckedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - - return blockNumber, nil -} - -func (kr *RedisConnector) SetLastReorgCheckedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorReorg, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -func (kr *RedisConnector) GetLastPublishedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - return blockNumber, nil -} - -func (kr *RedisConnector) SetLastPublishedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorPublish, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -func (kr 
*RedisConnector) GetLastCommittedBlockNumber(chainId *big.Int) (*big.Int, error) { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - - val, err := kr.redisClient.Get(ctx, key).Result() - if err == redis.Nil { - return big.NewInt(0), nil - } else if err != nil { - return nil, err - } - - blockNumber, ok := new(big.Int).SetString(val, 10) - if !ok { - return nil, fmt.Errorf("failed to parse block number: %s", val) - } - return blockNumber, nil -} - -func (kr *RedisConnector) SetLastCommittedBlockNumber(chainId *big.Int, blockNumber *big.Int) error { - ctx := context.Background() - key := fmt.Sprintf("%s:%s", KeyCursorCommit, chainId.String()) - return kr.redisClient.Set(ctx, key, blockNumber.String(), 0).Err() -} - -// Close closes the Redis connection -func (kr *RedisConnector) Close() error { - return kr.redisClient.Close() -} diff --git a/internal/storage/s3.go b/internal/storage/s3.go deleted file mode 100644 index e69b796f..00000000 --- a/internal/storage/s3.go +++ /dev/null @@ -1,1174 +0,0 @@ -package storage - -import ( - "bytes" - "context" - "crypto/sha256" - "encoding/hex" - "encoding/json" - "fmt" - "io" - "math/big" - "sort" - "strings" - "sync" - "time" - - "github.com/aws/aws-sdk-go-v2/aws" - awsconfig "github.com/aws/aws-sdk-go-v2/config" - "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/parquet-go/parquet-go" - "github.com/rs/zerolog/log" - config "github.com/thirdweb-dev/indexer/configs" - "github.com/thirdweb-dev/indexer/internal/common" -) - -type S3Connector struct { - client *s3.Client - config *config.S3StorageConfig - formatter DataFormatter - buffer IBlockBuffer - - // Flush control - stopCh chan struct{} - flushCh chan struct{} - flushDoneCh chan struct{} // Signals when flush is complete - flushTimer *time.Timer - timerMu sync.Mutex - lastAddTime time.Time - wg sync.WaitGroup - closeOnce sync.Once -} - -// DataFormatter interface for different file formats -type DataFormatter interface { - FormatBlockData(data []common.BlockData) ([]byte, error) - GetFileExtension() string - GetContentType() string -} - -// ParquetBlockData represents the complete block data in Parquet format -type ParquetBlockData struct { - ChainId uint64 `parquet:"chain_id"` - BlockNumber uint64 `parquet:"block_number"` // Numeric for efficient min/max queries - BlockHash string `parquet:"block_hash"` - BlockTimestamp int64 `parquet:"block_timestamp"` - Block []byte `parquet:"block_json"` - Transactions []byte `parquet:"transactions_json"` - Logs []byte `parquet:"logs_json"` - Traces []byte `parquet:"traces_json"` -} - -func NewS3Connector(cfg *config.S3StorageConfig) (*S3Connector, error) { - awsCfg, err := awsconfig.LoadDefaultConfig(context.Background(), - awsconfig.WithRegion(cfg.Region), - ) - if err != nil { - return nil, fmt.Errorf("failed to load AWS config: %w", err) - } - - // Override with explicit credentials if provided - if cfg.AccessKeyID != "" && cfg.SecretAccessKey != "" { - awsCfg.Credentials = aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { - return aws.Credentials{ - AccessKeyID: cfg.AccessKeyID, - SecretAccessKey: cfg.SecretAccessKey, - }, nil - }) - } - - s3Client := s3.NewFromConfig(awsCfg, func(o *s3.Options) { - if cfg.Endpoint != "" { - o.BaseEndpoint = aws.String(cfg.Endpoint) - } - }) - - // Set defaults - if cfg.Format == "" { - cfg.Format = "parquet" - } - - // Initialize parquet config with defaults if using parquet - if cfg.Format == "parquet" && cfg.Parquet == nil { - 
cfg.Parquet = &config.ParquetConfig{ - Compression: "snappy", - RowGroupSize: 256, // MB - PageSize: 8192, // KB - } - } - - if cfg.BufferSize == 0 { - cfg.BufferSize = 512 // 512MB default - } - if cfg.BufferTimeout == 0 { - cfg.BufferTimeout = 1 * 60 * 60 // 1 hour in seconds default - } - if cfg.FlushTimeout == 0 { - cfg.FlushTimeout = 300 // 5 mins default - } - - // Create formatter based on format - var formatter DataFormatter - switch cfg.Format { - case "parquet": - formatter = &ParquetFormatter{config: cfg.Parquet} - default: - return nil, fmt.Errorf("unsupported format: %s", cfg.Format) - } - - // Create buffer with configured settings - var buffer IBlockBuffer - buffer, err = NewBadgerBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) - if err != nil { - // fallback - log.Error().Err(err).Msg("Failed to create Badger buffer, falling back to in-memory buffer") - buffer = NewBlockBuffer(cfg.BufferSize, cfg.MaxBlocksPerFile) - } - - s3c := &S3Connector{ - client: s3Client, - config: cfg, - formatter: formatter, - buffer: buffer, - stopCh: make(chan struct{}), - flushCh: make(chan struct{}, 1), - flushDoneCh: make(chan struct{}), - } - - // Start background flush worker - s3c.wg.Add(1) - go s3c.flushWorker() - - return s3c, nil -} - -func (s *S3Connector) InsertBlockData(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - // Add to buffer and check if flush is needed - shouldFlush := s.buffer.Add(data) - - // Start or reset timer when first data is added - s.timerMu.Lock() - _, blockCount := s.buffer.Size() - // Check if this is the first batch added (buffer was empty before) - if blockCount == len(data) && s.config.BufferTimeout > 0 { - // First data added to buffer, track time and start timer - s.lastAddTime = time.Now() - if s.flushTimer != nil { - s.flushTimer.Stop() - } - s.flushTimer = time.AfterFunc(time.Duration(s.config.BufferTimeout)*time.Second, func() { - select { - case s.flushCh <- struct{}{}: - default: - } - }) - } - s.timerMu.Unlock() - - if shouldFlush { - // Stop timer and trigger flush - s.stopFlushTimer() - select { - case s.flushCh <- struct{}{}: - default: - } - } - - return nil -} - -// flushWorker runs in background and handles buffer flushes -func (s *S3Connector) flushWorker() { - defer s.wg.Done() - - // Check periodically for expired buffers - ticker := time.NewTicker(10 * time.Second) - defer ticker.Stop() - - for { - select { - case <-s.stopCh: - // Final flush before stopping - s.flushBuffer() - return - case <-s.flushCh: - s.flushBuffer() - // Signal flush completion - select { - case s.flushDoneCh <- struct{}{}: - default: - } - case <-ticker.C: - // Check if buffer has expired based on our own tracking - if s.isBufferExpired() { - s.flushBuffer() - } - } - } -} - -// stopFlushTimer stops the flush timer if it's running -func (s *S3Connector) stopFlushTimer() { - s.timerMu.Lock() - defer s.timerMu.Unlock() - - if s.flushTimer != nil { - s.flushTimer.Stop() - s.flushTimer = nil - } -} - -// isBufferExpired checks if the buffer has exceeded the timeout duration -func (s *S3Connector) isBufferExpired() bool { - s.timerMu.Lock() - defer s.timerMu.Unlock() - - if s.config.BufferTimeout <= 0 || s.lastAddTime.IsZero() || s.buffer.IsEmpty() { - return false - } - - return time.Since(s.lastAddTime) > time.Duration(s.config.BufferTimeout)*time.Second -} - -// flushBuffer writes buffered data to S3 -func (s *S3Connector) flushBuffer() error { - data := s.buffer.Flush() - if len(data) == 0 { - return nil - } - - // Stop timer and reset 
last add time since we're flushing - s.stopFlushTimer() - s.timerMu.Lock() - s.lastAddTime = time.Time{} - s.timerMu.Unlock() - - return s.uploadBatchData(data) -} - -// uploadBatchData handles uploading batched data to S3, grouped by chain -func (s *S3Connector) uploadBatchData(data []common.BlockData) error { - // Group blocks by chain to generate appropriate keys - chainGroups := make(map[uint64][]common.BlockData) - for _, block := range data { - chainId := block.Block.ChainId.Uint64() - chainGroups[chainId] = append(chainGroups[chainId], block) - } - - for _, blocks := range chainGroups { - // Sort blocks by number - sort.Slice(blocks, func(i, j int) bool { - return blocks[i].Block.Number.Cmp(blocks[j].Block.Number) < 0 - }) - - // Process in chunks if MaxBlocksPerFile is set, otherwise upload all at once - if s.config.MaxBlocksPerFile > 0 { - // Split into chunks based on MaxBlocksPerFile - for i := 0; i < len(blocks); i += s.config.MaxBlocksPerFile { - end := i + s.config.MaxBlocksPerFile - if end > len(blocks) { - end = len(blocks) - } - - chunk := blocks[i:end] - if err := s.uploadBatch(chunk); err != nil { - log.Error().Err(err).Msg("Failed to upload batch to S3") - return err - } - } - } else { - // No block limit, upload entire buffer as one file - if err := s.uploadBatch(blocks); err != nil { - log.Error().Err(err).Msg("Failed to upload batch to S3") - return err - } - } - } - - return nil -} - -// Flush manually triggers a buffer flush and waits for completion -func (s *S3Connector) Flush() error { - // Check if buffer has data - if s.buffer.IsEmpty() { - return nil - } - - // Clear any pending flush completion signals - select { - case <-s.flushDoneCh: - default: - } - - // Trigger flush - select { - case s.flushCh <- struct{}{}: - // Wait for flush to complete - select { - case <-s.flushDoneCh: - return nil - case <-time.After(time.Duration(s.config.FlushTimeout) * time.Second): - return fmt.Errorf("flush timeout after %d seconds", s.config.FlushTimeout) - } - default: - // Flush channel is full, likely a flush is already in progress - // Wait for it to complete - select { - case <-s.flushDoneCh: - return nil - case <-time.After(time.Duration(s.config.FlushTimeout) * time.Second): - return fmt.Errorf("flush timeout after %d seconds", s.config.FlushTimeout) - } - } -} - -// Close closes the S3 connector and flushes any remaining data -func (s *S3Connector) Close() error { - var closeErr error - - s.closeOnce.Do(func() { - // Stop the flush timer - s.stopFlushTimer() - - // First, ensure any pending data is flushed - if err := s.Flush(); err != nil { - log.Error().Err(err).Msg("Error flushing buffer during close") - closeErr = err - } - - // Signal stop - close(s.stopCh) - - // Wait for worker to finish - s.wg.Wait() - - // Clean up buffer resources - if err := s.buffer.Close(); err != nil { - log.Error().Err(err).Msg("Error closing buffer") - if closeErr == nil { - closeErr = err - } - } - }) - - return closeErr -} - -func (s *S3Connector) uploadBatch(data []common.BlockData) error { - if len(data) == 0 { - return nil - } - - chainId := data[0].Block.ChainId.Uint64() - startBlock := data[0].Block.Number - endBlock := data[len(data)-1].Block.Number - // Use the first block's timestamp for year partitioning - blockTimestamp := data[0].Block.Timestamp - - // Format data using the configured formatter - formattedData, err := s.formatter.FormatBlockData(data) - if err != nil { - return fmt.Errorf("failed to format block data: %w", err) - } - - // Generate S3 key with 
chain_id/year partitioning based on block timestamp - key := s.generateS3Key(chainId, startBlock, endBlock, blockTimestamp) - - // Upload to S3 - ctx := context.Background() - _, err = s.client.PutObject(ctx, &s3.PutObjectInput{ - Bucket: aws.String(s.config.Bucket), - Key: aws.String(key), - Body: bytes.NewReader(formattedData), - ContentType: aws.String(s.formatter.GetContentType()), - Metadata: map[string]string{ - "chain_id": fmt.Sprintf("%d", chainId), - "start_block": startBlock.String(), - "end_block": endBlock.String(), - "block_count": fmt.Sprintf("%d", len(data)), - "timestamp": blockTimestamp.Format(time.RFC3339), - "checksum": s.calculateChecksum(formattedData), - "file_size": fmt.Sprintf("%d", len(formattedData)), - }, - }) - - if err != nil { - return fmt.Errorf("failed to upload to S3: %w", err) - } - - log.Info(). - Uint64("chain_id", chainId). - Str("min_block", startBlock.String()). - Str("max_block", endBlock.String()). - Int("block_count", len(data)). - Int("file_size_mb", len(formattedData)/(1024*1024)). - Str("s3_key", key). - Msg("Successfully uploaded buffered blocks to S3") - - return nil -} - -func (s *S3Connector) generateS3Key(chainID uint64, startBlock, endBlock *big.Int, blockTimestamp time.Time) string { - // Use the block's timestamp for year partitioning - year := blockTimestamp.Year() - if len(s.config.Prefix) > 0 { - return fmt.Sprintf("%s/chain_%d/year=%d/blocks_%s_%s%s", - s.config.Prefix, - chainID, - year, - startBlock.String(), - endBlock.String(), - s.formatter.GetFileExtension(), - ) - } - return fmt.Sprintf("chain_%d/year=%d/blocks_%s_%s%s", - chainID, - year, - startBlock.String(), - endBlock.String(), - s.formatter.GetFileExtension(), - ) -} - -// ParquetFormatter implements DataFormatter for Parquet format -type ParquetFormatter struct { - config *config.ParquetConfig -} - -func (f *ParquetFormatter) FormatBlockData(data []common.BlockData) ([]byte, error) { - var parquetData []ParquetBlockData - - for _, d := range data { - // Serialize each component to JSON - blockJSON, err := json.Marshal(d.Block) - if err != nil { - return nil, fmt.Errorf("failed to marshal block: %w", err) - } - - // Default transactions to empty array if nil - var txJSON []byte - if d.Transactions == nil { - txJSON, err = json.Marshal([]common.Transaction{}) - } else { - txJSON, err = json.Marshal(d.Transactions) - } - if err != nil { - return nil, fmt.Errorf("failed to marshal transactions: %w", err) - } - - // Default logs to empty array if nil - var logsJSON []byte - if d.Logs == nil { - logsJSON, err = json.Marshal([]common.Log{}) - } else { - logsJSON, err = json.Marshal(d.Logs) - } - if err != nil { - return nil, fmt.Errorf("failed to marshal logs: %w", err) - } - - // Default traces to empty array if nil - var tracesJSON []byte - if d.Traces == nil { - tracesJSON, err = json.Marshal([]common.Trace{}) - } else { - tracesJSON, err = json.Marshal(d.Traces) - } - if err != nil { - return nil, fmt.Errorf("failed to marshal traces: %w", err) - } - - // Convert block number to uint64 for efficient queries - blockNum := d.Block.Number.Uint64() - if d.Block.Number.BitLen() > 64 { - return nil, fmt.Errorf("block number exceeds uint64 is not supported") - } - - pd := ParquetBlockData{ - ChainId: d.Block.ChainId.Uint64(), - BlockNumber: blockNum, - BlockHash: d.Block.Hash, - BlockTimestamp: d.Block.Timestamp.Unix(), - Block: blockJSON, - Transactions: txJSON, - Logs: logsJSON, - Traces: tracesJSON, - } - - parquetData = append(parquetData, pd) - } - - var buf bytes.Buffer - - 
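For illustration only, a minimal standalone Go sketch of the object-key layout and checksum metadata produced by the removed generateS3Key and uploadBatch above. The prefix, chain id, block range, and timestamp in main are invented sample values.

    package main

    import (
        "crypto/sha256"
        "encoding/hex"
        "fmt"
        "math/big"
        "time"
    )

    // generateS3Key reproduces the partitioning scheme of the removed connector:
    // [prefix/]chain_{id}/year={YYYY}/blocks_{start}_{end}{ext}, with the year
    // taken from the first block's timestamp.
    func generateS3Key(prefix string, chainID uint64, start, end *big.Int, ts time.Time, ext string) string {
        key := fmt.Sprintf("chain_%d/year=%d/blocks_%s_%s%s", chainID, ts.Year(), start.String(), end.String(), ext)
        if prefix != "" {
            return prefix + "/" + key
        }
        return key
    }

    // checksum matches the removed helper: hex-encoded SHA-256, stored in the
    // uploaded object's "checksum" metadata.
    func checksum(data []byte) string {
        h := sha256.Sum256(data)
        return hex.EncodeToString(h[:])
    }

    func main() {
        ts := time.Date(2024, 6, 1, 0, 0, 0, 0, time.UTC) // illustrative timestamp
        key := generateS3Key("raw", 1, big.NewInt(1000), big.NewInt(1999), ts, ".parquet")
        fmt.Println(key) // raw/chain_1/year=2024/blocks_1000_1999.parquet
        fmt.Println(checksum([]byte("example")))
    }
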
// Configure writer with compression and statistics for efficient queries - writerOptions := []parquet.WriterOption{ - f.getCompressionCodec(), - // Enable page statistics for query optimization (min/max per page) - parquet.DataPageStatistics(true), - // Set page buffer size for better statistics granularity - parquet.PageBufferSize(8 * 1024 * 1024), // 8MB pages - // Configure sorting for optimal query performance - // Sort by block_number first, then block_timestamp for efficient range queries - parquet.SortingWriterConfig( - parquet.SortingColumns( - parquet.Ascending("block_number"), - parquet.Ascending("block_timestamp"), - ), - ), - // Set column index size limit (enables column indexes for all columns) - parquet.ColumnIndexSizeLimit(16 * 1024), // 16KB limit for column index - } - - writer := parquet.NewGenericWriter[ParquetBlockData](&buf, writerOptions...) - - // Write all data at once for better compression and statistics - if _, err := writer.Write(parquetData); err != nil { - return nil, fmt.Errorf("failed to write parquet data: %w", err) - } - - if err := writer.Close(); err != nil { - return nil, err - } - - return buf.Bytes(), nil -} - -func (f *ParquetFormatter) GetFileExtension() string { - return ".parquet" -} - -func (f *ParquetFormatter) GetContentType() string { - return "application/octet-stream" -} - -func (f *ParquetFormatter) getCompressionCodec() parquet.WriterOption { - switch f.config.Compression { - case "gzip": - return parquet.Compression(&parquet.Gzip) - case "zstd": - return parquet.Compression(&parquet.Zstd) - default: - return parquet.Compression(&parquet.Snappy) - } -} - -func (s *S3Connector) calculateChecksum(data []byte) string { - hash := sha256.Sum256(data) - return hex.EncodeToString(hash[:]) -} - -// Implement remaining IMainStorage methods with empty implementations -// These will return errors indicating they're not supported - -func (s *S3Connector) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { - return nil, fmt.Errorf("ReplaceBlockData not supported by S3 connector") -} - -func (s *S3Connector) GetBlocks(qf QueryFilter, fields ...string) (QueryResult[common.Block], error) { - return QueryResult[common.Block]{}, fmt.Errorf("GetBlocks not supported by S3 connector - use Athena or similar") -} - -func (s *S3Connector) GetTransactions(qf QueryFilter, fields ...string) (QueryResult[common.Transaction], error) { - return QueryResult[common.Transaction]{}, fmt.Errorf("GetTransactions not supported by S3 connector - use Athena or similar") -} - -func (s *S3Connector) GetLogs(qf QueryFilter, fields ...string) (QueryResult[common.Log], error) { - return QueryResult[common.Log]{}, fmt.Errorf("GetLogs not supported by S3 connector - use Athena or similar") -} - -func (s *S3Connector) GetTraces(qf QueryFilter, fields ...string) (QueryResult[common.Trace], error) { - return QueryResult[common.Trace]{}, fmt.Errorf("GetTraces not supported by S3 connector") -} - -func (s *S3Connector) GetAggregations(table string, qf QueryFilter) (QueryResult[interface{}], error) { - return QueryResult[interface{}]{}, fmt.Errorf("GetAggregations not supported by S3 connector") -} - -func (s *S3Connector) GetTokenBalances(qf BalancesQueryFilter, fields ...string) (QueryResult[common.TokenBalance], error) { - return QueryResult[common.TokenBalance]{}, fmt.Errorf("GetTokenBalances not supported by S3 connector") -} - -func (s *S3Connector) GetTokenTransfers(qf TransfersQueryFilter, fields ...string) (QueryResult[common.TokenTransfer], error) { - return 
QueryResult[common.TokenTransfer]{}, fmt.Errorf("GetTokenTransfers not supported by S3 connector") -} - -func (s *S3Connector) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - // First check the buffer for blocks from this chain - maxBlock := s.buffer.GetMaxBlockNumber(chainId) - if maxBlock == nil { - maxBlock = big.NewInt(0) - } - - // Then check S3 for the maximum block number - prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) - if s.config.Prefix != "" { - prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) - } - - ctx := context.Background() - paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ - Bucket: aws.String(s.config.Bucket), - Prefix: aws.String(prefix), - }) - - for paginator.HasMorePages() { - page, err := paginator.NextPage(ctx) - if err != nil { - return nil, fmt.Errorf("failed to list S3 objects: %w", err) - } - - for _, obj := range page.Contents { - // Extract block range from filename: blocks_{start}_{end}.parquet - if obj.Key == nil { - continue - } - _, endBlock := s.extractBlockRangeFromKey(*obj.Key) - if endBlock != nil && endBlock.Cmp(maxBlock) > 0 { - maxBlock = endBlock - } - } - } - - return maxBlock, nil -} - -func (s *S3Connector) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - maxBlock := big.NewInt(0) - foundAny := false - - // First check the buffer for blocks in this range - bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) - for _, block := range bufferBlocks { - blockNum := block.Block.Number - if !foundAny || blockNum.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(blockNum) - foundAny = true - } - } - - // Then check S3 files - prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) - if s.config.Prefix != "" { - prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) - } - - ctx := context.Background() - paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ - Bucket: aws.String(s.config.Bucket), - Prefix: aws.String(prefix), - }) - - for paginator.HasMorePages() { - page, err := paginator.NextPage(ctx) - if err != nil { - return nil, fmt.Errorf("failed to list objects: %w", err) - } - - for _, obj := range page.Contents { - if obj.Key == nil { - continue - } - fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) - if fileStart == nil || fileEnd == nil { - continue - } - - // Check if this file overlaps with our range - if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { - // The maximum block in this file that's within our range - maxInFile := new(big.Int).Set(fileEnd) - if maxInFile.Cmp(endBlock) > 0 { - maxInFile = endBlock - } - - if !foundAny || maxInFile.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(maxInFile) - foundAny = true - } - } - } - } - - if !foundAny { - return big.NewInt(0), nil - } - - return maxBlock, nil -} - -func (s *S3Connector) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - minBlock := big.NewInt(0) - maxBlock := big.NewInt(0) - count := big.NewInt(0) - foundAny := false - - // First check the buffer for blocks in this range - bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) - for _, block := range bufferBlocks { - blockNum := block.Block.Number - count.Add(count, big.NewInt(1)) - - if !foundAny { - minBlock = new(big.Int).Set(blockNum) - maxBlock = new(big.Int).Set(blockNum) - foundAny = true - } else { - if blockNum.Cmp(minBlock) < 0 { - minBlock = new(big.Int).Set(blockNum) - } - if 
blockNum.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(blockNum) - } - } - } - - // Then check S3 files - prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) - if s.config.Prefix != "" { - prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) - } - - ctx := context.Background() - paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ - Bucket: aws.String(s.config.Bucket), - Prefix: aws.String(prefix), - }) - - for paginator.HasMorePages() { - page, err := paginator.NextPage(ctx) - if err != nil { - return nil, fmt.Errorf("failed to list objects: %w", err) - } - - for _, obj := range page.Contents { - if obj.Key == nil { - continue - } - fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) - if fileStart == nil || fileEnd == nil { - continue - } - - // Check if this file overlaps with our range - if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { - // Calculate the effective range within our query bounds - effectiveStart := new(big.Int).Set(fileStart) - if effectiveStart.Cmp(startBlock) < 0 { - effectiveStart = startBlock - } - effectiveEnd := new(big.Int).Set(fileEnd) - if effectiveEnd.Cmp(endBlock) > 0 { - effectiveEnd = endBlock - } - - // Update min/max blocks - if !foundAny { - minBlock = new(big.Int).Set(effectiveStart) - maxBlock = new(big.Int).Set(effectiveEnd) - foundAny = true - } else { - if effectiveStart.Cmp(minBlock) < 0 { - minBlock = new(big.Int).Set(effectiveStart) - } - if effectiveEnd.Cmp(maxBlock) > 0 { - maxBlock = new(big.Int).Set(effectiveEnd) - } - } - - // Add the count of blocks in this file's overlapping range - // Note: This assumes contiguous blocks in the file - blocksInRange := new(big.Int).Sub(effectiveEnd, effectiveStart) - blocksInRange.Add(blocksInRange, big.NewInt(1)) // Add 1 because range is inclusive - count.Add(count, blocksInRange) - } - } - } - - return count, nil -} - -func (s *S3Connector) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - var headers []common.BlockHeader - - // First get headers from buffer - bufferData := s.buffer.GetData() - for _, block := range bufferData { - if block.Block.ChainId.Cmp(chainId) == 0 { - // Check if block is in range (if from is specified) - if from != nil && block.Block.Number.Cmp(from) > 0 { - continue - } - // Apply limit if specified - if to != nil && len(headers) >= int(to.Int64()) { - break - } - headers = append(headers, common.BlockHeader{ - Number: block.Block.Number, - Hash: block.Block.Hash, - ParentHash: block.Block.ParentHash, - }) - } - } - - // If we need more headers, get from S3 - if to == nil || len(headers) < int(to.Int64()) { - // Download relevant parquet files and extract block headers - files, err := s.findFilesInRange(chainId, big.NewInt(0), from) // from 0 to 'from' block - if err != nil { - return nil, err - } - - for _, file := range files { - fileHeaders, err := s.extractBlockHeadersFromFile(file, chainId, from, to) - if err != nil { - log.Warn().Err(err).Str("file", file).Msg("Failed to extract headers from file") - continue - } - headers = append(headers, fileHeaders...) 
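For illustration only, a minimal standalone Go sketch of the inclusive range-intersection arithmetic the removed GetBlockCount above applies to each S3 file. The function and variable names are illustrative, and it assumes, as the deleted code notes, that each file holds contiguous blocks.

    package main

    import (
        "fmt"
        "math/big"
    )

    // overlapCount returns how many block numbers the inclusive range
    // [fileStart, fileEnd] contributes to the inclusive query range
    // [queryStart, queryEnd].
    func overlapCount(fileStart, fileEnd, queryStart, queryEnd *big.Int) *big.Int {
        // No overlap at all.
        if fileEnd.Cmp(queryStart) < 0 || fileStart.Cmp(queryEnd) > 0 {
            return big.NewInt(0)
        }

        // Clamp the file range to the query bounds.
        start := new(big.Int).Set(fileStart)
        if start.Cmp(queryStart) < 0 {
            start.Set(queryStart)
        }
        end := new(big.Int).Set(fileEnd)
        if end.Cmp(queryEnd) > 0 {
            end.Set(queryEnd)
        }

        // Inclusive count: end - start + 1.
        count := new(big.Int).Sub(end, start)
        return count.Add(count, big.NewInt(1))
    }

    func main() {
        // A file covering blocks 900-1100 overlaps the query [1000, 2000] in 101 blocks.
        fmt.Println(overlapCount(big.NewInt(900), big.NewInt(1100), big.NewInt(1000), big.NewInt(2000)))
    }
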
- } - } - - // Sort in descending order - sort.Slice(headers, func(i, j int) bool { - return headers[i].Number.Cmp(headers[j].Number) > 0 - }) - - // Apply limit if specified - if to != nil && len(headers) > int(to.Int64()) { - headers = headers[:to.Int64()] - } - - return headers, nil -} - -func (s *S3Connector) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - if startBlock == nil || endBlock == nil { - return nil, fmt.Errorf("start block and end block must not be nil") - } - - if startBlock.Cmp(endBlock) > 0 { - return nil, fmt.Errorf("start block must be less than or equal to end block") - } - - // First check buffer for blocks in range - blockData := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) - - // Then find and download relevant files from S3 - files, err := s.findFilesInRange(chainId, startBlock, endBlock) - if err != nil { - return nil, err - } - - for _, file := range files { - fileData, err := s.downloadAndParseFile(file, chainId, startBlock, endBlock) - if err != nil { - log.Warn().Err(err).Str("file", file).Msg("Failed to parse file") - continue - } - blockData = append(blockData, fileData...) - } - - // Sort by block number - sort.Slice(blockData, func(i, j int) bool { - return blockData[i].Block.Number.Cmp(blockData[j].Block.Number) < 0 - }) - - return blockData, nil -} - -func (s *S3Connector) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - // Build a set of all block numbers we have - blockSet := make(map[string]bool) - - // First add blocks from buffer - bufferBlocks := s.buffer.GetBlocksInRange(chainId, startBlock, endBlock) - for _, block := range bufferBlocks { - blockSet[block.Block.Number.String()] = true - } - - // Then check S3 files in range - files, err := s.findFilesInRange(chainId, startBlock, endBlock) - if err != nil { - return nil, err - } - - for _, file := range files { - fileStart, fileEnd := s.extractBlockRangeFromKey(file) - if fileStart == nil || fileEnd == nil { - continue - } - - // Add all blocks in this file's range to our set - for i := new(big.Int).Set(fileStart); i.Cmp(fileEnd) <= 0; i.Add(i, big.NewInt(1)) { - if i.Cmp(startBlock) >= 0 && i.Cmp(endBlock) <= 0 { - blockSet[i.String()] = true - } - } - } - - // Find missing blocks - var missing []*big.Int - for i := new(big.Int).Set(startBlock); i.Cmp(endBlock) <= 0; i.Add(i, big.NewInt(1)) { - if !blockSet[i.String()] { - missing = append(missing, new(big.Int).Set(i)) - } - } - - return missing, nil -} - -func (s *S3Connector) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - if len(blockNumbers) == 0 { - return nil, nil - } - - // Create a map for quick lookup - blockNumMap := make(map[string]bool) - for _, bn := range blockNumbers { - blockNumMap[bn.String()] = true - } - - var result []common.BlockData - - // First check buffer for requested blocks - bufferData := s.buffer.GetData() - for _, block := range bufferData { - if block.Block.ChainId.Cmp(chainId) == 0 { - if blockNumMap[block.Block.Number.String()] { - result = append(result, block) - // Remove from map so we don't fetch it from S3 - delete(blockNumMap, block.Block.Number.String()) - } - } - } - - // If all blocks were in buffer, return early - if len(blockNumMap) == 0 { - return result, nil - } - - // Sort remaining block numbers to optimize file access - var remainingBlocks []*big.Int - for blockStr := range blockNumMap { - bn, _ := 
new(big.Int).SetString(blockStr, 10) - remainingBlocks = append(remainingBlocks, bn) - } - sort.Slice(remainingBlocks, func(i, j int) bool { - return remainingBlocks[i].Cmp(remainingBlocks[j]) < 0 - }) - - if len(remainingBlocks) == 0 { - return result, nil - } - - minBlock := remainingBlocks[0] - maxBlock := remainingBlocks[len(remainingBlocks)-1] - - // Find relevant files for remaining blocks - files, err := s.findFilesInRange(chainId, minBlock, maxBlock) - if err != nil { - return nil, err - } - - for _, file := range files { - fileData, err := s.downloadAndParseFile(file, chainId, minBlock, maxBlock) - if err != nil { - log.Warn().Err(err).Str("file", file).Msg("Failed to parse file") - continue - } - - // Filter to only requested blocks - for _, bd := range fileData { - if blockNumMap[bd.Block.Number.String()] { - result = append(result, bd) - } - } - } - - return result, nil -} - -// Helper functions - -func (s *S3Connector) extractBlockRangeFromKey(key string) (*big.Int, *big.Int) { - // Extract block range from key like: chain_1/year=2024/blocks_1000_2000.parquet - parts := strings.Split(key, "/") - if len(parts) == 0 { - return nil, nil - } - - filename := parts[len(parts)-1] - if !strings.HasPrefix(filename, "blocks_") || !strings.HasSuffix(filename, s.formatter.GetFileExtension()) { - return nil, nil - } - - // Remove prefix and extension - rangeStr := strings.TrimPrefix(filename, "blocks_") - rangeStr = strings.TrimSuffix(rangeStr, s.formatter.GetFileExtension()) - - // Split by underscore to get start and end - rangeParts := strings.Split(rangeStr, "_") - if len(rangeParts) != 2 { - return nil, nil - } - - startBlock, ok1 := new(big.Int).SetString(rangeParts[0], 10) - endBlock, ok2 := new(big.Int).SetString(rangeParts[1], 10) - if !ok1 || !ok2 { - return nil, nil - } - - return startBlock, endBlock -} - -func (s *S3Connector) findFilesInRange(chainId *big.Int, startBlock, endBlock *big.Int) ([]string, error) { - prefix := fmt.Sprintf("chain_%d/", chainId.Uint64()) - if s.config.Prefix != "" { - prefix = fmt.Sprintf("%s/%s", s.config.Prefix, prefix) - } - - ctx := context.Background() - paginator := s3.NewListObjectsV2Paginator(s.client, &s3.ListObjectsV2Input{ - Bucket: aws.String(s.config.Bucket), - Prefix: aws.String(prefix), - }) - - var relevantFiles []string - for paginator.HasMorePages() { - page, err := paginator.NextPage(ctx) - if err != nil { - return nil, fmt.Errorf("failed to list objects: %w", err) - } - - for _, obj := range page.Contents { - if obj.Key == nil { - continue - } - - fileStart, fileEnd := s.extractBlockRangeFromKey(*obj.Key) - if fileStart == nil || fileEnd == nil { - continue - } - - // Check if this file's range overlaps with our query range - if fileEnd.Cmp(startBlock) >= 0 && fileStart.Cmp(endBlock) <= 0 { - relevantFiles = append(relevantFiles, *obj.Key) - } - } - } - - return relevantFiles, nil -} - -func (s *S3Connector) downloadAndParseFile(key string, chainId *big.Int, startBlock, endBlock *big.Int) ([]common.BlockData, error) { - ctx := context.Background() - - // Download the file - result, err := s.client.GetObject(ctx, &s3.GetObjectInput{ - Bucket: aws.String(s.config.Bucket), - Key: aws.String(key), - }) - if err != nil { - return nil, fmt.Errorf("failed to download file: %w", err) - } - defer result.Body.Close() - - // Read entire file into memory (required for parquet reader) - data, err := io.ReadAll(result.Body) - if err != nil { - return nil, fmt.Errorf("failed to read file data: %w", err) - } - - // Read the parquet file - reader 
:= parquet.NewGenericReader[ParquetBlockData](bytes.NewReader(data)) - defer reader.Close() - - var blockData []common.BlockData - parquetRows := make([]ParquetBlockData, 100) // Read in batches - - for { - n, err := reader.Read(parquetRows) - if err != nil && err.Error() != "EOF" { - return nil, fmt.Errorf("failed to read parquet: %w", err) - } - if n == 0 { - break - } - - for i := 0; i < n; i++ { - pd := parquetRows[i] - - // Convert uint64 block number to big.Int - blockNum := new(big.Int).SetUint64(pd.BlockNumber) - - // Filter by range if specified - if startBlock != nil && blockNum.Cmp(startBlock) < 0 { - continue - } - if endBlock != nil && blockNum.Cmp(endBlock) > 0 { - continue - } - - // Unmarshal JSON data - var block common.Block - if err := json.Unmarshal(pd.Block, &block); err != nil { - log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal block") - continue - } - - var transactions []common.Transaction - if len(pd.Transactions) > 0 { - if err := json.Unmarshal(pd.Transactions, &transactions); err != nil { - log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal transactions") - } - } - - var logs []common.Log - if len(pd.Logs) > 0 { - if err := json.Unmarshal(pd.Logs, &logs); err != nil { - log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal logs") - } - } - - var traces []common.Trace - if len(pd.Traces) > 0 { - if err := json.Unmarshal(pd.Traces, &traces); err != nil { - log.Warn().Err(err).Uint64("block", pd.BlockNumber).Msg("Failed to unmarshal traces") - } - } - - blockData = append(blockData, common.BlockData{ - Block: block, - Transactions: transactions, - Logs: logs, - Traces: traces, - }) - } - } - - return blockData, nil -} - -func (s *S3Connector) extractBlockHeadersFromFile(key string, chainId *big.Int, from, to *big.Int) ([]common.BlockHeader, error) { - // Download and parse only the block headers - blockData, err := s.downloadAndParseFile(key, chainId, from, to) - if err != nil { - return nil, err - } - - headers := make([]common.BlockHeader, 0, len(blockData)) - for _, bd := range blockData { - headers = append(headers, common.BlockHeader{ - Number: bd.Block.Number, - Hash: bd.Block.Hash, - ParentHash: bd.Block.ParentHash, - }) - } - - return headers, nil -} diff --git a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql b/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql deleted file mode 100644 index 1bab7b83..00000000 --- a/internal/tools/clickhouse/0000_clickhouse_create_blocks_table.sql +++ /dev/null @@ -1,49 +0,0 @@ -CREATE TABLE IF NOT EXISTS blocks ( - `chain_id` UInt256, - `block_number` UInt256, - `block_timestamp` DateTime CODEC(Delta, ZSTD), - `hash` FixedString(66), - `parent_hash` FixedString(66), - `sha3_uncles` FixedString(66), - `nonce` FixedString(18), - `mix_hash` FixedString(66), - `miner` FixedString(42), - `state_root` FixedString(66), - `transactions_root` FixedString(66), - `receipts_root` FixedString(66), - `logs_bloom` String, - `size` UInt64, - `extra_data` String, - `difficulty` UInt256, - `total_difficulty` UInt256, - `transaction_count` UInt64, - `gas_limit` UInt256, - `gas_used` UInt256, - `withdrawals_root` FixedString(66), - `base_fee_per_gas` Nullable(UInt64), - - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, - - PROJECTION chain_state_projection - ( - SELECT - chain_id, 
- count() AS count, - uniqExact(block_number) AS unique_block_count, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id - ) - - -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number) -PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql b/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql deleted file mode 100644 index 562f3394..00000000 --- a/internal/tools/clickhouse/0001_clickhouse_create_transactions_table.sql +++ /dev/null @@ -1,98 +0,0 @@ -CREATE TABLE IF NOT EXISTS transactions ( - `chain_id` UInt256, - `hash` FixedString(66), - `nonce` UInt64, - `block_hash` FixedString(66), - `block_number` UInt256, - `block_timestamp` DateTime CODEC(Delta, ZSTD), - `transaction_index` UInt64, - `from_address` FixedString(42), - `to_address` FixedString(42), - `value` UInt256, - `gas` UInt64, - `gas_price` UInt256, - `data` String, - `function_selector` FixedString(10), - `max_fee_per_gas` UInt128, - `max_priority_fee_per_gas` UInt128, - `max_fee_per_blob_gas` UInt256, - `blob_versioned_hashes` Array(String), - `transaction_type` UInt8, - `r` UInt256, - `s` UInt256, - `v` UInt256, - `access_list` Nullable(String), - `authorization_list` Nullable(String), - `contract_address` Nullable(FixedString(42)), - `gas_used` Nullable(UInt64), - `cumulative_gas_used` Nullable(UInt64), - `effective_gas_price` Nullable(UInt256), - `blob_gas_used` Nullable(UInt64), - `blob_gas_price` Nullable(UInt256), - `logs_bloom` Nullable(String), - `status` Nullable(UInt64), - - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_hash hash TYPE bloom_filter GRANULARITY 2, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, - INDEX idx_function_selector function_selector TYPE bloom_filter GRANULARITY 2, - - PROJECTION from_address_projection - ( - SELECT - _part_offset - ORDER BY - chain_id, - from_address, - block_number, - hash - ), - PROJECTION to_address_projection - ( - SELECT - _part_offset - ORDER BY - chain_id, - to_address, - block_number, - hash - ), - PROJECTION from_address_state_projection - ( - SELECT - chain_id, - from_address, - count() AS tx_count, - uniqExact(hash) AS unique_tx_count, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - from_address - ), - PROJECTION to_address_state_projection - ( - SELECT - chain_id, - to_address, - count() AS tx_count, - uniqExact(hash) AS unique_tx_count, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - to_address - ) -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number, hash) -PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS 
deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql b/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql deleted file mode 100644 index 44e598ee..00000000 --- a/internal/tools/clickhouse/0002_clickhouse_create_logs_table.sql +++ /dev/null @@ -1,78 +0,0 @@ -CREATE TABLE IF NOT EXISTS logs ( - `chain_id` UInt256, - `block_number` UInt256, - `block_hash` FixedString(66), - `block_timestamp` DateTime CODEC(Delta, ZSTD), - `transaction_hash` FixedString(66), - `transaction_index` UInt64, - `log_index` UInt64, - `address` FixedString(42), - `data` String, - `topic_0` String, - `topic_1` String, - `topic_2` String, - `topic_3` String, - - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 3, - INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 2, - INDEX idx_address address TYPE bloom_filter GRANULARITY 3, - INDEX idx_topic0 topic_0 TYPE bloom_filter GRANULARITY 3, - INDEX idx_topic1 topic_1 TYPE bloom_filter GRANULARITY 4, - INDEX idx_topic2 topic_2 TYPE bloom_filter GRANULARITY 4, - INDEX idx_topic3 topic_3 TYPE bloom_filter GRANULARITY 4, - - PROJECTION chain_address_topic0_projection - ( - SELECT - _part_offset - ORDER BY - chain_id, - address, - topic_0, - block_number, - transaction_index, - log_index - ), - PROJECTION chain_address_block_number_full_projection - ( - SELECT - * - ORDER BY - chain_id, - address, - block_number - ), - PROJECTION chain_topic0_full_projection - ( - SELECT - * - ORDER BY - chain_id, - topic_0, - block_number, - address - ), - PROJECTION address_topic0_state_projection - ( - SELECT - chain_id, - address, - topic_0, - count() AS log_count, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - address, - topic_0 - ) -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, block_number, transaction_hash, log_index) -PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; diff --git a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql b/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql deleted file mode 100644 index 6b654672..00000000 --- a/internal/tools/clickhouse/0003_clickhouse_create_traces_table.sql +++ /dev/null @@ -1,58 +0,0 @@ -CREATE TABLE IF NOT EXISTS traces ( - `chain_id` UInt256, - `block_number` UInt256, - `block_hash` FixedString(66), - `block_timestamp` DateTime CODEC(Delta, ZSTD), - `transaction_hash` FixedString(66), - `transaction_index` UInt64, - `subtraces` Int64, - `trace_address` Array(Int64), - `type` LowCardinality(String), - `call_type` LowCardinality(String), - `error` Nullable(String), - `from_address` FixedString(42), - `to_address` FixedString(42), - `gas` UInt64, - `gas_used` UInt64, - `input` String, - `output` Nullable(String), - `value` UInt256, - `author` Nullable(FixedString(42)), - `reward_type` LowCardinality(Nullable(String)), - `refund_address` Nullable(FixedString(42)), - - `insert_timestamp` 
DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_block_hash block_hash TYPE bloom_filter GRANULARITY 2, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, - - PROJECTION from_address_projection - ( - SELECT - _part_offset - ORDER BY - chain_id, - from_address, - block_number, - transaction_hash, - trace_address - ), - PROJECTION to_address_projection - ( - SELECT - _part_offset - ORDER BY - chain_id, - to_address, - block_number, - transaction_hash, - trace_address - ) - -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, transaction_hash, trace_address) -PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; diff --git a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql b/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql deleted file mode 100644 index 63888314..00000000 --- a/internal/tools/clickhouse/0004_clickhouse_create_insert_null_table.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE IF NOT EXISTS insert_null_block_data -( - chain_id UInt256, - block Tuple( - block_number UInt256, - block_timestamp DateTime, - hash FixedString(66), - parent_hash FixedString(66), - sha3_uncles FixedString(66), - nonce FixedString(18), - mix_hash FixedString(66), - miner FixedString(42), - state_root FixedString(66), - transactions_root FixedString(66), - receipts_root FixedString(66), - logs_bloom String, - size UInt64, - extra_data String, - difficulty UInt256, - total_difficulty UInt256, - transaction_count UInt64, - gas_limit UInt256, - gas_used UInt256, - withdrawals_root FixedString(66), - base_fee_per_gas Nullable(UInt64)), - transactions Array(Tuple( - hash FixedString(66), - nonce UInt64, - block_hash FixedString(66), - block_number UInt256, - block_timestamp DateTime, - transaction_index UInt64, - from_address FixedString(42), - to_address FixedString(42), - value UInt256, - gas UInt64, - gas_price UInt256, - data String, - function_selector FixedString(10), - max_fee_per_gas UInt128, - max_priority_fee_per_gas UInt128, - max_fee_per_blob_gas UInt256, - blob_versioned_hashes Array(String), - transaction_type UInt8, - r UInt256, - s UInt256, - v UInt256, - access_list Nullable(String), - authorization_list Nullable(String), - contract_address Nullable(FixedString(42)), - gas_used Nullable(UInt64), - cumulative_gas_used Nullable(UInt64), - effective_gas_price Nullable(UInt256), - blob_gas_used Nullable(UInt64), - blob_gas_price Nullable(UInt256), - logs_bloom Nullable(String), - status Nullable(UInt64))), - logs Array(Tuple( - block_number UInt256, - block_hash FixedString(66), - block_timestamp DateTime, - transaction_hash FixedString(66), - transaction_index UInt64, - log_index UInt64, - address FixedString(42), - data String, - topic_0 String, - topic_1 String, - topic_2 String, - topic_3 String)), - traces Array(Tuple( - block_number UInt256, - block_hash FixedString(66), - block_timestamp DateTime, - transaction_hash FixedString(66), - transaction_index UInt64, - subtraces Int64, - trace_address Array(Int64), - type LowCardinality(String), - call_type LowCardinality(String), - error Nullable(String), - from_address FixedString(42), - to_address FixedString(42), - gas UInt64, - gas_used 
UInt64, - input String, - output Nullable(String), - value UInt256, - author Nullable(FixedString(42)), - reward_type LowCardinality(Nullable(String)), - refund_address Nullable(FixedString(42)))), - token_transfers Array(Tuple( - token_type LowCardinality(String), - token_address FixedString(42), - token_id UInt256, - from_address FixedString(42), - to_address FixedString(42), - block_number UInt256, - block_timestamp DateTime, - transaction_hash FixedString(66), - transaction_index UInt64, - amount UInt256, - log_index UInt64, - batch_index Nullable(UInt16))), - insert_timestamp DateTime DEFAULT now(), - is_deleted UInt8 DEFAULT 0 -) -ENGINE = Null \ No newline at end of file diff --git a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql b/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql deleted file mode 100644 index b10c3798..00000000 --- a/internal/tools/clickhouse/0005_clickhouse_create_insert_data_mv.sql +++ /dev/null @@ -1,122 +0,0 @@ -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_blocks_mv -TO blocks -AS -SELECT - chain_id, - block.1 AS block_number, - block.2 AS block_timestamp, - block.3 AS hash, - block.4 AS parent_hash, - block.5 AS sha3_uncles, - block.6 AS nonce, - block.7 AS mix_hash, - block.8 AS miner, - block.9 AS state_root, - block.10 AS transactions_root, - block.11 AS receipts_root, - block.12 AS logs_bloom, - block.13 AS size, - block.14 AS extra_data, - block.15 AS difficulty, - block.16 AS total_difficulty, - block.17 AS transaction_count, - block.18 AS gas_limit, - block.19 AS gas_used, - block.20 AS withdrawals_root, - block.21 AS base_fee_per_gas, - insert_timestamp, - is_deleted -FROM insert_null_block_data; - -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_transactions_mv -TO transactions -AS -SELECT - chain_id, - t.1 AS hash, - t.2 AS nonce, - t.3 AS block_hash, - t.4 AS block_number, - t.5 AS block_timestamp, - t.6 AS transaction_index, - t.7 AS from_address, - t.8 AS to_address, - t.9 AS value, - t.10 AS gas, - t.11 AS gas_price, - t.12 AS data, - t.13 AS function_selector, - t.14 AS max_fee_per_gas, - t.15 AS max_priority_fee_per_gas, - t.16 AS max_fee_per_blob_gas, - t.17 AS blob_versioned_hashes, - t.18 AS transaction_type, - t.19 AS r, - t.20 AS s, - t.21 AS v, - t.22 AS access_list, - t.23 AS authorization_list, - t.24 AS contract_address, - t.25 AS gas_used, - t.26 AS cumulative_gas_used, - t.27 AS effective_gas_price, - t.28 AS blob_gas_used, - t.29 AS blob_gas_price, - t.30 AS logs_bloom, - t.31 AS status, - insert_timestamp, - is_deleted -FROM insert_null_block_data -ARRAY JOIN transactions AS t; - -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_logs_mv -TO logs -AS -SELECT - chain_id, - l.1 AS block_number, - l.2 AS block_hash, - l.3 AS block_timestamp, - l.4 AS transaction_hash, - l.5 AS transaction_index, - l.6 AS log_index, - l.7 AS address, - l.8 AS data, - l.9 AS topic_0, - l.10 AS topic_1, - l.11 AS topic_2, - l.12 AS topic_3, - insert_timestamp, - is_deleted -FROM insert_null_block_data -ARRAY JOIN logs AS l; - -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_traces_mv -TO traces -AS -SELECT - chain_id, - tr.1 AS block_number, - tr.2 AS block_hash, - tr.3 AS block_timestamp, - tr.4 AS transaction_hash, - tr.5 AS transaction_index, - tr.6 AS subtraces, - tr.7 AS trace_address, - tr.8 AS type, - tr.9 AS call_type, - tr.10 AS error, - tr.11 AS from_address, - tr.12 AS to_address, - tr.13 AS gas, - tr.14 AS gas_used, - tr.15 AS input, - tr.16 AS output, - tr.17 AS value, - tr.18 AS author, - tr.19 AS 
reward_type, - tr.20 AS refund_address, - insert_timestamp, - is_deleted -FROM insert_null_block_data -ARRAY JOIN traces AS tr; diff --git a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql b/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql deleted file mode 100644 index 4eb1c594..00000000 --- a/internal/tools/clickhouse/0006_clickhouse_create_token_transfers.sql +++ /dev/null @@ -1,115 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_transfers -( - `chain_id` UInt256, - `token_type` LowCardinality(String), - `token_address` FixedString(42), - `token_id` UInt256, - `from_address` FixedString(42), - `to_address` FixedString(42), - `block_number` UInt256, - `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), - `transaction_hash` FixedString(66), - `transaction_index` UInt64, - `amount` UInt256, - `log_index` UInt64, - `batch_index` Nullable(UInt16) DEFAULT NULL, - - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - - INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 3, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 3, - INDEX idx_transaction_hash transaction_hash TYPE bloom_filter GRANULARITY 4, - - PROJECTION from_address_projection ( - SELECT - _part_offset - ORDER BY - chain_id, - from_address, - block_number, - transaction_index, - log_index - ), - PROJECTION to_address_projection ( - SELECT - _part_offset - ORDER BY - chain_id, - to_address, - block_number, - transaction_index, - log_index - ), - PROJECTION token_id_projection ( - SELECT - _part_offset - ORDER BY - chain_id, - token_address, - token_id, - block_number, - transaction_index, - log_index - ), - PROJECTION from_address_state_projection ( - SELECT - chain_id, - from_address, - token_address, - token_type, - count() AS transfer_count, - sum(toInt256(amount)) AS total_amount, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - from_address, - token_address, - token_type - ), - PROJECTION to_address_state_projection ( - SELECT - chain_id, - to_address, - token_address, - token_type, - count() AS transfer_count, - sum(toInt256(amount)) AS total_amount, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - to_address, - token_address, - token_type - ), - PROJECTION token_state_projection ( - SELECT - chain_id, - token_address, - token_id, - token_type, - count() AS transfer_count, - sum(toInt256(amount)) AS total_volume, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - token_address, - token_id, - token_type - ) -) -ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -ORDER BY (chain_id, token_address, block_number, transaction_index, log_index) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git 
a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql b/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql deleted file mode 100644 index ecf58e80..00000000 --- a/internal/tools/clickhouse/0008_clickhouse_create_token_balances.sql +++ /dev/null @@ -1,68 +0,0 @@ -CREATE TABLE IF NOT EXISTS token_balances -( - `chain_id` UInt256, - `token_type` LowCardinality(String), - `token_address` FixedString(42), - `owner_address` FixedString(42), - `token_id` UInt256, - - -- Normalized delta: positive for incoming, negative for outgoing - `balance_delta` Int256, - - -- Transaction details for ordering and deduplication - `block_number` UInt256, - `block_timestamp` DateTime, - `transaction_hash` FixedString(66), - `transaction_index` UInt64, - `log_index` UInt64, - `direction` Enum8('from' = 1, 'to' = 2), -- To make each transfer create 2 unique rows - - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - - INDEX idx_block_number block_number TYPE minmax GRANULARITY 1, - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_token_address token_address TYPE bloom_filter GRANULARITY 3, - INDEX idx_owner_address owner_address TYPE bloom_filter GRANULARITY 3, - - PROJECTION owner_balances_projection - ( - SELECT - chain_id, - owner_address, - token_address, - token_id, - sum(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, - min(block_number) AS min_block_number_state, - min(block_timestamp) AS min_block_timestamp_state, - max(block_number) AS max_block_number_state, - max(block_timestamp) AS max_block_timestamp_state - GROUP BY chain_id, owner_address, token_address, token_id - ), - - PROJECTION token_balances_projection - ( - SELECT - chain_id, - token_address, - token_id, - owner_address, - sum(balance_delta * if(is_deleted = 0, 1, -1)) AS balance_state, - min(block_number) AS min_block_number_state, - min(block_timestamp) AS min_block_timestamp_state, - max(block_number) AS max_block_number_state, - max(block_timestamp) AS max_block_timestamp_state - GROUP BY chain_id, token_address, token_id, owner_address - ), - - PROJECTION token_projection - ( - SELECT - _part_offset - ORDER BY chain_id, token_address, token_id, owner_address - ) -) -ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -ORDER BY (chain_id, owner_address, token_address, token_id, block_number, transaction_index, log_index, direction) -SETTINGS index_granularity = 8192, lightweight_mutation_projection_mode = 'rebuild', deduplicate_merge_projection_mode = 'rebuild', allow_part_offset_column_in_projections=1; \ No newline at end of file diff --git a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql b/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql deleted file mode 100644 index 933f6d4a..00000000 --- a/internal/tools/clickhouse/0009_clickhouse_create_token_balances_mv.sql +++ /dev/null @@ -1,172 +0,0 @@ --- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_from_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - -toInt256(amount) AS balance_delta, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - 'from' AS direction, - insert_timestamp, - is_deleted -FROM token_transfers -WHERE token_type = 'erc20'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc20_to_mv -TO token_balances -AS --- TO side 
(incoming, positive delta) -SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(amount) AS balance_delta, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - 'to' AS direction, - insert_timestamp, - is_deleted -FROM token_transfers -WHERE token_type = 'erc20'; - --- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_from_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - -1 AS balance_delta, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - 'from' AS direction, - insert_timestamp, - is_deleted -FROM token_transfers -WHERE token_type = 'erc721'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc721_to_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - 1 AS balance_delta, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - 'to' AS direction, - insert_timestamp, - is_deleted -FROM token_transfers -WHERE token_type = 'erc721'; - --- ERC1155 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_from_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - -toInt256(amount) AS balance_delta, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - 'from' AS direction, - insert_timestamp, - is_deleted -FROM token_transfers -WHERE token_type = 'erc1155'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc1155_to_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(amount) AS balance_delta, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - 'to' AS direction, - insert_timestamp, - is_deleted -FROM token_transfers -WHERE token_type = 'erc1155'; - --- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_from_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - from_address AS owner_address, - token_id, - -toInt256(amount) AS balance_delta, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - 'from' AS direction, - insert_timestamp, - is_deleted -FROM token_transfers -WHERE token_type = 'erc6909'; - -CREATE MATERIALIZED VIEW IF NOT EXISTS token_balances_erc6909_to_mv -TO token_balances -AS -SELECT - chain_id, - token_type, - token_address, - to_address AS owner_address, - token_id, - toInt256(amount) AS balance_delta, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - log_index, - 'to' AS direction, - insert_timestamp, - is_deleted -FROM token_transfers -WHERE token_type = 'erc6909'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql b/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql deleted file mode 100644 index f546f40c..00000000 --- a/internal/tools/clickhouse/0010_clickhouse_create_address_transactions.sql +++ /dev/null @@ -1,63 +0,0 @@ -CREATE TABLE IF NOT EXISTS address_transactions ( - `chain_id` UInt256, - `hash` FixedString(66), - `nonce` UInt64, - `block_hash` FixedString(66), - `block_number` UInt256, - `block_timestamp` DateTime CODEC(Delta, ZSTD), - `transaction_index` UInt64, - `address` FixedString(42), - `address_type` 
Enum8('from' = 1, 'to' = 2), - `from_address` FixedString(42), - `to_address` FixedString(42), - `value` UInt256, - `gas` UInt64, - `gas_price` UInt256, - `data` String, - `function_selector` FixedString(10), - `max_fee_per_gas` UInt128, - `max_priority_fee_per_gas` UInt128, - `max_fee_per_blob_gas` UInt256, - `blob_versioned_hashes` Array(String), - `transaction_type` UInt8, - `r` UInt256, - `s` UInt256, - `v` UInt256, - `access_list` Nullable(String), - `authorization_list` Nullable(String), - `contract_address` Nullable(FixedString(42)), - `gas_used` Nullable(UInt64), - `cumulative_gas_used` Nullable(UInt64), - `effective_gas_price` Nullable(UInt256), - `blob_gas_used` Nullable(UInt64), - `blob_gas_price` Nullable(UInt256), - `logs_bloom` Nullable(String), - `status` Nullable(UInt64), - - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, - - PROJECTION address_total_count_projection - ( - SELECT - chain_id, - address, - count() AS tx_count, - uniqExact(hash) AS unique_tx_count, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - address - ) -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, address, block_number, hash, transaction_index) -PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql b/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql deleted file mode 100644 index db16ab50..00000000 --- a/internal/tools/clickhouse/0011_clickhouse_create_address_transactions_mv.sql +++ /dev/null @@ -1,44 +0,0 @@ -CREATE MATERIALIZED VIEW IF NOT EXISTS address_transactions_mv -TO address_transactions -AS -SELECT - chain_id, - hash, - nonce, - block_hash, - block_number, - block_timestamp, - transaction_index, - address_tuple.1 AS address, - address_tuple.2 AS address_type, - from_address, - to_address, - value, - gas, - gas_price, - data, - function_selector, - max_fee_per_gas, - max_priority_fee_per_gas, - max_fee_per_blob_gas, - blob_versioned_hashes, - transaction_type, - r, - s, - v, - access_list, - authorization_list, - contract_address, - gas_used, - cumulative_gas_used, - effective_gas_price, - blob_gas_used, - blob_gas_price, - logs_bloom, - status, - - insert_timestamp, - is_deleted -FROM transactions -ARRAY JOIN - arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql b/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql deleted file mode 100644 index c130e70c..00000000 --- a/internal/tools/clickhouse/0012_clickhouse_create_address_transfers.sql +++ /dev/null @@ -1,67 +0,0 @@ -CREATE TABLE IF NOT EXISTS address_transfers ( - `chain_id` UInt256, - `token_type` LowCardinality(String), - `token_address` FixedString(42), - `token_id` UInt256, - `address` FixedString(42), - `address_type` Enum8('from' = 1, 'to' = 2), 
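-- Illustrative sketch (assumed sample addresses, not part of the deleted schema): address_transactions_mv
-- above fans each transaction into two rows, one per side, by zipping the two addresses with their roles
-- and ARRAY JOINing the result; address_transfers_mv below applies the same pattern to token transfers.
-- The standalone query shows the mechanics of that fan-out on a single made-up row.
SELECT
    address_tuple.1 AS address,
    address_tuple.2 AS address_type
FROM
(
    SELECT
        '0x1111222233334444555566667777888899990000' AS from_address,
        '0xaaaabbbbccccddddeeeeffff0000111122223333' AS to_address
)
ARRAY JOIN
    arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple;
-- Returns two rows from the one input row: ('0x1111...0000', 'from') and ('0xaaaa...3333', 'to').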
- `from_address` FixedString(42), - `to_address` FixedString(42), - `block_number` UInt256, - `block_timestamp` DateTime CODEC(Delta(4), ZSTD(1)), - `transaction_hash` FixedString(66), - `transaction_index` UInt64, - `amount` UInt256, - `log_index` UInt64, - `batch_index` Nullable(UInt16) DEFAULT NULL, - - `insert_timestamp` DateTime DEFAULT now(), - `is_deleted` UInt8 DEFAULT 0, - - INDEX idx_block_timestamp block_timestamp TYPE minmax GRANULARITY 1, - INDEX idx_address_type address_type TYPE bloom_filter GRANULARITY 3, - INDEX idx_from_address from_address TYPE bloom_filter GRANULARITY 4, - INDEX idx_to_address to_address TYPE bloom_filter GRANULARITY 4, - - PROJECTION address_state_projection ( - SELECT - chain_id, - address, - address_type, - token_address, - token_type, - count() AS transfer_count, - sum(toInt256(amount)) AS total_amount, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - address, - address_type, - token_address, - token_type - ), - PROJECTION address_total_state_projection ( - SELECT - chain_id, - address, - token_address, - token_type, - count() AS transfer_count, - sum(toInt256(amount)) AS total_amount, - min(block_number) AS min_block_number, - min(block_timestamp) AS min_block_timestamp, - max(block_number) AS max_block_number, - max(block_timestamp) AS max_block_timestamp - GROUP BY - chain_id, - address, - token_address, - token_type - ) -) ENGINE = ReplacingMergeTree(insert_timestamp, is_deleted) -ORDER BY (chain_id, address, block_number, transaction_hash, transaction_index) -PARTITION BY (chain_id, toStartOfQuarter(block_timestamp)) -SETTINGS deduplicate_merge_projection_mode = 'rebuild', lightweight_mutation_projection_mode = 'rebuild'; \ No newline at end of file diff --git a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql b/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql deleted file mode 100644 index d3bdfcb9..00000000 --- a/internal/tools/clickhouse/0013_clickhouse_create_address_transfers_mv.sql +++ /dev/null @@ -1,24 +0,0 @@ -CREATE MATERIALIZED VIEW IF NOT EXISTS address_transfers_mv -TO address_transfers -AS -SELECT - chain_id, - token_type, - token_address, - token_id, - address_tuple.1 AS address, - address_tuple.2 AS address_type, - from_address, - to_address, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - amount, - log_index, - batch_index, - insert_timestamp, - is_deleted -FROM token_transfers -ARRAY JOIN - arrayZip([from_address, to_address], ['from', 'to']) AS address_tuple; \ No newline at end of file diff --git a/internal/tools/clickhouse/0014_clickhouse_create_insert_token_transfers_mv.sql b/internal/tools/clickhouse/0014_clickhouse_create_insert_token_transfers_mv.sql deleted file mode 100644 index 1db63867..00000000 --- a/internal/tools/clickhouse/0014_clickhouse_create_insert_token_transfers_mv.sql +++ /dev/null @@ -1,20 +0,0 @@ -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_token_transfers_mv -TO token_transfers -AS SELECT - chain_id, - tt.1 AS token_type, - tt.2 AS token_address, - tt.3 AS token_id, - tt.4 AS from_address, - tt.5 AS to_address, - tt.6 AS block_number, - tt.7 AS block_timestamp, - tt.8 AS transaction_hash, - tt.9 AS transaction_index, - tt.10 AS amount, - tt.11 AS log_index, - tt.12 AS batch_index, - insert_timestamp, - is_deleted -FROM insert_null_block_data -ARRAY JOIN token_transfers AS 
tt \ No newline at end of file diff --git a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql b/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql deleted file mode 100644 index c2090e6c..00000000 --- a/internal/tools/clickhouse_opts/0000_clickhouse_backfill_logs_transfer.sql +++ /dev/null @@ -1,202 +0,0 @@ -CREATE TABLE IF NOT EXISTS backfill_logs -( - `chain_id` UInt256, - `block_number` UInt256, - `block_hash` FixedString(66), - `block_timestamp` DateTime CODEC(Delta, ZSTD), - `transaction_hash` FixedString(66), - `transaction_index` UInt64, - `log_index` UInt64, - `address` FixedString(42), - `data` String, - `topic_0` String, - `topic_1` String, - `topic_2` String, - `topic_3` String, - - `sign` Int8 DEFAULT 1, - `insert_timestamp` DateTime DEFAULT now(), -) ENGINE = Null; - - ---- Materialize view running to the correct tables --- ERC20 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc20_mv -TO token_transfers -AS -SELECT - chain_id, - address AS token_address, - 'erc20' AS token_type, - 0 AS token_id, - concat('0x', substring(topic_1, 27, 40)) AS from_address, - concat('0x', substring(topic_2, 27, 40)) AS to_address, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount, - log_index, - CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp -FROM backfill_logs -WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' - AND length(topic_1) = 66 AND startsWith(topic_1, '0x') - AND length(topic_2) = 66 AND startsWith(topic_2, '0x') - AND topic_3 = '' - AND length(data) = 66; - --- ERC721 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc721_mv -TO token_transfers -AS -SELECT - chain_id, - address AS token_address, - 'erc721' AS token_type, - reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, - concat('0x', substring(topic_1, 27, 40)) AS from_address, - concat('0x', substring(topic_2, 27, 40)) AS to_address, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - toUInt8(1) AS amount, - log_index, - CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp -FROM backfill_logs -WHERE topic_0 = '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef' - AND length(topic_1) = 66 AND startsWith(topic_1, '0x') - AND length(topic_2) = 66 AND startsWith(topic_2, '0x') - AND length(topic_3) = 66 AND startsWith(topic_3, '0x') - AND length(data) = 2; - --- ERC1155 (single) -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc1155_single_mv -TO token_transfers -AS -SELECT - chain_id, - address AS token_address, - 'erc1155' AS token_type, - reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS token_id, - concat('0x', substring(topic_2, 27, 40)) AS from_address, - concat('0x', substring(topic_3, 27, 40)) AS to_address, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, - log_index, - toNullable(toUInt16(0)) AS batch_index, - sign, - insert_timestamp -FROM backfill_logs -WHERE topic_0 = '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62' -- TransferSingle - AND length(topic_2) = 66 AND length(topic_3) = 66 - AND length(data) = (2 + 2*64); - --- ERC1155 (batch) -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc1155_batch_mv -TO token_transfers -AS -SELECT - chain_id, - 
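-- Illustrative sketch (made-up literal topic/data values): the bf__token_transfers_*_mv views above decode
-- raw Transfer logs by slicing hex strings. An indexed address is left-padded to 32 bytes in a topic, so the
-- last 40 hex characters (positions 27-66 of the 0x-prefixed string) are the address; a uint256 word in `data`
-- is big-endian hex, so it is unhexed, byte-reversed and reinterpreted as a little-endian UInt256.
WITH
    '0x0000000000000000000000001111222233334444555566667777888899990000' AS topic_1,
    '0x00000000000000000000000000000000000000000000000000000000000003e8' AS data
SELECT
    concat('0x', substring(topic_1, 27, 40)) AS from_address,                 -- '0x1111222233334444555566667777888899990000'
    reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64)))) AS amount;   -- 1000 (0x3e8)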
address AS token_address, - 'erc1155' AS token_type, - reinterpretAsUInt256(reverse(unhex(id_hex))) AS token_id, - concat('0x', substring(topic_2, 27, 40)) AS from_address, - concat('0x', substring(topic_3, 27, 40)) AS to_address, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - reinterpretAsUInt256(reverse(unhex(amount_hex))) AS amount, - log_index, - toNullable(toUInt16(array_index - 1)) AS batch_index, - sign, - insert_timestamp -FROM ( - SELECT - chain_id, address, topic_2, topic_3, - block_number, block_timestamp, transaction_hash, transaction_index, log_index, sign, insert_timestamp, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3, 64))))) AS ids_offset, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64))))) AS amounts_offset, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + ids_offset * 2, 64))))) AS ids_length, - toUInt32(reinterpretAsUInt256(reverse(unhex(substring(data, 3 + amounts_offset * 2, 64))))) AS amounts_length, - arrayMap(i -> substring(data, 3 + ids_offset * 2 + 64 + (i-1)*64, 64), range(1, least(ids_length, 10000) + 1)) AS ids_array, - arrayMap(i -> substring(data, 3 + amounts_offset * 2 + 64 + (i-1)*64, 64), range(1, least(amounts_length, 10000) + 1)) AS amounts_array - FROM backfill_logs - WHERE topic_0 = '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb' - AND length(topic_2) = 66 - AND length(topic_3) = 66 - AND ids_length = amounts_length -) -ARRAY JOIN - ids_array AS id_hex, - amounts_array AS amount_hex, - arrayEnumerate(ids_array) AS array_index; - - --- ERC6909 -CREATE MATERIALIZED VIEW IF NOT EXISTS bf__token_transfers_erc6909_mv -TO token_transfers -AS -SELECT - chain_id, - lower(address) AS token_address, - 'erc6909' AS token_type, - reinterpretAsUInt256(reverse(unhex(substring(topic_3, 3, 64)))) AS token_id, - lower(concat('0x', substring(topic_1, 27, 40))) AS from_address, - lower(concat('0x', substring(topic_2, 27, 40))) AS to_address, - block_number, - block_timestamp, - transaction_hash, - transaction_index, - reinterpretAsUInt256(reverse(unhex(substring(data, 67, 64)))) AS amount, - log_index, - CAST(NULL AS Nullable(UInt16)) AS batch_index, - sign, - insert_timestamp -FROM backfill_logs -WHERE topic_0 = '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' - AND length(topic_1) = 66 - AND length(topic_2) = 66 - AND length(data) == 2 + 128; - ---- INITIATE BACKFILL BY RUNNING: --- INSERT INTO backfill_logs --- SELECT --- chain_id, --- block_number, --- block_hash, --- block_timestamp, --- transaction_hash , --- transaction_index, --- log_index, --- address, --- data, --- topic_0, --- topic_1, --- topic_2, --- topic_3, --- sign, --- insert_timestamp, --- FROM logs --- WHERE 1=1 --- AND chain_id = 1 --- AND block_number >= 0 AND block_number < 10000000 --- AND topic_0 IN ( --- '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef', -- 20/721 --- '0xc3d58168c5ae7397731d063d5bbf3d657854427343f4c083240f7aacaa2d0f62', -- 1155 single --- '0x4a39dc06d4c0dbc64b70af90fd698a233a518aa5d07e595d983b8c0526c8f7fb', -- 1155 batch --- '0x1b3d7edb2e9c0b0e7c525b20aaaef0f5940d2ed71663c7d39266ecafac728859' -- 6909 --- ); - --- DROP TABLE token_transfers, token_balance; --- DROP TABLE bf__token_transfers_erc20_mv, bf__token_transfers_erc721_mv, bf__token_transfers_erc1155_mv, bf__token_transfers_erc6909_mv; --- DROP TABLE token_transfers_erc20_mv, token_transfers_erc721_mv, token_transfers_erc1155_mv, token_transfers_erc6909_mv; --- DROP TABLE 
token_balance_erc20_mv, token_balance_erc721_mv, token_balance_erc1155_mv, token_balance_erc6909_mv; \ No newline at end of file diff --git a/internal/tools/clickhouse_opts/0010_clickhouse_inserts_null_table_v1.sql b/internal/tools/clickhouse_opts/0010_clickhouse_inserts_null_table_v1.sql deleted file mode 100644 index 1e03dfa2..00000000 --- a/internal/tools/clickhouse_opts/0010_clickhouse_inserts_null_table_v1.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE IF NOT EXISTS inserts_null_table -( - chain_id UInt256, - block Tuple( - block_number UInt256, - block_timestamp DateTime, - hash FixedString(66), - parent_hash FixedString(66), - sha3_uncles FixedString(66), - nonce FixedString(18), - mix_hash FixedString(66), - miner FixedString(42), - state_root FixedString(66), - transactions_root FixedString(66), - receipts_root FixedString(66), - logs_bloom String, - size UInt64, - extra_data String, - difficulty UInt256, - total_difficulty UInt256, - transaction_count UInt64, - gas_limit UInt256, - gas_used UInt256, - withdrawals_root FixedString(66), - base_fee_per_gas Nullable(UInt64)), - transactions Array(Tuple( - hash FixedString(66), - nonce UInt64, - block_hash FixedString(66), - block_number UInt256, - block_timestamp DateTime, - transaction_index UInt64, - from_address FixedString(42), - to_address FixedString(42), - value UInt256, - gas UInt64, - gas_price UInt256, - data String, - function_selector FixedString(10), - max_fee_per_gas UInt128, - max_priority_fee_per_gas UInt128, - max_fee_per_blob_gas UInt256, - blob_versioned_hashes Array(String), - transaction_type UInt8, - r UInt256, - s UInt256, - v UInt256, - access_list Nullable(String), - authorization_list Nullable(String), - contract_address Nullable(FixedString(42)), - gas_used Nullable(UInt64), - cumulative_gas_used Nullable(UInt64), - effective_gas_price Nullable(UInt256), - blob_gas_used Nullable(UInt64), - blob_gas_price Nullable(UInt256), - logs_bloom Nullable(String), - status Nullable(UInt64))), - logs Array(Tuple( - block_number UInt256, - block_hash FixedString(66), - block_timestamp DateTime, - transaction_hash FixedString(66), - transaction_index UInt64, - log_index UInt64, - address FixedString(42), - data String, - topic_0 String, - topic_1 String, - topic_2 String, - topic_3 String)), - traces Array(Tuple( - block_number UInt256, - block_hash FixedString(66), - block_timestamp DateTime, - transaction_hash FixedString(66), - transaction_index UInt64, - subtraces Int64, - trace_address Array(Int64), - type LowCardinality(String), - call_type LowCardinality(String), - error Nullable(String), - from_address FixedString(42), - to_address FixedString(42), - gas UInt64, - gas_used UInt64, - input String, - output Nullable(String), - value UInt256, - author Nullable(FixedString(42)), - reward_type LowCardinality(Nullable(String)), - refund_address Nullable(FixedString(42)))), - token_transfers Array(Tuple( - token_type LowCardinality(String), - token_address FixedString(42), - token_id UInt256, - from_address FixedString(42), - to_address FixedString(42), - block_number UInt256, - block_timestamp DateTime, - transaction_hash FixedString(66), - transaction_index UInt64, - amount UInt256, - log_index UInt64, - batch_index Nullable(UInt16))), - insert_timestamp DateTime DEFAULT now(), - is_deleted UInt8 DEFAULT 0, - sign Int8 DEFAULT 1 -) -ENGINE = Null \ No newline at end of file diff --git a/internal/tools/clickhouse_opts/0011_clickhouse_inserts_null_table_v1_mv.sql 
b/internal/tools/clickhouse_opts/0011_clickhouse_inserts_null_table_v1_mv.sql deleted file mode 100644 index 8863d0bf..00000000 --- a/internal/tools/clickhouse_opts/0011_clickhouse_inserts_null_table_v1_mv.sql +++ /dev/null @@ -1,122 +0,0 @@ -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_blocks_mv -TO blocks -AS -SELECT - chain_id, - block.1 AS block_number, - block.2 AS block_timestamp, - block.3 AS hash, - block.4 AS parent_hash, - block.5 AS sha3_uncles, - block.6 AS nonce, - block.7 AS mix_hash, - block.8 AS miner, - block.9 AS state_root, - block.10 AS transactions_root, - block.11 AS receipts_root, - block.12 AS logs_bloom, - block.13 AS size, - block.14 AS extra_data, - block.15 AS difficulty, - block.16 AS total_difficulty, - block.17 AS transaction_count, - block.18 AS gas_limit, - block.19 AS gas_used, - block.20 AS withdrawals_root, - block.21 AS base_fee_per_gas, - insert_timestamp, - is_deleted -FROM inserts_null_table; - -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_transactions_mv -TO transactions -AS -SELECT - chain_id, - t.1 AS hash, - t.2 AS nonce, - t.3 AS block_hash, - t.4 AS block_number, - t.5 AS block_timestamp, - t.6 AS transaction_index, - t.7 AS from_address, - t.8 AS to_address, - t.9 AS value, - t.10 AS gas, - t.11 AS gas_price, - t.12 AS data, - t.13 AS function_selector, - t.14 AS max_fee_per_gas, - t.15 AS max_priority_fee_per_gas, - t.16 AS max_fee_per_blob_gas, - t.17 AS blob_versioned_hashes, - t.18 AS transaction_type, - t.19 AS r, - t.20 AS s, - t.21 AS v, - t.22 AS access_list, - t.23 AS authorization_list, - t.24 AS contract_address, - t.25 AS gas_used, - t.26 AS cumulative_gas_used, - t.27 AS effective_gas_price, - t.28 AS blob_gas_used, - t.29 AS blob_gas_price, - t.30 AS logs_bloom, - t.31 AS status, - insert_timestamp, - is_deleted -FROM inserts_null_table -ARRAY JOIN transactions AS t; - -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_logs_mv -TO logs -AS -SELECT - chain_id, - l.1 AS block_number, - l.2 AS block_hash, - l.3 AS block_timestamp, - l.4 AS transaction_hash, - l.5 AS transaction_index, - l.6 AS log_index, - l.7 AS address, - l.8 AS data, - l.9 AS topic_0, - l.10 AS topic_1, - l.11 AS topic_2, - l.12 AS topic_3, - insert_timestamp, - is_deleted -FROM inserts_null_table -ARRAY JOIN logs AS l; - -CREATE MATERIALIZED VIEW IF NOT EXISTS insert_traces_mv -TO traces -AS -SELECT - chain_id, - tr.1 AS block_number, - tr.2 AS block_hash, - tr.3 AS block_timestamp, - tr.4 AS transaction_hash, - tr.5 AS transaction_index, - tr.6 AS subtraces, - tr.7 AS trace_address, - tr.8 AS type, - tr.9 AS call_type, - tr.10 AS error, - tr.11 AS from_address, - tr.12 AS to_address, - tr.13 AS gas, - tr.14 AS gas_used, - tr.15 AS input, - tr.16 AS output, - tr.17 AS value, - tr.18 AS author, - tr.19 AS reward_type, - tr.20 AS refund_address, - insert_timestamp, - is_deleted -FROM inserts_null_table -ARRAY JOIN traces AS tr; diff --git a/internal/tools/postgres/postgres_schema.sql b/internal/tools/postgres/postgres_schema.sql deleted file mode 100644 index 07c97980..00000000 --- a/internal/tools/postgres/postgres_schema.sql +++ /dev/null @@ -1,52 +0,0 @@ -CREATE TABLE IF NOT EXISTS block_failures ( - chain_id BIGINT NOT NULL, - block_number BIGINT NOT NULL, - last_error_timestamp BIGINT NOT NULL, - failure_count INTEGER DEFAULT 1, - reason TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (chain_id, block_number) -) WITH (fillfactor = 80, autovacuum_vacuum_scale_factor = 0.1, 
autovacuum_analyze_scale_factor = 0.05); - -CREATE INDEX IF NOT EXISTS idx_block_failures_block_number_ordered ON block_failures(chain_id, block_number DESC); - --- Cursors table for tracking various processing positions -CREATE TABLE IF NOT EXISTS cursors ( - chain_id BIGINT NOT NULL, - cursor_type VARCHAR(30) NOT NULL, - cursor_value TEXT NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (chain_id, cursor_type) -) WITH (fillfactor = 80); - - -CREATE TABLE IF NOT EXISTS block_data ( - chain_id BIGINT NOT NULL, - block_number BIGINT NOT NULL, - data JSONB NOT NULL, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - PRIMARY KEY (chain_id, block_number) -) WITH (fillfactor = 80, autovacuum_vacuum_scale_factor = 0.1, autovacuum_analyze_scale_factor = 0.05); - - --- Function to automatically update the updated_at timestamp -CREATE OR REPLACE FUNCTION update_updated_at_column() -RETURNS TRIGGER AS $$ -BEGIN - NEW.updated_at = CURRENT_TIMESTAMP; - RETURN NEW; -END; -$$ language 'plpgsql'; - --- Create triggers to automatically update updated_at -CREATE TRIGGER update_block_failures_updated_at BEFORE UPDATE ON block_failures - FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); - -CREATE TRIGGER update_cursors_updated_at BEFORE UPDATE ON cursors - FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); - -CREATE TRIGGER update_block_data_updated_at BEFORE UPDATE ON block_data - FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); diff --git a/internal/validation/cursor.go b/internal/validation/cursor.go deleted file mode 100644 index d954446f..00000000 --- a/internal/validation/cursor.go +++ /dev/null @@ -1,72 +0,0 @@ -package validation - -import ( - "encoding/json" - "fmt" - "math/big" - "os" - - "github.com/thirdweb-dev/indexer/internal/storage" -) - -type Cursor struct { - LastScannedBlockNumber *big.Int - MaxBlockNumber *big.Int - ChainId *big.Int -} - -func InitCursor(chainId *big.Int, storage storage.IStorage) (*Cursor, error) { - lastScannedBlock := getLastScannedBlock(chainId) - maxBlockNumber, err := storage.MainStorage.GetMaxBlockNumber(chainId) - if err != nil { - return nil, err - } - if maxBlockNumber == nil { - maxBlockNumber = big.NewInt(0) - } - if lastScannedBlock.Cmp(maxBlockNumber) >= 0 { - return nil, fmt.Errorf("last scanned block number is greater than or equal to max block number") - } - return &Cursor{ - LastScannedBlockNumber: lastScannedBlock, - MaxBlockNumber: maxBlockNumber, - ChainId: chainId, - }, nil -} - -func (c *Cursor) Update(blockNumber *big.Int) error { - cursorFile := fmt.Sprintf("validation_cursor_%s.json", c.ChainId.String()) - jsonData, err := json.Marshal(blockNumber.String()) - if err != nil { - return err - } - - err = os.WriteFile(cursorFile, jsonData, 0644) - if err != nil { - return err - } - c.LastScannedBlockNumber = blockNumber - return nil -} - -func getLastScannedBlock(chainId *big.Int) *big.Int { - cursorFile := fmt.Sprintf("validation_cursor_%s.json", chainId.String()) - if _, err := os.Stat(cursorFile); err != nil { - return big.NewInt(0) - } - - fileData, err := os.ReadFile(cursorFile) - if err != nil { - return big.NewInt(0) - } - - var lastBlock string - err = json.Unmarshal(fileData, &lastBlock) - if err != nil { - return big.NewInt(0) - } - - lastBlockBig := new(big.Int) - lastBlockBig.SetString(lastBlock, 10) - return lastBlockBig -} diff --git a/internal/validation/duplicates.go 
b/internal/validation/duplicates.go deleted file mode 100644 index 9ee6164c..00000000 --- a/internal/validation/duplicates.go +++ /dev/null @@ -1,246 +0,0 @@ -package validation - -import ( - "context" - "fmt" - "math/big" - "strings" - - "github.com/ClickHouse/clickhouse-go/v2" - "github.com/rs/zerolog/log" -) - -type DuplicateTransaction struct { - BlockNumber *big.Int `json:"block_number" ch:"block_number"` - Hash string `json:"hash" ch:"hash"` -} - -type DuplicateLog struct { - BlockNumber *big.Int `json:"block_number" ch:"block_number"` - TxHash string `json:"transaction_hash" ch:"transaction_hash"` - LogIndex uint64 `json:"log_index" ch:"log_index"` -} - -func FindAndRemoveDuplicates(conn clickhouse.Conn, chainId *big.Int, startBlock *big.Int, endBlock *big.Int) error { - duplicateBlockNumbers, err := findDuplicateBlocksInRange(conn, chainId, startBlock, endBlock) - if err != nil { - return err - } - if len(duplicateBlockNumbers) == 0 { - log.Debug().Msg("No duplicate blocks found in range") - } else { - log.Debug().Msgf("Found %d duplicate blocks in range %v-%v: %v", len(duplicateBlockNumbers), startBlock, endBlock, duplicateBlockNumbers) - err = removeDuplicateBlocks(conn, chainId, duplicateBlockNumbers) - if err != nil { - return err - } - } - - duplicateTransactions, err := findDuplicateTransactionsInRange(conn, chainId, startBlock, endBlock) - if err != nil { - return err - } - if len(duplicateTransactions) == 0 { - log.Debug().Msg("No duplicate transactions found in range") - } else { - log.Debug().Msgf("Found %d duplicate transactions in range %v-%v: %v", len(duplicateTransactions), startBlock, endBlock, duplicateTransactions) - err = removeDuplicateTransactions(conn, chainId, duplicateTransactions) - if err != nil { - return err - } - } - - duplicateLogs, err := findDuplicateLogsInRange(conn, chainId, startBlock, endBlock) - if err != nil { - return err - } - if len(duplicateLogs) == 0 { - log.Debug().Msg("No duplicate logs found in range") - } else { - log.Debug().Msgf("Found %d duplicate logs in range %v-%v: %v", len(duplicateLogs), startBlock, endBlock, duplicateLogs) - err = removeDuplicateLogs(conn, chainId, duplicateLogs) - if err != nil { - return err - } - } - - return nil -} - -func findDuplicateBlocksInRange(conn clickhouse.Conn, chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - query := `SELECT block_number - FROM default.blocks FINAL WHERE chain_id = ? AND block_number >= ? AND block_number <= ? - GROUP BY block_number - HAVING sum(sign) != 1 - ORDER BY block_number; - ` - rows, err := conn.Query(context.Background(), query, chainId, startBlock, endBlock) - if err != nil { - return nil, err - } - defer rows.Close() - - blockNumbers := make([]*big.Int, 0) - - for rows.Next() { - var blockNumber *big.Int - err := rows.Scan(&blockNumber) - if err != nil { - return nil, err - } - blockNumbers = append(blockNumbers, blockNumber) - } - return blockNumbers, nil -} - -func findDuplicateTransactionsInRange(conn clickhouse.Conn, chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]DuplicateTransaction, error) { - query := `SELECT block_number, hash - FROM default.transactions FINAL WHERE chain_id = ? AND block_number >= ? AND block_number <= ? 
- GROUP BY block_number, hash - HAVING sum(sign) != 1 - ORDER BY block_number; - ` - rows, err := conn.Query(context.Background(), query, chainId, startBlock, endBlock) - if err != nil { - return nil, err - } - defer rows.Close() - - duplicateTransactions := make([]DuplicateTransaction, 0) - - for rows.Next() { - var duplicateTransaction DuplicateTransaction - err := rows.ScanStruct(&duplicateTransaction) - if err != nil { - return nil, err - } - duplicateTransactions = append(duplicateTransactions, duplicateTransaction) - } - return duplicateTransactions, nil -} - -func findDuplicateLogsInRange(conn clickhouse.Conn, chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]DuplicateLog, error) { - query := `SELECT block_number, transaction_hash, log_index - FROM default.logs FINAL WHERE chain_id = ? AND block_number >= ? AND block_number <= ? - GROUP BY block_number, transaction_hash, log_index - HAVING sum(sign) != 1 - ORDER BY block_number; - ` - rows, err := conn.Query(context.Background(), query, chainId, startBlock, endBlock) - if err != nil { - return nil, err - } - defer rows.Close() - - duplicateLogs := make([]DuplicateLog, 0) - - for rows.Next() { - var duplicateLog DuplicateLog - err := rows.ScanStruct(&duplicateLog) - if err != nil { - return nil, err - } - duplicateLogs = append(duplicateLogs, duplicateLog) - } - return duplicateLogs, nil -} - -func removeDuplicateBlocks(conn clickhouse.Conn, chainId *big.Int, duplicateBlockNumbers []*big.Int) error { - query := `WITH - to_be_inserted AS ( - SELECT chain_id, block_number, block_timestamp, hash, parent_hash, sha3_uncles, nonce, mix_hash, miner, state_root, - transactions_root, receipts_root, logs_bloom, size, extra_data, difficulty, total_difficulty, transaction_count, - gas_limit, gas_used, withdrawals_root, base_fee_per_gas, insert_timestamp, -sign as sign - FROM default.blocks FINAL - WHERE chain_id = ? AND block_number IN (?) - ) - INSERT INTO blocks ( - chain_id, block_number, block_timestamp, hash, parent_hash, sha3_uncles, nonce, mix_hash, miner, state_root, - transactions_root, receipts_root, logs_bloom, size, extra_data, difficulty, total_difficulty, transaction_count, - gas_limit, gas_used, withdrawals_root, base_fee_per_gas, insert_timestamp, sign - ) SELECT * from to_be_inserted - ` - err := conn.Exec(context.Background(), query, chainId, duplicateBlockNumbers) - if err != nil { - return err - } - return nil -} - -func removeDuplicateTransactions(conn clickhouse.Conn, chainId *big.Int, duplicateTransactions []DuplicateTransaction) error { - query := `WITH - to_be_inserted AS ( - SELECT chain_id, hash, nonce, block_hash, block_number, block_timestamp, transaction_index, from_address, to_address, value, gas, gas_price, data, function_selector, - max_fee_per_gas, max_priority_fee_per_gas, max_fee_per_blob_gas, blob_versioned_hashes, transaction_type, r, s, v, access_list, authorization_list, contract_address, - gas_used, cumulative_gas_used, effective_gas_price, blob_gas_used, blob_gas_price, logs_bloom, status, insert_timestamp, -sign as sign - FROM default.transactions FINAL - WHERE chain_id = ? AND block_number IN (?) AND hash IN (?) 
- ) - INSERT INTO transactions ( - chain_id, hash, nonce, block_hash, block_number, block_timestamp, transaction_index, from_address, to_address, value, gas, gas_price, data, function_selector, - max_fee_per_gas, max_priority_fee_per_gas, max_fee_per_blob_gas, blob_versioned_hashes, transaction_type, r, s, v, access_list, authorization_list, contract_address, - gas_used, cumulative_gas_used, effective_gas_price, blob_gas_used, blob_gas_price, logs_bloom, status, insert_timestamp, sign - ) SELECT * from to_be_inserted - ` - - const batchSize = 1000 - for i := 0; i < len(duplicateTransactions); i += batchSize { - end := i + batchSize - if end > len(duplicateTransactions) { - end = len(duplicateTransactions) - } - - batch := duplicateTransactions[i:end] - blockNumbers := make([]*big.Int, 0, len(batch)) - hashes := make([]string, 0, len(batch)) - - for _, duplicateTransaction := range batch { - blockNumbers = append(blockNumbers, duplicateTransaction.BlockNumber) - hashes = append(hashes, duplicateTransaction.Hash) - } - - err := conn.Exec(context.Background(), query, chainId, blockNumbers, hashes) - if err != nil { - return err - } - } - return nil -} - -func removeDuplicateLogs(conn clickhouse.Conn, chainId *big.Int, duplicateLogs []DuplicateLog) error { - const batchSize = 1000 - for i := 0; i < len(duplicateLogs); i += batchSize { - end := i + batchSize - if end > len(duplicateLogs) { - end = len(duplicateLogs) - } - - batch := duplicateLogs[i:end] - blockNumbers := make([]*big.Int, 0, len(batch)) - tuples := make([]string, 0, len(batch)) - - for _, duplicateLog := range batch { - blockNumbers = append(blockNumbers, duplicateLog.BlockNumber) - tuples = append(tuples, fmt.Sprintf("('%s', %d)", duplicateLog.TxHash, duplicateLog.LogIndex)) - } - - query := fmt.Sprintf(`WITH - to_be_inserted AS ( - SELECT chain_id, block_number, block_hash, block_timestamp, transaction_hash, transaction_index, log_index, address, - data, topic_0, topic_1, topic_2, topic_3, insert_timestamp, -sign as sign - FROM default.logs FINAL - WHERE chain_id = ? AND block_number IN (?) 
AND (transaction_hash, log_index) IN (%s) - ) - INSERT INTO logs ( - chain_id, block_number, block_hash, block_timestamp, transaction_hash, transaction_index, log_index, address, - data, topic_0, topic_1, topic_2, topic_3, insert_timestamp, sign - ) SELECT * from to_be_inserted - `, strings.Join(tuples, ",")) - - err := conn.Exec(context.Background(), query, chainId, blockNumbers) - if err != nil { - return err - } - } - return nil -} diff --git a/internal/worker/worker.go b/internal/worker/worker.go deleted file mode 100644 index 63adcfe7..00000000 --- a/internal/worker/worker.go +++ /dev/null @@ -1,449 +0,0 @@ -package worker - -import ( - "context" - "fmt" - "math/big" - "sort" - "sync" - - "github.com/rs/zerolog/log" - "github.com/thirdweb-dev/indexer/internal/common" - "github.com/thirdweb-dev/indexer/internal/metrics" - "github.com/thirdweb-dev/indexer/internal/rpc" - "github.com/thirdweb-dev/indexer/internal/source" -) - -// SourceType represents the type of data source -type SourceType string - -const ( - // SourceTypeRPC represents RPC data source - SourceTypeRPC SourceType = "rpc" - // SourceTypeArchive represents archive data source (e.g., S3) - SourceTypeArchive SourceType = "archive" - // SourceTypeStaging represents staging data source (e.g., S3) - SourceTypeStaging SourceType = "staging" -) - -const ( - DEFAULT_RPC_CHUNK_SIZE = 25 -) - -// String returns the string representation of the source type -func (s SourceType) String() string { - return string(s) -} - -// Worker handles block data fetching from RPC and optional archive -type Worker struct { - rpc rpc.IRPCClient - archive source.ISource - staging source.ISource - rpcChunkSize int - rpcSemaphore chan struct{} // Limit concurrent RPC requests -} - -func NewWorker(rpc rpc.IRPCClient) *Worker { - chunk := rpc.GetBlocksPerRequest().Blocks - if chunk <= 0 { - chunk = DEFAULT_RPC_CHUNK_SIZE - } - return &Worker{ - rpc: rpc, - rpcChunkSize: chunk, - rpcSemaphore: make(chan struct{}, 20), - } -} - -// NewWorkerWithSources creates a new Worker with optional archive and staging support -func NewWorkerWithSources(rpc rpc.IRPCClient, archive source.ISource, staging source.ISource) *Worker { - worker := NewWorker(rpc) - worker.archive = archive - worker.staging = staging - return worker -} - -// fetchFromRPC fetches blocks directly from RPC -func (w *Worker) fetchFromRPC(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { - // Acquire semaphore for rate limiting - select { - case w.rpcSemaphore <- struct{}{}: - defer func() { <-w.rpcSemaphore }() - case <-ctx.Done(): - return nil - } - - return w.rpc.GetFullBlocks(ctx, blocks) -} - -// fetchFromArchive fetches blocks from archive if available -func (w *Worker) fetchFromArchive(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { - return w.archive.GetFullBlocks(ctx, blocks) -} - -func (w *Worker) fetchFromStaging(ctx context.Context, blocks []*big.Int) []rpc.GetFullBlockResult { - return w.staging.GetFullBlocks(ctx, blocks) -} - -// processChunkWithRetry processes a chunk with automatic retry on failure -func (w *Worker) processChunkWithRetry(ctx context.Context, chunk []*big.Int, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) (success []rpc.GetFullBlockResult, failed []rpc.GetFullBlockResult) { - select { - case <-ctx.Done(): - // Return error results for all blocks if context cancelled - var results []rpc.GetFullBlockResult - for _, block := range chunk { - results = append(results, rpc.GetFullBlockResult{ - BlockNumber: block, 
- Error: fmt.Errorf("context cancelled"), - }) - } - return nil, results - default: - } - - // Fetch the chunk - results := fetchFunc(ctx, chunk) - - // If we got all results, return them - if len(results) == len(chunk) { - allSuccess := true - for _, r := range results { - if r.Error != nil { - allSuccess = false - break - } - } - if allSuccess { - return results, nil - } - } - - // Separate successful and failed - successMap := make(map[string]rpc.GetFullBlockResult) - var failedBlocks []*big.Int - - for i, result := range results { - if i < len(chunk) { - if result.Error == nil { - successMap[chunk[i].String()] = result - } else { - failedBlocks = append(failedBlocks, chunk[i]) - } - } - } - - // If only one block failed, retry once more - if len(failedBlocks) == 1 { - retryResults := fetchFunc(ctx, failedBlocks) - if len(retryResults) > 0 { - if retryResults[0].Error == nil { - successMap[failedBlocks[0].String()] = retryResults[0] - } else { - // Keep the error result - successMap[failedBlocks[0].String()] = rpc.GetFullBlockResult{ - BlockNumber: failedBlocks[0], - Error: retryResults[0].Error, - } - } - } - } else if len(failedBlocks) > 1 { - // Split failed blocks and retry recursively - mid := len(failedBlocks) / 2 - leftChunk := failedBlocks[:mid] - rightChunk := failedBlocks[mid:] - - log.Debug(). - Int("failed_count", len(failedBlocks)). - Int("left_chunk", len(leftChunk)). - Int("right_chunk", len(rightChunk)). - Msg("Splitting failed blocks for retry") - - // Process both halves (left and right) - var rwg sync.WaitGroup - var rwgMutex sync.Mutex - rwg.Add(2) - go func() { - defer rwg.Done() - leftResults, _ := w.processChunkWithRetry(ctx, leftChunk, fetchFunc) - // Add results to map - for _, r := range leftResults { - if r.BlockNumber != nil { - rwgMutex.Lock() - successMap[r.BlockNumber.String()] = r - rwgMutex.Unlock() - } - } - }() - - go func() { - defer rwg.Done() - rightResults, _ := w.processChunkWithRetry(ctx, rightChunk, fetchFunc) - // Add results to map - for _, r := range rightResults { - if r.BlockNumber != nil { - rwgMutex.Lock() - successMap[r.BlockNumber.String()] = r - rwgMutex.Unlock() - } - } - }() - - rwg.Wait() - } - - // Build final results in original order - var finalResults []rpc.GetFullBlockResult - var failedResults []rpc.GetFullBlockResult - for _, block := range chunk { - if result, ok := successMap[block.String()]; ok { - finalResults = append(finalResults, result) - } else { - // This should not happen as we have retried all failed blocks - failedResults = append(failedResults, rpc.GetFullBlockResult{ - BlockNumber: block, - Error: fmt.Errorf("failed to fetch block"), - }) - } - } - - return finalResults, failedResults -} - -// processChunk -func (w *Worker) processChunk(ctx context.Context, chunk []*big.Int, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) (success []rpc.GetFullBlockResult, failed []rpc.GetFullBlockResult) { - select { - case <-ctx.Done(): - // Return error results for all blocks if context cancelled - var results []rpc.GetFullBlockResult - for _, block := range chunk { - results = append(results, rpc.GetFullBlockResult{ - BlockNumber: block, - Error: fmt.Errorf("context cancelled"), - }) - } - return nil, results - default: - } - - // Fetch the chunk - results := fetchFunc(ctx, chunk) - - // If we got all results, return them - if len(results) == len(chunk) { - allSuccess := true - for _, r := range results { - if r.Error != nil { - allSuccess = false - break - } - } - if allSuccess { - return results, nil 
- } - } - - // Separate successful and failed - successMap := make(map[string]rpc.GetFullBlockResult) - - for i, result := range results { - if i < len(chunk) { - if result.Error == nil { - successMap[chunk[i].String()] = result - } - } - } - - // Build final results in original order - var finalResults []rpc.GetFullBlockResult - var failedResults []rpc.GetFullBlockResult - for _, block := range chunk { - if result, ok := successMap[block.String()]; ok { - finalResults = append(finalResults, result) - } else { - // This should not happen as we have retried all failed blocks - failedResults = append(failedResults, rpc.GetFullBlockResult{ - BlockNumber: block, - Error: fmt.Errorf("failed to fetch block"), - }) - } - } - - return finalResults, failedResults -} - -// processBatch processes a batch of blocks from a specific source -func (w *Worker) processBatchWithRetry(ctx context.Context, blocks []*big.Int, sourceType SourceType, fetchFunc func(context.Context, []*big.Int) []rpc.GetFullBlockResult) (success []rpc.GetFullBlockResult, failed []rpc.GetFullBlockResult) { - if len(blocks) == 0 { - return nil, nil - } - - // Only enable chunk retrying for RPC - shouldRetry := sourceType == SourceTypeRPC - - chunkSize := len(blocks) // Fetch all at once from archive - if sourceType == SourceTypeRPC { - chunkSize = w.rpcChunkSize // TODO dynamically change this - } - - chunks := common.SliceToChunks(blocks, chunkSize) - - log.Debug(). - Str("source", sourceType.String()). - Int("total_blocks", len(blocks)). - Int("chunks", len(chunks)). - Int("chunk_size", chunkSize). - Str("first_block", blocks[0].String()). - Str("last_block", blocks[len(blocks)-1].String()). - Msgf("Processing blocks for range %s - %s", blocks[0].String(), blocks[len(blocks)-1].String()) - - var allResults []rpc.GetFullBlockResult - var allFailures []rpc.GetFullBlockResult - var mu sync.Mutex - var wg sync.WaitGroup - - for _, chunk := range chunks { - // Check context before starting new work - if ctx.Err() != nil { - log.Debug().Msg("Context canceled, skipping remaining chunks") - break // Don't start new chunks, but let existing ones finish - } - - wg.Add(1) - go func(chunk []*big.Int, shouldRetry bool) { - defer wg.Done() - - var results []rpc.GetFullBlockResult - var failed []rpc.GetFullBlockResult - - if shouldRetry { - results, failed = w.processChunkWithRetry(ctx, chunk, fetchFunc) - } else { - results, failed = w.processChunk(ctx, chunk, fetchFunc) - } - - mu.Lock() - allResults = append(allResults, results...) - allFailures = append(allFailures, failed...) 
- mu.Unlock() - }(chunk, shouldRetry) - } - - // Wait for all started goroutines to complete - wg.Wait() - - // Sort results by block number (only if we have results) - if len(allResults) > 0 { - sort.Slice(allResults, func(i, j int) bool { - return allResults[i].BlockNumber.Cmp(allResults[j].BlockNumber) < 0 - }) - } - if len(allFailures) > 0 { - sort.Slice(allFailures, func(i, j int) bool { - return allFailures[i].BlockNumber.Cmp(allFailures[j].BlockNumber) < 0 - }) - } - - return allResults, allFailures -} - -// shouldUseSource determines if ALL requested blocks are within source range -func (w *Worker) shouldUseSource(ctx context.Context, source source.ISource, blockNumbers []*big.Int) bool { - // Check if source is configured and we have blocks to process - if source == nil { - return false - } - if len(blockNumbers) == 0 { - return false - } - - // Get source block range - min, max, err := source.GetSupportedBlockRange(ctx) - if err != nil { - return false - } - - if min == nil || max == nil { - return false - } - - // Check if ALL blocks are within source range - for _, block := range blockNumbers { - if block.Cmp(min) < 0 || block.Cmp(max) > 0 { - // At least one block is outside archive range - return false - } - } - - // All blocks are within archive range - return true -} - -// Run processes blocks using either archive OR rpc -func (w *Worker) Run(ctx context.Context, blockNumbers []*big.Int) []rpc.GetFullBlockResult { - if len(blockNumbers) == 0 { - return nil - } - - var results []rpc.GetFullBlockResult - var errors []rpc.GetFullBlockResult - - // Determine which source to use - sourceType := SourceTypeRPC - success := false - - if w.shouldUseSource(ctx, w.staging, blockNumbers) { - sourceType = SourceTypeStaging - results, errors = w.processBatchWithRetry(ctx, blockNumbers, sourceType, w.fetchFromStaging) - success = len(results) > 0 && len(errors) == 0 - } - - if !success && w.shouldUseSource(ctx, w.archive, blockNumbers) { - sourceType = SourceTypeArchive - results, errors = w.processBatchWithRetry(ctx, blockNumbers, sourceType, w.fetchFromArchive) - success = len(results) > 0 && len(errors) == 0 - } - - if !success { - sourceType = SourceTypeRPC - results, errors = w.processBatchWithRetry(ctx, blockNumbers, sourceType, w.fetchFromRPC) - success = len(results) > 0 && len(errors) == 0 - } - - if len(errors) > 0 { - first, last := blockNumbers[0], blockNumbers[len(blockNumbers)-1] - firstError, lastError := errors[0], errors[len(errors)-1] - log.Error().Msgf("Error fetching block for range: %s - %s. Error: %s - %s (%d)", first.String(), last.String(), firstError.BlockNumber.String(), lastError.BlockNumber.String(), len(errors)) - return nil - } - - if !success || len(results) == 0 { - first, last := blockNumbers[0], blockNumbers[len(blockNumbers)-1] - log.Error().Msgf("No blocks fetched for range: %s - %s", first.String(), last.String()) - return nil - } - - // Update metrics and log summary - lastBlockNumberFloat, _ := results[len(results)-1].BlockNumber.Float64() - metrics.LastFetchedBlock.Set(lastBlockNumberFloat) - - log.Debug(). - Str("source", sourceType.String()). - Str("first_block", results[0].BlockNumber.String()). - Str("last_block", results[len(results)-1].BlockNumber.String()). 
- Msgf("Block fetching complete for range %s - %s", results[0].BlockNumber.String(), results[len(results)-1].BlockNumber.String()) - - return results -} - -// Close gracefully shuts down the worker and cleans up resources -func (w *Worker) Close() error { - // Close archive if it exists - if w.archive != nil { - log.Debug().Msg("Closing archive connection") - w.archive.Close() - } - - log.Debug().Msg("Worker closed successfully") - return nil -} diff --git a/test/mocks/MockIMainStorage.go b/test/mocks/MockIMainStorage.go deleted file mode 100644 index e13e4ee6..00000000 --- a/test/mocks/MockIMainStorage.go +++ /dev/null @@ -1,1090 +0,0 @@ -// Code generated by mockery v2.53.5. DO NOT EDIT. - -//go:build !production - -package mocks - -import ( - big "math/big" - - mock "github.com/stretchr/testify/mock" - common "github.com/thirdweb-dev/indexer/internal/common" - - storage "github.com/thirdweb-dev/indexer/internal/storage" -) - -// MockIMainStorage is an autogenerated mock type for the IMainStorage type -type MockIMainStorage struct { - mock.Mock -} - -type MockIMainStorage_Expecter struct { - mock *mock.Mock -} - -func (_m *MockIMainStorage) EXPECT() *MockIMainStorage_Expecter { - return &MockIMainStorage_Expecter{mock: &_m.Mock} -} - -// Close provides a mock function with no fields -func (_m *MockIMainStorage) Close() error { - ret := _m.Called() - - if len(ret) == 0 { - panic("no return value specified for Close") - } - - var r0 error - if rf, ok := ret.Get(0).(func() error); ok { - r0 = rf() - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// MockIMainStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' -type MockIMainStorage_Close_Call struct { - *mock.Call -} - -// Close is a helper method to define mock.On call -func (_e *MockIMainStorage_Expecter) Close() *MockIMainStorage_Close_Call { - return &MockIMainStorage_Close_Call{Call: _e.mock.On("Close")} -} - -func (_c *MockIMainStorage_Close_Call) Run(run func()) *MockIMainStorage_Close_Call { - _c.Call.Run(func(args mock.Arguments) { - run() - }) - return _c -} - -func (_c *MockIMainStorage_Close_Call) Return(_a0 error) *MockIMainStorage_Close_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockIMainStorage_Close_Call) RunAndReturn(run func() error) *MockIMainStorage_Close_Call { - _c.Call.Return(run) - return _c -} - -// FindMissingBlockNumbers provides a mock function with given fields: chainId, startBlock, endBlock -func (_m *MockIMainStorage) FindMissingBlockNumbers(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]*big.Int, error) { - ret := _m.Called(chainId, startBlock, endBlock) - - if len(ret) == 0 { - panic("no return value specified for FindMissingBlockNumbers") - } - - var r0 []*big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) ([]*big.Int, error)); ok { - return rf(chainId, startBlock, endBlock) - } - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) []*big.Int); ok { - r0 = rf(chainId, startBlock, endBlock) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]*big.Int) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { - r1 = rf(chainId, startBlock, endBlock) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_FindMissingBlockNumbers_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'FindMissingBlockNumbers' -type MockIMainStorage_FindMissingBlockNumbers_Call struct { - *mock.Call -} - 
-// FindMissingBlockNumbers is a helper method to define mock.On call -// - chainId *big.Int -// - startBlock *big.Int -// - endBlock *big.Int -func (_e *MockIMainStorage_Expecter) FindMissingBlockNumbers(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_FindMissingBlockNumbers_Call { - return &MockIMainStorage_FindMissingBlockNumbers_Call{Call: _e.mock.On("FindMissingBlockNumbers", chainId, startBlock, endBlock)} -} - -func (_c *MockIMainStorage_FindMissingBlockNumbers_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_FindMissingBlockNumbers_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_FindMissingBlockNumbers_Call) Return(blockNumbers []*big.Int, err error) *MockIMainStorage_FindMissingBlockNumbers_Call { - _c.Call.Return(blockNumbers, err) - return _c -} - -func (_c *MockIMainStorage_FindMissingBlockNumbers_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) ([]*big.Int, error)) *MockIMainStorage_FindMissingBlockNumbers_Call { - _c.Call.Return(run) - return _c -} - -// GetAggregations provides a mock function with given fields: table, qf -func (_m *MockIMainStorage) GetAggregations(table string, qf storage.QueryFilter) (storage.QueryResult[interface{}], error) { - ret := _m.Called(table, qf) - - if len(ret) == 0 { - panic("no return value specified for GetAggregations") - } - - var r0 storage.QueryResult[interface{}] - var r1 error - if rf, ok := ret.Get(0).(func(string, storage.QueryFilter) (storage.QueryResult[interface{}], error)); ok { - return rf(table, qf) - } - if rf, ok := ret.Get(0).(func(string, storage.QueryFilter) storage.QueryResult[interface{}]); ok { - r0 = rf(table, qf) - } else { - r0 = ret.Get(0).(storage.QueryResult[interface{}]) - } - - if rf, ok := ret.Get(1).(func(string, storage.QueryFilter) error); ok { - r1 = rf(table, qf) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetAggregations_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetAggregations' -type MockIMainStorage_GetAggregations_Call struct { - *mock.Call -} - -// GetAggregations is a helper method to define mock.On call -// - table string -// - qf storage.QueryFilter -func (_e *MockIMainStorage_Expecter) GetAggregations(table interface{}, qf interface{}) *MockIMainStorage_GetAggregations_Call { - return &MockIMainStorage_GetAggregations_Call{Call: _e.mock.On("GetAggregations", table, qf)} -} - -func (_c *MockIMainStorage_GetAggregations_Call) Run(run func(table string, qf storage.QueryFilter)) *MockIMainStorage_GetAggregations_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(string), args[1].(storage.QueryFilter)) - }) - return _c -} - -func (_c *MockIMainStorage_GetAggregations_Call) Return(_a0 storage.QueryResult[interface{}], _a1 error) *MockIMainStorage_GetAggregations_Call { - _c.Call.Return(_a0, _a1) - return _c -} - -func (_c *MockIMainStorage_GetAggregations_Call) RunAndReturn(run func(string, storage.QueryFilter) (storage.QueryResult[interface{}], error)) *MockIMainStorage_GetAggregations_Call { - _c.Call.Return(run) - return _c -} - -// GetBlockCount provides a mock function with given fields: chainId, startBlock, endBlock -func (_m *MockIMainStorage) GetBlockCount(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - ret := _m.Called(chainId, startBlock, endBlock) - - if 
len(ret) == 0 { - panic("no return value specified for GetBlockCount") - } - - var r0 *big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { - return rf(chainId, startBlock, endBlock) - } - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { - r0 = rf(chainId, startBlock, endBlock) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { - r1 = rf(chainId, startBlock, endBlock) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetBlockCount_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockCount' -type MockIMainStorage_GetBlockCount_Call struct { - *mock.Call -} - -// GetBlockCount is a helper method to define mock.On call -// - chainId *big.Int -// - startBlock *big.Int -// - endBlock *big.Int -func (_e *MockIMainStorage_Expecter) GetBlockCount(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetBlockCount_Call { - return &MockIMainStorage_GetBlockCount_Call{Call: _e.mock.On("GetBlockCount", chainId, startBlock, endBlock)} -} - -func (_c *MockIMainStorage_GetBlockCount_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetBlockCount_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_GetBlockCount_Call) Return(blockCount *big.Int, err error) *MockIMainStorage_GetBlockCount_Call { - _c.Call.Return(blockCount, err) - return _c -} - -func (_c *MockIMainStorage_GetBlockCount_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) *MockIMainStorage_GetBlockCount_Call { - _c.Call.Return(run) - return _c -} - -// GetBlockHeadersDescending provides a mock function with given fields: chainId, from, to -func (_m *MockIMainStorage) GetBlockHeadersDescending(chainId *big.Int, from *big.Int, to *big.Int) ([]common.BlockHeader, error) { - ret := _m.Called(chainId, from, to) - - if len(ret) == 0 { - panic("no return value specified for GetBlockHeadersDescending") - } - - var r0 []common.BlockHeader - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) ([]common.BlockHeader, error)); ok { - return rf(chainId, from, to) - } - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) []common.BlockHeader); ok { - r0 = rf(chainId, from, to) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]common.BlockHeader) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { - r1 = rf(chainId, from, to) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetBlockHeadersDescending_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlockHeadersDescending' -type MockIMainStorage_GetBlockHeadersDescending_Call struct { - *mock.Call -} - -// GetBlockHeadersDescending is a helper method to define mock.On call -// - chainId *big.Int -// - from *big.Int -// - to *big.Int -func (_e *MockIMainStorage_Expecter) GetBlockHeadersDescending(chainId interface{}, from interface{}, to interface{}) *MockIMainStorage_GetBlockHeadersDescending_Call { - return &MockIMainStorage_GetBlockHeadersDescending_Call{Call: _e.mock.On("GetBlockHeadersDescending", chainId, from, to)} -} - -func (_c *MockIMainStorage_GetBlockHeadersDescending_Call) Run(run 
func(chainId *big.Int, from *big.Int, to *big.Int)) *MockIMainStorage_GetBlockHeadersDescending_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_GetBlockHeadersDescending_Call) Return(blockHeaders []common.BlockHeader, err error) *MockIMainStorage_GetBlockHeadersDescending_Call { - _c.Call.Return(blockHeaders, err) - return _c -} - -func (_c *MockIMainStorage_GetBlockHeadersDescending_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) ([]common.BlockHeader, error)) *MockIMainStorage_GetBlockHeadersDescending_Call { - _c.Call.Return(run) - return _c -} - -// GetBlocks provides a mock function with given fields: qf, fields -func (_m *MockIMainStorage) GetBlocks(qf storage.QueryFilter, fields ...string) (storage.QueryResult[common.Block], error) { - _va := make([]interface{}, len(fields)) - for _i := range fields { - _va[_i] = fields[_i] - } - var _ca []interface{} - _ca = append(_ca, qf) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - if len(ret) == 0 { - panic("no return value specified for GetBlocks") - } - - var r0 storage.QueryResult[common.Block] - var r1 error - if rf, ok := ret.Get(0).(func(storage.QueryFilter, ...string) (storage.QueryResult[common.Block], error)); ok { - return rf(qf, fields...) - } - if rf, ok := ret.Get(0).(func(storage.QueryFilter, ...string) storage.QueryResult[common.Block]); ok { - r0 = rf(qf, fields...) - } else { - r0 = ret.Get(0).(storage.QueryResult[common.Block]) - } - - if rf, ok := ret.Get(1).(func(storage.QueryFilter, ...string) error); ok { - r1 = rf(qf, fields...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetBlocks_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetBlocks' -type MockIMainStorage_GetBlocks_Call struct { - *mock.Call -} - -// GetBlocks is a helper method to define mock.On call -// - qf storage.QueryFilter -// - fields ...string -func (_e *MockIMainStorage_Expecter) GetBlocks(qf interface{}, fields ...interface{}) *MockIMainStorage_GetBlocks_Call { - return &MockIMainStorage_GetBlocks_Call{Call: _e.mock.On("GetBlocks", - append([]interface{}{qf}, fields...)...)} -} - -func (_c *MockIMainStorage_GetBlocks_Call) Run(run func(qf storage.QueryFilter, fields ...string)) *MockIMainStorage_GetBlocks_Call { - _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]string, len(args)-1) - for i, a := range args[1:] { - if a != nil { - variadicArgs[i] = a.(string) - } - } - run(args[0].(storage.QueryFilter), variadicArgs...) 
- }) - return _c -} - -func (_c *MockIMainStorage_GetBlocks_Call) Return(blocks storage.QueryResult[common.Block], err error) *MockIMainStorage_GetBlocks_Call { - _c.Call.Return(blocks, err) - return _c -} - -func (_c *MockIMainStorage_GetBlocks_Call) RunAndReturn(run func(storage.QueryFilter, ...string) (storage.QueryResult[common.Block], error)) *MockIMainStorage_GetBlocks_Call { - _c.Call.Return(run) - return _c -} - -// GetFullBlockData provides a mock function with given fields: chainId, blockNumbers -func (_m *MockIMainStorage) GetFullBlockData(chainId *big.Int, blockNumbers []*big.Int) ([]common.BlockData, error) { - ret := _m.Called(chainId, blockNumbers) - - if len(ret) == 0 { - panic("no return value specified for GetFullBlockData") - } - - var r0 []common.BlockData - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int, []*big.Int) ([]common.BlockData, error)); ok { - return rf(chainId, blockNumbers) - } - if rf, ok := ret.Get(0).(func(*big.Int, []*big.Int) []common.BlockData); ok { - r0 = rf(chainId, blockNumbers) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]common.BlockData) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int, []*big.Int) error); ok { - r1 = rf(chainId, blockNumbers) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetFullBlockData_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetFullBlockData' -type MockIMainStorage_GetFullBlockData_Call struct { - *mock.Call -} - -// GetFullBlockData is a helper method to define mock.On call -// - chainId *big.Int -// - blockNumbers []*big.Int -func (_e *MockIMainStorage_Expecter) GetFullBlockData(chainId interface{}, blockNumbers interface{}) *MockIMainStorage_GetFullBlockData_Call { - return &MockIMainStorage_GetFullBlockData_Call{Call: _e.mock.On("GetFullBlockData", chainId, blockNumbers)} -} - -func (_c *MockIMainStorage_GetFullBlockData_Call) Run(run func(chainId *big.Int, blockNumbers []*big.Int)) *MockIMainStorage_GetFullBlockData_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].([]*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_GetFullBlockData_Call) Return(blocks []common.BlockData, err error) *MockIMainStorage_GetFullBlockData_Call { - _c.Call.Return(blocks, err) - return _c -} - -func (_c *MockIMainStorage_GetFullBlockData_Call) RunAndReturn(run func(*big.Int, []*big.Int) ([]common.BlockData, error)) *MockIMainStorage_GetFullBlockData_Call { - _c.Call.Return(run) - return _c -} - -// GetLogs provides a mock function with given fields: qf, fields -func (_m *MockIMainStorage) GetLogs(qf storage.QueryFilter, fields ...string) (storage.QueryResult[common.Log], error) { - _va := make([]interface{}, len(fields)) - for _i := range fields { - _va[_i] = fields[_i] - } - var _ca []interface{} - _ca = append(_ca, qf) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - if len(ret) == 0 { - panic("no return value specified for GetLogs") - } - - var r0 storage.QueryResult[common.Log] - var r1 error - if rf, ok := ret.Get(0).(func(storage.QueryFilter, ...string) (storage.QueryResult[common.Log], error)); ok { - return rf(qf, fields...) - } - if rf, ok := ret.Get(0).(func(storage.QueryFilter, ...string) storage.QueryResult[common.Log]); ok { - r0 = rf(qf, fields...) - } else { - r0 = ret.Get(0).(storage.QueryResult[common.Log]) - } - - if rf, ok := ret.Get(1).(func(storage.QueryFilter, ...string) error); ok { - r1 = rf(qf, fields...) 
- } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetLogs_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetLogs' -type MockIMainStorage_GetLogs_Call struct { - *mock.Call -} - -// GetLogs is a helper method to define mock.On call -// - qf storage.QueryFilter -// - fields ...string -func (_e *MockIMainStorage_Expecter) GetLogs(qf interface{}, fields ...interface{}) *MockIMainStorage_GetLogs_Call { - return &MockIMainStorage_GetLogs_Call{Call: _e.mock.On("GetLogs", - append([]interface{}{qf}, fields...)...)} -} - -func (_c *MockIMainStorage_GetLogs_Call) Run(run func(qf storage.QueryFilter, fields ...string)) *MockIMainStorage_GetLogs_Call { - _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]string, len(args)-1) - for i, a := range args[1:] { - if a != nil { - variadicArgs[i] = a.(string) - } - } - run(args[0].(storage.QueryFilter), variadicArgs...) - }) - return _c -} - -func (_c *MockIMainStorage_GetLogs_Call) Return(logs storage.QueryResult[common.Log], err error) *MockIMainStorage_GetLogs_Call { - _c.Call.Return(logs, err) - return _c -} - -func (_c *MockIMainStorage_GetLogs_Call) RunAndReturn(run func(storage.QueryFilter, ...string) (storage.QueryResult[common.Log], error)) *MockIMainStorage_GetLogs_Call { - _c.Call.Return(run) - return _c -} - -// GetMaxBlockNumber provides a mock function with given fields: chainId -func (_m *MockIMainStorage) GetMaxBlockNumber(chainId *big.Int) (*big.Int, error) { - ret := _m.Called(chainId) - - if len(ret) == 0 { - panic("no return value specified for GetMaxBlockNumber") - } - - var r0 *big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, error)); ok { - return rf(chainId) - } - if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { - r0 = rf(chainId) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int) error); ok { - r1 = rf(chainId) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetMaxBlockNumber_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetMaxBlockNumber' -type MockIMainStorage_GetMaxBlockNumber_Call struct { - *mock.Call -} - -// GetMaxBlockNumber is a helper method to define mock.On call -// - chainId *big.Int -func (_e *MockIMainStorage_Expecter) GetMaxBlockNumber(chainId interface{}) *MockIMainStorage_GetMaxBlockNumber_Call { - return &MockIMainStorage_GetMaxBlockNumber_Call{Call: _e.mock.On("GetMaxBlockNumber", chainId)} -} - -func (_c *MockIMainStorage_GetMaxBlockNumber_Call) Run(run func(chainId *big.Int)) *MockIMainStorage_GetMaxBlockNumber_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_GetMaxBlockNumber_Call) Return(maxBlockNumber *big.Int, err error) *MockIMainStorage_GetMaxBlockNumber_Call { - _c.Call.Return(maxBlockNumber, err) - return _c -} - -func (_c *MockIMainStorage_GetMaxBlockNumber_Call) RunAndReturn(run func(*big.Int) (*big.Int, error)) *MockIMainStorage_GetMaxBlockNumber_Call { - _c.Call.Return(run) - return _c -} - -// GetMaxBlockNumberInRange provides a mock function with given fields: chainId, startBlock, endBlock -func (_m *MockIMainStorage) GetMaxBlockNumberInRange(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) (*big.Int, error) { - ret := _m.Called(chainId, startBlock, endBlock) - - if len(ret) == 0 { - panic("no return value specified for GetMaxBlockNumberInRange") - } - - 
var r0 *big.Int - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) (*big.Int, error)); ok { - return rf(chainId, startBlock, endBlock) - } - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) *big.Int); ok { - r0 = rf(chainId, startBlock, endBlock) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { - r1 = rf(chainId, startBlock, endBlock) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetMaxBlockNumberInRange_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetMaxBlockNumberInRange' -type MockIMainStorage_GetMaxBlockNumberInRange_Call struct { - *mock.Call -} - -// GetMaxBlockNumberInRange is a helper method to define mock.On call -// - chainId *big.Int -// - startBlock *big.Int -// - endBlock *big.Int -func (_e *MockIMainStorage_Expecter) GetMaxBlockNumberInRange(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - return &MockIMainStorage_GetMaxBlockNumberInRange_Call{Call: _e.mock.On("GetMaxBlockNumberInRange", chainId, startBlock, endBlock)} -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) Return(maxBlockNumber *big.Int, err error) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Return(maxBlockNumber, err) - return _c -} - -func (_c *MockIMainStorage_GetMaxBlockNumberInRange_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) (*big.Int, error)) *MockIMainStorage_GetMaxBlockNumberInRange_Call { - _c.Call.Return(run) - return _c -} - -// GetTokenBalances provides a mock function with given fields: qf, fields -func (_m *MockIMainStorage) GetTokenBalances(qf storage.BalancesQueryFilter, fields ...string) (storage.QueryResult[common.TokenBalance], error) { - _va := make([]interface{}, len(fields)) - for _i := range fields { - _va[_i] = fields[_i] - } - var _ca []interface{} - _ca = append(_ca, qf) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - if len(ret) == 0 { - panic("no return value specified for GetTokenBalances") - } - - var r0 storage.QueryResult[common.TokenBalance] - var r1 error - if rf, ok := ret.Get(0).(func(storage.BalancesQueryFilter, ...string) (storage.QueryResult[common.TokenBalance], error)); ok { - return rf(qf, fields...) - } - if rf, ok := ret.Get(0).(func(storage.BalancesQueryFilter, ...string) storage.QueryResult[common.TokenBalance]); ok { - r0 = rf(qf, fields...) - } else { - r0 = ret.Get(0).(storage.QueryResult[common.TokenBalance]) - } - - if rf, ok := ret.Get(1).(func(storage.BalancesQueryFilter, ...string) error); ok { - r1 = rf(qf, fields...) 
- } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetTokenBalances_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetTokenBalances' -type MockIMainStorage_GetTokenBalances_Call struct { - *mock.Call -} - -// GetTokenBalances is a helper method to define mock.On call -// - qf storage.BalancesQueryFilter -// - fields ...string -func (_e *MockIMainStorage_Expecter) GetTokenBalances(qf interface{}, fields ...interface{}) *MockIMainStorage_GetTokenBalances_Call { - return &MockIMainStorage_GetTokenBalances_Call{Call: _e.mock.On("GetTokenBalances", - append([]interface{}{qf}, fields...)...)} -} - -func (_c *MockIMainStorage_GetTokenBalances_Call) Run(run func(qf storage.BalancesQueryFilter, fields ...string)) *MockIMainStorage_GetTokenBalances_Call { - _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]string, len(args)-1) - for i, a := range args[1:] { - if a != nil { - variadicArgs[i] = a.(string) - } - } - run(args[0].(storage.BalancesQueryFilter), variadicArgs...) - }) - return _c -} - -func (_c *MockIMainStorage_GetTokenBalances_Call) Return(_a0 storage.QueryResult[common.TokenBalance], _a1 error) *MockIMainStorage_GetTokenBalances_Call { - _c.Call.Return(_a0, _a1) - return _c -} - -func (_c *MockIMainStorage_GetTokenBalances_Call) RunAndReturn(run func(storage.BalancesQueryFilter, ...string) (storage.QueryResult[common.TokenBalance], error)) *MockIMainStorage_GetTokenBalances_Call { - _c.Call.Return(run) - return _c -} - -// GetTokenTransfers provides a mock function with given fields: qf, fields -func (_m *MockIMainStorage) GetTokenTransfers(qf storage.TransfersQueryFilter, fields ...string) (storage.QueryResult[common.TokenTransfer], error) { - _va := make([]interface{}, len(fields)) - for _i := range fields { - _va[_i] = fields[_i] - } - var _ca []interface{} - _ca = append(_ca, qf) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - if len(ret) == 0 { - panic("no return value specified for GetTokenTransfers") - } - - var r0 storage.QueryResult[common.TokenTransfer] - var r1 error - if rf, ok := ret.Get(0).(func(storage.TransfersQueryFilter, ...string) (storage.QueryResult[common.TokenTransfer], error)); ok { - return rf(qf, fields...) - } - if rf, ok := ret.Get(0).(func(storage.TransfersQueryFilter, ...string) storage.QueryResult[common.TokenTransfer]); ok { - r0 = rf(qf, fields...) - } else { - r0 = ret.Get(0).(storage.QueryResult[common.TokenTransfer]) - } - - if rf, ok := ret.Get(1).(func(storage.TransfersQueryFilter, ...string) error); ok { - r1 = rf(qf, fields...) 
- } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetTokenTransfers_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetTokenTransfers' -type MockIMainStorage_GetTokenTransfers_Call struct { - *mock.Call -} - -// GetTokenTransfers is a helper method to define mock.On call -// - qf storage.TransfersQueryFilter -// - fields ...string -func (_e *MockIMainStorage_Expecter) GetTokenTransfers(qf interface{}, fields ...interface{}) *MockIMainStorage_GetTokenTransfers_Call { - return &MockIMainStorage_GetTokenTransfers_Call{Call: _e.mock.On("GetTokenTransfers", - append([]interface{}{qf}, fields...)...)} -} - -func (_c *MockIMainStorage_GetTokenTransfers_Call) Run(run func(qf storage.TransfersQueryFilter, fields ...string)) *MockIMainStorage_GetTokenTransfers_Call { - _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]string, len(args)-1) - for i, a := range args[1:] { - if a != nil { - variadicArgs[i] = a.(string) - } - } - run(args[0].(storage.TransfersQueryFilter), variadicArgs...) - }) - return _c -} - -func (_c *MockIMainStorage_GetTokenTransfers_Call) Return(_a0 storage.QueryResult[common.TokenTransfer], _a1 error) *MockIMainStorage_GetTokenTransfers_Call { - _c.Call.Return(_a0, _a1) - return _c -} - -func (_c *MockIMainStorage_GetTokenTransfers_Call) RunAndReturn(run func(storage.TransfersQueryFilter, ...string) (storage.QueryResult[common.TokenTransfer], error)) *MockIMainStorage_GetTokenTransfers_Call { - _c.Call.Return(run) - return _c -} - -// GetTraces provides a mock function with given fields: qf, fields -func (_m *MockIMainStorage) GetTraces(qf storage.QueryFilter, fields ...string) (storage.QueryResult[common.Trace], error) { - _va := make([]interface{}, len(fields)) - for _i := range fields { - _va[_i] = fields[_i] - } - var _ca []interface{} - _ca = append(_ca, qf) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - if len(ret) == 0 { - panic("no return value specified for GetTraces") - } - - var r0 storage.QueryResult[common.Trace] - var r1 error - if rf, ok := ret.Get(0).(func(storage.QueryFilter, ...string) (storage.QueryResult[common.Trace], error)); ok { - return rf(qf, fields...) - } - if rf, ok := ret.Get(0).(func(storage.QueryFilter, ...string) storage.QueryResult[common.Trace]); ok { - r0 = rf(qf, fields...) - } else { - r0 = ret.Get(0).(storage.QueryResult[common.Trace]) - } - - if rf, ok := ret.Get(1).(func(storage.QueryFilter, ...string) error); ok { - r1 = rf(qf, fields...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetTraces_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetTraces' -type MockIMainStorage_GetTraces_Call struct { - *mock.Call -} - -// GetTraces is a helper method to define mock.On call -// - qf storage.QueryFilter -// - fields ...string -func (_e *MockIMainStorage_Expecter) GetTraces(qf interface{}, fields ...interface{}) *MockIMainStorage_GetTraces_Call { - return &MockIMainStorage_GetTraces_Call{Call: _e.mock.On("GetTraces", - append([]interface{}{qf}, fields...)...)} -} - -func (_c *MockIMainStorage_GetTraces_Call) Run(run func(qf storage.QueryFilter, fields ...string)) *MockIMainStorage_GetTraces_Call { - _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]string, len(args)-1) - for i, a := range args[1:] { - if a != nil { - variadicArgs[i] = a.(string) - } - } - run(args[0].(storage.QueryFilter), variadicArgs...) 
- }) - return _c -} - -func (_c *MockIMainStorage_GetTraces_Call) Return(traces storage.QueryResult[common.Trace], err error) *MockIMainStorage_GetTraces_Call { - _c.Call.Return(traces, err) - return _c -} - -func (_c *MockIMainStorage_GetTraces_Call) RunAndReturn(run func(storage.QueryFilter, ...string) (storage.QueryResult[common.Trace], error)) *MockIMainStorage_GetTraces_Call { - _c.Call.Return(run) - return _c -} - -// GetTransactions provides a mock function with given fields: qf, fields -func (_m *MockIMainStorage) GetTransactions(qf storage.QueryFilter, fields ...string) (storage.QueryResult[common.Transaction], error) { - _va := make([]interface{}, len(fields)) - for _i := range fields { - _va[_i] = fields[_i] - } - var _ca []interface{} - _ca = append(_ca, qf) - _ca = append(_ca, _va...) - ret := _m.Called(_ca...) - - if len(ret) == 0 { - panic("no return value specified for GetTransactions") - } - - var r0 storage.QueryResult[common.Transaction] - var r1 error - if rf, ok := ret.Get(0).(func(storage.QueryFilter, ...string) (storage.QueryResult[common.Transaction], error)); ok { - return rf(qf, fields...) - } - if rf, ok := ret.Get(0).(func(storage.QueryFilter, ...string) storage.QueryResult[common.Transaction]); ok { - r0 = rf(qf, fields...) - } else { - r0 = ret.Get(0).(storage.QueryResult[common.Transaction]) - } - - if rf, ok := ret.Get(1).(func(storage.QueryFilter, ...string) error); ok { - r1 = rf(qf, fields...) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetTransactions_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetTransactions' -type MockIMainStorage_GetTransactions_Call struct { - *mock.Call -} - -// GetTransactions is a helper method to define mock.On call -// - qf storage.QueryFilter -// - fields ...string -func (_e *MockIMainStorage_Expecter) GetTransactions(qf interface{}, fields ...interface{}) *MockIMainStorage_GetTransactions_Call { - return &MockIMainStorage_GetTransactions_Call{Call: _e.mock.On("GetTransactions", - append([]interface{}{qf}, fields...)...)} -} - -func (_c *MockIMainStorage_GetTransactions_Call) Run(run func(qf storage.QueryFilter, fields ...string)) *MockIMainStorage_GetTransactions_Call { - _c.Call.Run(func(args mock.Arguments) { - variadicArgs := make([]string, len(args)-1) - for i, a := range args[1:] { - if a != nil { - variadicArgs[i] = a.(string) - } - } - run(args[0].(storage.QueryFilter), variadicArgs...) 
- }) - return _c -} - -func (_c *MockIMainStorage_GetTransactions_Call) Return(transactions storage.QueryResult[common.Transaction], err error) *MockIMainStorage_GetTransactions_Call { - _c.Call.Return(transactions, err) - return _c -} - -func (_c *MockIMainStorage_GetTransactions_Call) RunAndReturn(run func(storage.QueryFilter, ...string) (storage.QueryResult[common.Transaction], error)) *MockIMainStorage_GetTransactions_Call { - _c.Call.Return(run) - return _c -} - -// GetValidationBlockData provides a mock function with given fields: chainId, startBlock, endBlock -func (_m *MockIMainStorage) GetValidationBlockData(chainId *big.Int, startBlock *big.Int, endBlock *big.Int) ([]common.BlockData, error) { - ret := _m.Called(chainId, startBlock, endBlock) - - if len(ret) == 0 { - panic("no return value specified for GetValidationBlockData") - } - - var r0 []common.BlockData - var r1 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) ([]common.BlockData, error)); ok { - return rf(chainId, startBlock, endBlock) - } - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int, *big.Int) []common.BlockData); ok { - r0 = rf(chainId, startBlock, endBlock) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]common.BlockData) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int, *big.Int, *big.Int) error); ok { - r1 = rf(chainId, startBlock, endBlock) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_GetValidationBlockData_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetValidationBlockData' -type MockIMainStorage_GetValidationBlockData_Call struct { - *mock.Call -} - -// GetValidationBlockData is a helper method to define mock.On call -// - chainId *big.Int -// - startBlock *big.Int -// - endBlock *big.Int -func (_e *MockIMainStorage_Expecter) GetValidationBlockData(chainId interface{}, startBlock interface{}, endBlock interface{}) *MockIMainStorage_GetValidationBlockData_Call { - return &MockIMainStorage_GetValidationBlockData_Call{Call: _e.mock.On("GetValidationBlockData", chainId, startBlock, endBlock)} -} - -func (_c *MockIMainStorage_GetValidationBlockData_Call) Run(run func(chainId *big.Int, startBlock *big.Int, endBlock *big.Int)) *MockIMainStorage_GetValidationBlockData_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int), args[2].(*big.Int)) - }) - return _c -} - -func (_c *MockIMainStorage_GetValidationBlockData_Call) Return(blocks []common.BlockData, err error) *MockIMainStorage_GetValidationBlockData_Call { - _c.Call.Return(blocks, err) - return _c -} - -func (_c *MockIMainStorage_GetValidationBlockData_Call) RunAndReturn(run func(*big.Int, *big.Int, *big.Int) ([]common.BlockData, error)) *MockIMainStorage_GetValidationBlockData_Call { - _c.Call.Return(run) - return _c -} - -// InsertBlockData provides a mock function with given fields: data -func (_m *MockIMainStorage) InsertBlockData(data []common.BlockData) error { - ret := _m.Called(data) - - if len(ret) == 0 { - panic("no return value specified for InsertBlockData") - } - - var r0 error - if rf, ok := ret.Get(0).(func([]common.BlockData) error); ok { - r0 = rf(data) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// MockIMainStorage_InsertBlockData_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'InsertBlockData' -type MockIMainStorage_InsertBlockData_Call struct { - *mock.Call -} - -// InsertBlockData is a helper method to define mock.On call -// - data 
[]common.BlockData -func (_e *MockIMainStorage_Expecter) InsertBlockData(data interface{}) *MockIMainStorage_InsertBlockData_Call { - return &MockIMainStorage_InsertBlockData_Call{Call: _e.mock.On("InsertBlockData", data)} -} - -func (_c *MockIMainStorage_InsertBlockData_Call) Run(run func(data []common.BlockData)) *MockIMainStorage_InsertBlockData_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockData)) - }) - return _c -} - -func (_c *MockIMainStorage_InsertBlockData_Call) Return(_a0 error) *MockIMainStorage_InsertBlockData_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockIMainStorage_InsertBlockData_Call) RunAndReturn(run func([]common.BlockData) error) *MockIMainStorage_InsertBlockData_Call { - _c.Call.Return(run) - return _c -} - -// ReplaceBlockData provides a mock function with given fields: data -func (_m *MockIMainStorage) ReplaceBlockData(data []common.BlockData) ([]common.BlockData, error) { - ret := _m.Called(data) - - if len(ret) == 0 { - panic("no return value specified for ReplaceBlockData") - } - - var r0 []common.BlockData - var r1 error - if rf, ok := ret.Get(0).(func([]common.BlockData) ([]common.BlockData, error)); ok { - return rf(data) - } - if rf, ok := ret.Get(0).(func([]common.BlockData) []common.BlockData); ok { - r0 = rf(data) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]common.BlockData) - } - } - - if rf, ok := ret.Get(1).(func([]common.BlockData) error); ok { - r1 = rf(data) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIMainStorage_ReplaceBlockData_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ReplaceBlockData' -type MockIMainStorage_ReplaceBlockData_Call struct { - *mock.Call -} - -// ReplaceBlockData is a helper method to define mock.On call -// - data []common.BlockData -func (_e *MockIMainStorage_Expecter) ReplaceBlockData(data interface{}) *MockIMainStorage_ReplaceBlockData_Call { - return &MockIMainStorage_ReplaceBlockData_Call{Call: _e.mock.On("ReplaceBlockData", data)} -} - -func (_c *MockIMainStorage_ReplaceBlockData_Call) Run(run func(data []common.BlockData)) *MockIMainStorage_ReplaceBlockData_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockData)) - }) - return _c -} - -func (_c *MockIMainStorage_ReplaceBlockData_Call) Return(_a0 []common.BlockData, _a1 error) *MockIMainStorage_ReplaceBlockData_Call { - _c.Call.Return(_a0, _a1) - return _c -} - -func (_c *MockIMainStorage_ReplaceBlockData_Call) RunAndReturn(run func([]common.BlockData) ([]common.BlockData, error)) *MockIMainStorage_ReplaceBlockData_Call { - _c.Call.Return(run) - return _c -} - -// NewMockIMainStorage creates a new instance of MockIMainStorage. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewMockIMainStorage(t interface { - mock.TestingT - Cleanup(func()) -}) *MockIMainStorage { - mock := &MockIMainStorage{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} diff --git a/test/mocks/MockIStagingStorage.go b/test/mocks/MockIStagingStorage.go deleted file mode 100644 index f53e831f..00000000 --- a/test/mocks/MockIStagingStorage.go +++ /dev/null @@ -1,304 +0,0 @@ -// Code generated by mockery v2.53.5. DO NOT EDIT. 
- -//go:build !production - -package mocks - -import ( - big "math/big" - - mock "github.com/stretchr/testify/mock" - common "github.com/thirdweb-dev/indexer/internal/common" - - storage "github.com/thirdweb-dev/indexer/internal/storage" -) - -// MockIStagingStorage is an autogenerated mock type for the IStagingStorage type -type MockIStagingStorage struct { - mock.Mock -} - -type MockIStagingStorage_Expecter struct { - mock *mock.Mock -} - -func (_m *MockIStagingStorage) EXPECT() *MockIStagingStorage_Expecter { - return &MockIStagingStorage_Expecter{mock: &_m.Mock} -} - -// Close provides a mock function with no fields -func (_m *MockIStagingStorage) Close() error { - ret := _m.Called() - - if len(ret) == 0 { - panic("no return value specified for Close") - } - - var r0 error - if rf, ok := ret.Get(0).(func() error); ok { - r0 = rf() - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// MockIStagingStorage_Close_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Close' -type MockIStagingStorage_Close_Call struct { - *mock.Call -} - -// Close is a helper method to define mock.On call -func (_e *MockIStagingStorage_Expecter) Close() *MockIStagingStorage_Close_Call { - return &MockIStagingStorage_Close_Call{Call: _e.mock.On("Close")} -} - -func (_c *MockIStagingStorage_Close_Call) Run(run func()) *MockIStagingStorage_Close_Call { - _c.Call.Run(func(args mock.Arguments) { - run() - }) - return _c -} - -func (_c *MockIStagingStorage_Close_Call) Return(_a0 error) *MockIStagingStorage_Close_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockIStagingStorage_Close_Call) RunAndReturn(run func() error) *MockIStagingStorage_Close_Call { - _c.Call.Return(run) - return _c -} - -// DeleteStagingDataOlderThan provides a mock function with given fields: chainId, blockNumber -func (_m *MockIStagingStorage) DeleteStagingDataOlderThan(chainId *big.Int, blockNumber *big.Int) error { - ret := _m.Called(chainId, blockNumber) - - if len(ret) == 0 { - panic("no return value specified for DeleteStagingDataOlderThan") - } - - var r0 error - if rf, ok := ret.Get(0).(func(*big.Int, *big.Int) error); ok { - r0 = rf(chainId, blockNumber) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// MockIStagingStorage_DeleteStagingDataOlderThan_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteStagingDataOlderThan' -type MockIStagingStorage_DeleteStagingDataOlderThan_Call struct { - *mock.Call -} - -// DeleteStagingDataOlderThan is a helper method to define mock.On call -// - chainId *big.Int -// - blockNumber *big.Int -func (_e *MockIStagingStorage_Expecter) DeleteStagingDataOlderThan(chainId interface{}, blockNumber interface{}) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { - return &MockIStagingStorage_DeleteStagingDataOlderThan_Call{Call: _e.mock.On("DeleteStagingDataOlderThan", chainId, blockNumber)} -} - -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Run(run func(chainId *big.Int, blockNumber *big.Int)) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int), args[1].(*big.Int)) - }) - return _c -} - -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) Return(_a0 error) *MockIStagingStorage_DeleteStagingDataOlderThan_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockIStagingStorage_DeleteStagingDataOlderThan_Call) RunAndReturn(run func(*big.Int, *big.Int) error) 
*MockIStagingStorage_DeleteStagingDataOlderThan_Call { - _c.Call.Return(run) - return _c -} - -// GetStagingData provides a mock function with given fields: qf -func (_m *MockIStagingStorage) GetStagingData(qf storage.QueryFilter) ([]common.BlockData, error) { - ret := _m.Called(qf) - - if len(ret) == 0 { - panic("no return value specified for GetStagingData") - } - - var r0 []common.BlockData - var r1 error - if rf, ok := ret.Get(0).(func(storage.QueryFilter) ([]common.BlockData, error)); ok { - return rf(qf) - } - if rf, ok := ret.Get(0).(func(storage.QueryFilter) []common.BlockData); ok { - r0 = rf(qf) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).([]common.BlockData) - } - } - - if rf, ok := ret.Get(1).(func(storage.QueryFilter) error); ok { - r1 = rf(qf) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} - -// MockIStagingStorage_GetStagingData_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetStagingData' -type MockIStagingStorage_GetStagingData_Call struct { - *mock.Call -} - -// GetStagingData is a helper method to define mock.On call -// - qf storage.QueryFilter -func (_e *MockIStagingStorage_Expecter) GetStagingData(qf interface{}) *MockIStagingStorage_GetStagingData_Call { - return &MockIStagingStorage_GetStagingData_Call{Call: _e.mock.On("GetStagingData", qf)} -} - -func (_c *MockIStagingStorage_GetStagingData_Call) Run(run func(qf storage.QueryFilter)) *MockIStagingStorage_GetStagingData_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(storage.QueryFilter)) - }) - return _c -} - -func (_c *MockIStagingStorage_GetStagingData_Call) Return(data []common.BlockData, err error) *MockIStagingStorage_GetStagingData_Call { - _c.Call.Return(data, err) - return _c -} - -func (_c *MockIStagingStorage_GetStagingData_Call) RunAndReturn(run func(storage.QueryFilter) ([]common.BlockData, error)) *MockIStagingStorage_GetStagingData_Call { - _c.Call.Return(run) - return _c -} - -// GetStagingDataBlockRange provides a mock function with given fields: chainId -func (_m *MockIStagingStorage) GetStagingDataBlockRange(chainId *big.Int) (*big.Int, *big.Int, error) { - ret := _m.Called(chainId) - - if len(ret) == 0 { - panic("no return value specified for GetStagingDataBlockRange") - } - - var r0 *big.Int - var r1 *big.Int - var r2 error - if rf, ok := ret.Get(0).(func(*big.Int) (*big.Int, *big.Int, error)); ok { - return rf(chainId) - } - if rf, ok := ret.Get(0).(func(*big.Int) *big.Int); ok { - r0 = rf(chainId) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*big.Int) - } - } - - if rf, ok := ret.Get(1).(func(*big.Int) *big.Int); ok { - r1 = rf(chainId) - } else { - if ret.Get(1) != nil { - r1 = ret.Get(1).(*big.Int) - } - } - - if rf, ok := ret.Get(2).(func(*big.Int) error); ok { - r2 = rf(chainId) - } else { - r2 = ret.Error(2) - } - - return r0, r1, r2 -} - -// MockIStagingStorage_GetStagingDataBlockRange_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'GetStagingDataBlockRange' -type MockIStagingStorage_GetStagingDataBlockRange_Call struct { - *mock.Call -} - -// GetStagingDataBlockRange is a helper method to define mock.On call -// - chainId *big.Int -func (_e *MockIStagingStorage_Expecter) GetStagingDataBlockRange(chainId interface{}) *MockIStagingStorage_GetStagingDataBlockRange_Call { - return &MockIStagingStorage_GetStagingDataBlockRange_Call{Call: _e.mock.On("GetStagingDataBlockRange", chainId)} -} - -func (_c *MockIStagingStorage_GetStagingDataBlockRange_Call) Run(run 
func(chainId *big.Int)) *MockIStagingStorage_GetStagingDataBlockRange_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].(*big.Int)) - }) - return _c -} - -func (_c *MockIStagingStorage_GetStagingDataBlockRange_Call) Return(minBlockNumber *big.Int, maxBlockNumber *big.Int, err error) *MockIStagingStorage_GetStagingDataBlockRange_Call { - _c.Call.Return(minBlockNumber, maxBlockNumber, err) - return _c -} - -func (_c *MockIStagingStorage_GetStagingDataBlockRange_Call) RunAndReturn(run func(*big.Int) (*big.Int, *big.Int, error)) *MockIStagingStorage_GetStagingDataBlockRange_Call { - _c.Call.Return(run) - return _c -} - -// InsertStagingData provides a mock function with given fields: data -func (_m *MockIStagingStorage) InsertStagingData(data []common.BlockData) error { - ret := _m.Called(data) - - if len(ret) == 0 { - panic("no return value specified for InsertStagingData") - } - - var r0 error - if rf, ok := ret.Get(0).(func([]common.BlockData) error); ok { - r0 = rf(data) - } else { - r0 = ret.Error(0) - } - - return r0 -} - -// MockIStagingStorage_InsertStagingData_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'InsertStagingData' -type MockIStagingStorage_InsertStagingData_Call struct { - *mock.Call -} - -// InsertStagingData is a helper method to define mock.On call -// - data []common.BlockData -func (_e *MockIStagingStorage_Expecter) InsertStagingData(data interface{}) *MockIStagingStorage_InsertStagingData_Call { - return &MockIStagingStorage_InsertStagingData_Call{Call: _e.mock.On("InsertStagingData", data)} -} - -func (_c *MockIStagingStorage_InsertStagingData_Call) Run(run func(data []common.BlockData)) *MockIStagingStorage_InsertStagingData_Call { - _c.Call.Run(func(args mock.Arguments) { - run(args[0].([]common.BlockData)) - }) - return _c -} - -func (_c *MockIStagingStorage_InsertStagingData_Call) Return(_a0 error) *MockIStagingStorage_InsertStagingData_Call { - _c.Call.Return(_a0) - return _c -} - -func (_c *MockIStagingStorage_InsertStagingData_Call) RunAndReturn(run func([]common.BlockData) error) *MockIStagingStorage_InsertStagingData_Call { - _c.Call.Return(run) - return _c -} - -// NewMockIStagingStorage creates a new instance of MockIStagingStorage. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. -// The first argument is typically a *testing.T value. -func NewMockIStagingStorage(t interface { - mock.TestingT - Cleanup(func()) -}) *MockIStagingStorage { - mock := &MockIStagingStorage{} - mock.Mock.Test(t) - - t.Cleanup(func() { mock.AssertExpectations(t) }) - - return mock -} From 69c2d064db739deb81ffa57e61ec7b3773ab7f92 Mon Sep 17 00:00:00 2001 From: nischit Date: Mon, 27 Oct 2025 14:05:45 +0545 Subject: [PATCH 4/4] minor change --- docker-compose.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 17f1726e..4eb6026a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,8 +6,6 @@ services: volumes: - redis_data:/data command: redis-server --appendonly yes - profiles: - - redis # PostgreSQL database postgres: