# Session-Based Trader Feature Extraction

This notebook extracts trader features from 1-2 hour trading sessions for real-time trader classification.
Designed to work with limited data available during live inference.

In [4]:
import sys
sys.path.append('/Users/noel/projects/trading_eda/solana')

from solana_eda_utils import SolanaDataAnalyzer, format_large_number, truncate_address
import pandas as pd
import numpy as np
import duckdb
from datetime import datetime, timedelta
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Initialize analyzer
analyzer = SolanaDataAnalyzer()
print("Database connection established")

Connected to database: /Volumes/Extreme SSD/DuckDB/solana.duckdb
Database connection established


## 1. Data Exploration - Understanding Available Time Windows

In [5]:
# Get overview of data availability
summary = analyzer.get_summary_stats()
print("Dataset Summary:")
for col, val in summary.iloc[0].items():
    if 'timestamp' in col:
        print(f"{col}: {val}")
    else:
        print(f"{col}: {format_large_number(val) if isinstance(val, (int, float)) else val}")

Dataset Summary:
total_trades: 325171663
unique_traders: 10060971
unique_tokens: 5877
unique_from_tokens: 6505
unique_to_tokens: 6293
successful_trades: 325.2M
earliest_trade: 2021-03-03 17:52:54-05:00
latest_trade: 2025-06-15 19:59:46-04:00
avg_from_amount: 3.0B
avg_to_amount: 3.2B
median_from_amount: 6
p95_from_amount: 4.8M


In [8]:
# Create table with first 2 hours of trading for each coin
query = """
       CREATE OR REPLACE TABLE coin_first_two_hours AS
       WITH coin_start_times AS (
           SELECT 
               mint,
               MIN(block_timestamp) as first_trade_time
           FROM first_day_trades
           WHERE succeeded = true
           GROUP BY mint
       )
       SELECT 
           t.*
       FROM first_day_trades t
       JOIN coin_start_times s ON t.mint = s.mint
       WHERE t.succeeded = true
       AND t.block_timestamp >= s.first_trade_time
       AND t.block_timestamp < s.first_trade_time + INTERVAL 2 HOURS
       """

       # Close read-only connection and create writable one
analyzer.close()
import duckdb
con = duckdb.connect('/Volumes/Extreme SSD/DuckDB/solana.duckdb')
con.execute(query)
print('Table coin_first_two_hours created successfully')

       # Verify the table
count = con.execute('SELECT COUNT(*) as count FROM coin_first_two_hours').fetchdf()
print(f'Total trades in first 2 hours: {count.iloc[0]["count"]:,}')

coins = con.execute('SELECT COUNT(DISTINCT mint) as coins FROM coin_first_two_hours').fetchdf()
print(f'Total coins: {coins.iloc[0]["coins"]:,}')

con.close()

Table coin_first_two_hours created successfully
Total trades in first 2 hours: 133,394,160
Total coins: 5,877


In [9]:
# 1. TRADING ACTIVITY FEATURES
# Extract basic trading activity metrics for each trader-coin combination

activity_features_query = """
WITH coin_start_times AS (
    SELECT 
        mint,
        MIN(block_timestamp) as coin_launch_time
    FROM coin_first_two_hours
    GROUP BY mint
),
trader_activity AS (
    SELECT 
        t.swapper as trader_id,
        t.mint as coin_id,
        s.coin_launch_time,
        COUNT(*) as trade_count,
        COUNT(*) / 2.0 as trades_per_hour,
        EXTRACT(EPOCH FROM (MAX(t.block_timestamp) - MIN(t.block_timestamp))) / 60.0 as time_span_minutes,
        EXTRACT(EPOCH FROM (MIN(t.block_timestamp) - s.coin_launch_time)) / 60.0 as early_entry_minutes
    FROM coin_first_two_hours t
    JOIN coin_start_times s ON t.mint = s.mint
    GROUP BY t.swapper, t.mint, s.coin_launch_time
)
SELECT * FROM trader_activity
LIMIT 10
"""

# Reconnect to database
import duckdb
con = duckdb.connect('/Volumes/Extreme SSD/DuckDB/solana.duckdb', read_only=True)

activity_features = con.execute(activity_features_query).fetchdf()
print("Trading Activity Features Sample:")
print(activity_features)

con.close()

Trading Activity Features Sample:
                                      trader_id  \
0  4ZybS8a76riwZPF8dZrxUAi55M4UeULx8uMKCtyqDBkA   
1  83YcNPvgUneq5F2tCd6Lo9THUhK3F8gySboEcf7xrwWF   
2  Ft2odmM3kxC6uVCeMpRAGZR3haqCr1n98PEmX31ZAuvc   
3  DgEAQNicTGBvQqSqLoURSv5F2k9fxaGzmf2k86svCCyM   
4  5rgbnftRaNhSmMsUo54a5oLiZCwtnnGJjyLRhkvLcVWc   
5  3XnsqASTxPUtPCRD5FE9CyLwsW5a4GhUQkmcspGvw9Yg   
6  97QcDwuZPmiY4QrZA4k3nrkFQ6FxEbFwqrcR2pxeZ8zy   
7  CohiCsuW71qHxvN8wpcwy6PLctawbnj7shgjHcY4wKgL   
8  7PFgypbCnordYaMuEWhesAbGyEGjAf83GM3w39BvF2kE   
9  6fXJHi6QEGJLAuYHsKLJBiLbQS96vgqXwtCrXDyL9HDN   

                                        coin_id          coin_launch_time  \
0  2fUFhZyd47Mapv9wcfXh5gnQwFXtqcYu9xAN4THBpump 2024-06-07 10:09:05-04:00   
1  2fUFhZyd47Mapv9wcfXh5gnQwFXtqcYu9xAN4THBpump 2024-06-07 10:09:05-04:00   
2  2fUFhZyd47Mapv9wcfXh5gnQwFXtqcYu9xAN4THBpump 2024-06-07 10:09:05-04:00   
3  2fUFhZyd47Mapv9wcfXh5gnQwFXtqcYu9xAN4THBpump 2024-06-07 10:09:05-04:00   
4  2fUFhZyd47Mapv9w

In [10]:
# 2. VOLUME & SIZE FEATURES
# Extract trading volume and transaction size patterns

volume_features_query = """
WITH trader_volume AS (
    SELECT 
        swapper as trader_id,
        mint as coin_id,
        SUM(swap_from_amount) as total_volume_traded,
        AVG(swap_from_amount) as avg_trade_size,
        STDDEV(swap_from_amount) as trade_size_std,
        MAX(swap_from_amount) as largest_trade_size,
        COUNT(*) as trade_count
    FROM coin_first_two_hours
    GROUP BY swapper, mint
),
volume_features AS (
    SELECT 
        trader_id,
        coin_id,
        total_volume_traded,
        avg_trade_size,
        CASE 
            WHEN avg_trade_size > 0 THEN trade_size_std / avg_trade_size 
            ELSE 0 
        END as trade_size_cv,
        largest_trade_size,
        CASE 
            WHEN total_volume_traded > 0 THEN largest_trade_size / total_volume_traded 
            ELSE 0 
        END as volume_concentration
    FROM trader_volume
)
SELECT * FROM volume_features
LIMIT 10
"""

# Reconnect to database
import duckdb
con = duckdb.connect('/Volumes/Extreme SSD/DuckDB/solana.duckdb', read_only=True)

volume_features = con.execute(volume_features_query).fetchdf()
print("Volume & Size Features Sample:")
print(volume_features)

con.close()

Volume & Size Features Sample:
                                      trader_id  \
0  55JjnuaDpxcd1cTzdFDM9kd2zkiZQH9sfhwS2U2FXAYb   
1  5vKZfJdhuCsV8Q17thNTPNybDVE2dYx7eGu6baviwtF6   
2  E5wsaQBv1bp1F1U1nwd4zq4Q8KfSUCpmBkYvgvFGvfR9   
3  H5KSe19MC12y3fNNoTBsHS3zdmXKMHtCtHeZVYSrQFpf   
4  3WbkskdttEH4Buthmx8PbG1GJiAEJX9uLMPkHo5gEr5Z   
5  HnfoKqejN9QEuZuyE2GhxmDAdAo5nfgnzu9rGPwcuHcJ   
6  5C6iqVsA19nphVfhtfCUSq1DEHNfjSGTj7BBnjdoFgZd   
7  6wHv5xjDQJTW39ahke8BwGnJ2YLgVQj8vQ4g9JptkVUb   
8  6biF3LYbc4m16JauRd5aRT1TxjqmRnBYc6Ek5fqhid2U   
9  7EpxXv7hrbcXng9gyHG8RuaHav1H71jgHRPE2aiWb5Fh   

                                        coin_id  total_volume_traded  \
0  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump        122697.479342   
1  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump         23208.885733   
2  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump             0.048000   
3  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump          4463.119687   
4  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump

In [13]:
# 3. DIVERSIFICATION FEATURES (REVISED)
# Extract meaningful trading diversification patterns

diversification_features_query = """
WITH trader_patterns AS (
    SELECT 
        swapper as trader_id,
        mint as coin_id,
        COUNT(DISTINCT CONCAT(swap_from_mint, '|', swap_to_mint)) as unique_trading_pairs,
        COUNT(*) as total_trades,
        SUM(CASE WHEN swap_from_mint = 'So11111111111111111111111111111111111111112' 
                      OR swap_to_mint = 'So11111111111111111111111111111111111111112' THEN 1 ELSE 0 END) as sol_trades,
        SUM(CASE WHEN swap_to_mint = mint THEN 1 ELSE 0 END) as buy_trades
    FROM coin_first_two_hours
    GROUP BY swapper, mint
),
diversification_features AS (
    SELECT 
        trader_id,
        coin_id,
        unique_trading_pairs,
        CAST(sol_trades AS FLOAT) / total_trades as sol_involvement_ratio,
        CAST(buy_trades AS FLOAT) / total_trades as buy_ratio
    FROM trader_patterns
)
SELECT * FROM diversification_features
LIMIT 10
"""

# Reconnect to database  
import duckdb
con = duckdb.connect('/Volumes/Extreme SSD/DuckDB/solana.duckdb', read_only=True)

diversification_features = con.execute(diversification_features_query).fetchdf()
print("Diversification Features Sample (Revised):")
print(diversification_features)

con.close()

Diversification Features Sample (Revised):
                                      trader_id  \
0  J8bA3YDwbDxBgK1qkh8oyaiHVdQUX3hfosXrKK3Npicp   
1  9jEDfvjgaHGNRRnxyRHoMPwi1woF1uMHWKLUhGFhFttM   
2  AUJ6gJFtq7vZ2B51DM2PScVDyfafCiTdDa8he6sW4MuZ   
3  5u3QzctsgcBir5napgcWi4K2631oDFaXr55ps9FasvFt   
4   G51QnvQNDr1SfxhcqmNoBrYUXgSJ4ekf3CTS23LNFnz   
5  GxgdtVfogoZuG1PL4YKMtzxe5qQ3ZT2Vp8BwzL9wYrCr   
6  63EbUyj9a4K86NimRCdpFzobzDUKjAZHa18raKvyqdsA   
7  4BkPRYFLLMt2tanhW4oEi7cxGmQvc1UTkJWJ88fNKSq1   
8   GfAfvkc3CVFcu3aBGTGQqVVFwQkhiEHZZcMHHRSnvUV   
9  7AxeNytDUKXTSXixPMwtgR25V1bAudHigVVQmxJfcAZU   

                                        coin_id  unique_trading_pairs  \
0  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB                     2   
1  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB                     2   
2  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB                     2   
3  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB                     2   
4  3MadWqcN9cSrULn8ikDnan9mF3z

In [17]:
# 4. BEHAVIORAL PATTERN FEATURES
# Extract bot detection and trading behavior patterns

behavioral_features_query = """
WITH trader_behavior AS (
    SELECT 
        swapper as trader_id,
        mint as coin_id,
        COUNT(*) as total_trades,
        SUM(CASE WHEN swap_from_amount IN (-- Small round amounts
                0.01, 0.05, 0.1, 0.2, 0.25, 0.5, 0.75,
                -- Whole numbers 1-10
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0,
                -- Common trading amounts
                15.0, 20.0, 25.0, 50.0, 75.0, 100.0,
                -- Larger round amounts  
                150.0, 200.0, 250.0, 500.0, 750.0, 1000.0,
                -- Very large round amounts
                1500.0, 2000.0, 2500.0, 5000.0, 10000.0) THEN 1 ELSE 0 END) as round_number_trades,
        COUNT(DISTINCT swap_from_amount) as unique_trade_sizes,
        COUNT(DISTINCT DATE_TRUNC('minute', block_timestamp)) as unique_minutes_active,
        EXTRACT(EPOCH FROM (MAX(block_timestamp) - MIN(block_timestamp))) as total_time_span_seconds
    FROM coin_first_two_hours
    GROUP BY swapper, mint
),
behavioral_features AS (
    SELECT 
        trader_id,
        coin_id,
        total_trades,
        CAST(round_number_trades AS FLOAT) / total_trades as round_number_ratio,
        CAST(unique_trade_sizes AS FLOAT) / total_trades as trade_size_diversity,
        CASE 
                   WHEN total_trades > 1 AND total_time_span_seconds > 0 
                   THEN total_time_span_seconds / (total_trades - 1)
                   ELSE 0 
               END as avg_seconds_between_trades
    FROM trader_behavior
)
SELECT * FROM behavioral_features
LIMIT 10
"""

# Reconnect to database
import duckdb
con = duckdb.connect('/Volumes/Extreme SSD/DuckDB/solana.duckdb', read_only=True)

behavioral_features = con.execute(behavioral_features_query).fetchdf()
print("Behavioral Pattern Features Sample:")
print(behavioral_features)

con.close()

Behavioral Pattern Features Sample:
                                      trader_id  \
0  CVyXpJN1X3UQpgtV2YvmSkcSK4YLZHEY1ycRHcz7eiid   
1  3b211gSn6JFaHBNuXPttKG2bRp9CpksDvo5PAbhgUgeJ   
2  5rx2YDT3YZTNpJS93F3rxAjySSRfVpMQ1pgUoR12xny9   
3  BMkU6Nfs71uXy2t8dwSEnGJpwxeMJpA48S87jg4sexzC   
4  AqtBqGjum94wyWEZUxYMHeLidQTXypKpytryVELs4zPr   
5  ACCUigPFKy4oU7gvLBSgDCrFcSn4atffKuWiHvrWGCY3   
6  EAdBeyoeMJGcmC4o29L93q1qR2UJssGQj2XkyUJQc3T2   
7  HDYLWZm2B8PJtjMvdHa5tzGTPG77vZCGsiipLYNxstiY   
8  3hz1UuMpWfWPSKjvFUjAf92DxcQsVNx2wHgdftngfz3U   
9  AqFTWUu5h2ib5PTeGuUQqXefgvU6oRqqyy5wzvPMMDMb   

                                        coin_id  total_trades  \
0  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB           140   
1  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB             4   
2  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB            73   
3  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB            12   
4  3MadWqcN9cSrULn8ikDnan9mF3znoQmBPXtVy6BfSTDB            58   
5  3MadWqcN9

In [21]:
# 5. TIMING PATTERN FEATURES (SIMPLIFIED)
# Extract when trader first entered relative to coin launch

timing_features_query = """
WITH coin_start_times AS (
    SELECT 
        mint,
        MIN(block_timestamp) as coin_launch_time
    FROM coin_first_two_hours
    GROUP BY mint
),
timing_features AS (
    SELECT 
        t.swapper as trader_id,
        t.mint as coin_id,
        MIN(EXTRACT(EPOCH FROM t.block_timestamp - s.coin_launch_time) / 60.0) as first_trade_minutes
    FROM coin_first_two_hours t
    JOIN coin_start_times s ON t.mint = s.mint
    GROUP BY t.swapper, t.mint
)
SELECT * FROM timing_features
LIMIT 10
"""

# Reconnect to database
import duckdb
con = duckdb.connect('/Volumes/Extreme SSD/DuckDB/solana.duckdb', read_only=True)

timing_features = con.execute(timing_features_query).fetchdf()
print("Timing Pattern Features Sample (Simplified):")
print(timing_features)

con.close()

Timing Pattern Features Sample (Simplified):
                                      trader_id  \
0  7cLdFe2w8fRBwBX1gwai75hEL4VHT4ReUDgjUgTRdhMA   
1  6u9paYroF9hyA223oXfeGfK1RTB5XyLAed13HN7Fvfme   
2  5wbJhCvBqz7bh6fUEQ3xL2bp6H9zTmJrxmgavEHB3mZm   
3  2baRCaz6zCcUyfaMrdwrm2XaAAzbzJovhZKju8WC7i7u   
4  7YNznAiZcjeQSAUkQuXvKnyTLViUBMrhNbJKKczFHaWR   
5  6mK9LA4Qwc8KmxeG6XR4S6TfonW9HT3QNtXPnrr7LGN7   
6  4o9V9xn5vgzkLXbW5wenMxueBE2QR8Nw4MigczZj96Ra   
7  DQyuUzxgn7451PzkDpyxz5shDTod2wD6Pfv79tHD1Nj7   
8  A7FMMgue4aZmPLLoutVtbC7gJcyqkHybUieiaDg9aaVE   
9  6i7YVSExzJUdZxmfrbBuwjrP6W5P2UvRpAYoY89ebhYz   

                                        coin_id  first_trade_minutes  
0  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump            16.433333  
1  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump            12.333333  
2  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump            17.250000  
3  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5bJpLcpump            10.583333  
4  BDW8YHasD3NSDjSHU9Xy6KXtshGayMGQfj5