# Solana and Ethereum Liquidity Pool Analysis

This notebook fetches and analyzes liquidity pool data from decentralized exchanges (DEXes) on Solana, Ethereum, and the other supported blockchains (BSC, Polygon PoS, Arbitrum, Sui, Base) using the GeckoTerminal API. It allows interactive selection of chains, DEXes, and filtering parameters such as a TVL range and a minimum volume.

## Setup
Install dependencies and import required modules.

In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
import matplotlib.pyplot as plt
from src.clients.geckoterminal import GeckoTerminalClient
from src.constants import Network, NETWORK_CONFIG
from src.utils.analyzer import Analyzer
from datetime import datetime, timedelta
import os
import glob
import numpy as np
import traceback

# Inject CSS that caps elements of class "output" at 500px and lets them scroll vertically
display(HTML('<style>.output {max-height: 500px; overflow-y: auto;}</style>'))

# Networks selectable in the UI, in display order
chains = [
    network.value
    for network in (
        Network.SOLANA,
        Network.ETHEREUM,
        Network.BSC,
        Network.POLYGONPOS,
        Network.ARBITRUM,
        Network.SUI,
        Network.BASE,
    )
]
# DEX list of every selectable network, read from the project network configuration
dexes_by_chain = {name: NETWORK_CONFIG[name]['dexes'] for name in chains}

# Multi-select lists: the first chain is preselected, no DEX is preselected
chain_dropdown = widgets.SelectMultiple(
    options=chains,
    value=[chains[0]],
    rows=5,
    description='Chains:',
)
dex_dropdown = widgets.SelectMultiple(
    options=dexes_by_chain[chains[0]],
    value=[],
    rows=5,
    description='DEXes:',
)

# Numeric pool filters
min_tvl = widgets.FloatText(description='Min TVL ($):', value=10000)
max_tvl = widgets.FloatText(description='Max TVL ($):', value=10000000000.0)
min_volume = widgets.FloatText(description='Min Volume ($):', value=5000)

# Boolean pool filters and the cache override
no_pivots = widgets.Checkbox(description='Exclude Pivot Tokens', value=False)
no_stables = widgets.Checkbox(description='Exclude Stablecoins', value=True)
utility_pairs = widgets.Checkbox(description='Utility Pairs Only', value=False)
force_redownload = widgets.Checkbox(description='Force Re-Download Data', value=False)

# Trigger button and the output area the fetch handlers print into
fetch_button = widgets.Button(button_style='success', description='Fetch Pools')
output = widgets.Output()

# Keep the DEX list in sync with the chain selection (union of DEXes over all selected chains)
def update_dex_dropdown(change):
    """Rebuild the DEX options from the chains found in ``change['new']``.

    The options become the sorted union of the configured DEXes of every
    selected chain; a chain missing from ``dexes_by_chain`` contributes nothing.
    """
    per_chain_dexes = (dexes_by_chain.get(name, []) for name in change['new'])
    dex_dropdown.options = sorted(set().union(*per_chain_dexes))


chain_dropdown.observe(update_dex_dropdown, names='value')

# Render the fetch controls as one vertical stack, with the log output area last
display(widgets.VBox([
    chain_dropdown,
    dex_dropdown,
    min_tvl,
    max_tvl,
    min_volume,
    no_pivots,
    no_stables,
    utility_pairs,
    force_redownload,
    fetch_button,
    output,
]))

# Minimum history a pool needs in order to be kept: 14 days of 1-hour bars
_MIN_POOL_AGE_HOURS = 14 * 24


def _fetch_hourly_bars(client, pool_address, currency, token):
    """Request up to 1000 one-hour price bars for a pool from the client."""
    return client.get_price_bars(
        pool_address=pool_address,
        timeframe='hour',
        aggregate=1,
        currency=currency,
        token=token,
        limit=1000
    )


def _load_or_fetch_cross_bars(client, pool, pair, data_folder):
    """Return the cross-price bars of a pool, or None when fetching failed.

    The cached ``cross.csv`` is reused when all three CSVs (base_usd, quote_usd,
    cross) exist in ``data_folder`` and 'Force Re-Download Data' is unchecked.
    Otherwise the three bar series are requested from the client and, if all
    three results are truthy, written to ``data_folder`` before returning.
    """
    base_usd_path = os.path.join(data_folder, 'base_usd.csv')
    quote_usd_path = os.path.join(data_folder, 'quote_usd.csv')
    cross_path = os.path.join(data_folder, 'cross.csv')

    if not force_redownload.value and all(
        os.path.exists(path) for path in (base_usd_path, quote_usd_path, cross_path)
    ):
        print(f"Loading existing data for {pair} from {data_folder}")
        # Only the cross prices are needed for the age check below, so the
        # cached base/quote files are not read here.
        return pd.read_csv(cross_path)

    print(f"Fetching data for {pair}")
    base_usd_bars = _fetch_hourly_bars(client, pool.address, 'usd', 'base')
    quote_usd_bars = _fetch_hourly_bars(client, pool.address, 'usd', 'quote')
    # Cross price: base token priced in the quote token
    cross_bars = _fetch_hourly_bars(client, pool.address, 'token', 'base')

    if not (base_usd_bars and quote_usd_bars and cross_bars):
        print(f"Failed to fetch data for {pair}. Skipping.")
        return None

    cross_df = cross_bars.data
    base_usd_bars.data.to_csv(base_usd_path, index=False)
    quote_usd_bars.data.to_csv(quote_usd_path, index=False)
    cross_df.to_csv(cross_path, index=False)
    print(f"Saved data for {pair} to {data_folder}")
    return cross_df


def _has_min_history(cross_df, pair):
    """Return True when the bars cover at least ``_MIN_POOL_AGE_HOURS`` hours."""
    if len(cross_df) < _MIN_POOL_AGE_HOURS:
        print(f"Pool {pair} is too young (less than 2 weeks of data). Discarding.")
        return False

    # Frames reloaded with pd.read_csv carry the timestamp column as plain
    # text/numbers, so subtracting them and calling .total_seconds() would
    # raise. Converting first makes cached and freshly fetched data behave alike.
    # NOTE(review): assumes timestamps are datetimes or datetime strings; if the
    # client stores integer epochs, a unit= argument is needed here - confirm.
    timestamps = pd.to_datetime(cross_df['timestamp'])
    time_span_hours = (timestamps.max() - timestamps.min()).total_seconds() / 3600
    if time_span_hours < _MIN_POOL_AGE_HOURS:
        print(f"Pool {pair} data spans only {time_span_hours:.2f} hours (< 2 weeks). Discarding.")
        return False
    return True


# Define fetch_pools function
def fetch_pools(b):
    """Fetch, filter, save and plot pools for the current widget selection.

    For every selected chain and DEX (all DEXes of a chain when none is
    selected) the pools matching the TVL/volume/checkbox filters are requested,
    their hourly price bars are loaded from ``data/<chain>/<dex>/<pair>/`` or
    downloaded, and pools with less than two weeks of history are dropped.
    The surviving pools are displayed, written to a timestamped
    ``pools_multi_*.csv`` file and summarised in two histograms. All messages
    and errors are printed into the ``output`` widget.

    Args:
        b: The clicked button (unused); supplied by ``Button.on_click``.
    """
    with output:
        output.clear_output()
        print('Starting fetch_pools function...')
        try:
            selected_chains = list(chain_dropdown.value)
            selected_dexes = set(dex_dropdown.value)  # Use set for faster lookup

            if not selected_chains:
                print("No chains selected.")
                return

            all_pools = []
            for chain in selected_chains:
                print(f"Processing chain: {chain}")
                chain_dexes = dexes_by_chain.get(chain, [])

                # If no specific DEXes selected, fetch from all DEXes for the chain
                dexes_to_fetch = [dex for dex in chain_dexes if dex in selected_dexes] if selected_dexes else chain_dexes

                if not dexes_to_fetch:
                    print(f"No DEXes to fetch for chain {chain}. Skipping.")
                    continue

                for dex_name in dexes_to_fetch:
                    print(f"Fetching from DEX: {dex_name} on chain {chain}")

                    # Initialize client with DEX name as string
                    client = GeckoTerminalClient(
                        network=chain,
                        dex=dex_name
                    )

                    pools = client.fetch_liquidity_pools(
                        all_pages=True,
                        min_tvl=min_tvl.value,
                        max_tvl=max_tvl.value,
                        min_volume=min_volume.value,
                        no_pivots=no_pivots.value,
                        no_stables=no_stables.value,
                        utility_pairs=utility_pairs.value
                    )

                    # Keep only pools that are at least 2 weeks old, caching their data
                    filtered_pools = []
                    for pool in pools:
                        pool.chain = chain
                        pool.dex = dex_name

                        # NOTE(review): the folder is keyed by symbols only, so two
                        # pools of the same pair on one DEX share cached files.
                        pair = f'{pool.token0_symbol}_{pool.token1_symbol}'
                        data_folder = os.path.join('data', chain, dex_name, pair)
                        os.makedirs(data_folder, exist_ok=True)

                        cross_df = _load_or_fetch_cross_bars(client, pool, pair, data_folder)
                        if cross_df is None or not _has_min_history(cross_df, pair):
                            continue

                        filtered_pools.append(pool)
                        print(f"Added pool {pair} (age OK)")

                    all_pools.extend(filtered_pools)
                    print(f"Filtered {len(filtered_pools)} pools (age >= 2 weeks) from {dex_name} on {chain}")

            if not all_pools:
                print('No pools found matching the criteria.')
                return

            print(f"Total filtered pools: {len(all_pools)}")
            # Convert to DataFrame with added chain and DEX columns
            df = pd.DataFrame([
                {
                    'Chain': pool.chain,
                    'DEX': pool.dex,
                    'Pair': f'{pool.token0_symbol}/{pool.token1_symbol}',
                    'TVL': pool.tvl,
                    'Volume': pool.volume,
                    'Address': pool.address
                } for pool in all_pools
            ])

            # Display results
            print(f'Displaying {len(all_pools)} pools:')
            display(df)

            # Save to CSV
            output_file = f'pools_multi_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
            df.to_csv(output_file, index=False)
            print(f'Saved results to {output_file}')

            # Visualize TVL and Volume
            plt.figure(figsize=(10, 6))
            plt.subplot(1, 2, 1)
            plt.hist(df['TVL'], bins=20, color='blue', alpha=0.7)
            plt.title('TVL Distribution')
            plt.xlabel('TVL ($)')
            plt.ylabel('Count')
            plt.subplot(1, 2, 2)
            plt.hist(df['Volume'], bins=20, color='green', alpha=0.7)
            plt.title('Volume Distribution')
            plt.xlabel('Volume ($)')
            plt.ylabel('Count')
            plt.tight_layout()
            plt.show()

        except Exception as e:
            # Top-level UI boundary: report the failure in the output widget
            print(f'Error fetching pools: {e}')
            traceback.print_exc()

# Register button click handler
fetch_button.on_click(fetch_pools)

# Controls for the price correlation section; the pool list starts empty
pool_select = widgets.SelectMultiple(
    options=[],
    rows=10,
    description='Select Pools:',
)
days = widgets.IntSlider(description='Days:', value=7, min=1, max=30)
use_all_pools_checkbox = widgets.Checkbox(description='Use All Pools for Correlations', value=True)
analyze_button = widgets.Button(button_style='info', description='Analyze Correlations')

# Separate output areas for the correlation results and the per-pool metrics
corr_output = widgets.Output()
stats_output = widgets.Output()

# Render the correlation controls as one vertical stack
display(widgets.VBox([
    use_all_pools_checkbox,
    pool_select,
    days,
    analyze_button,
    corr_output,
    stats_output,
]))

# Update pool options and calculate stats after fetching
def update_pool_options_and_calculate_stats(change):
    """Refresh the pool selector and the metrics table from the newest pools CSV.

    Picks the ``pools_multi_*.csv`` file with the greatest ``os.path.getctime``,
    fills ``pool_select`` with one entry per row, then computes metrics for each
    pool whose cached price CSVs exist under ``data/<chain>/<dex>/<pair>/`` and
    displays them in ``stats_output``. Progress and errors are printed into the
    ``output`` widget.

    Args:
        change: Unused; present so the function can serve as a button callback.
    """
    with output:
        print("Updating pool options and calculating stats...")
        try:
            # Look for the latest CSV
            csv_files = glob.glob('pools_multi_*.csv')
            if not csv_files:
                print("No pools CSV found. Please fetch pools first.")
                return
            latest_csv = max(csv_files, key=os.path.getctime)
            df = pd.read_csv(latest_csv)
            pool_select.options = [f'{row["Chain"]}/{row["DEX"]}/{row["Pair"]} ({row["Address"][:8]}...)' for _, row in df.iterrows()]
            print(f"Loaded {len(pool_select.options)} pools from {latest_csv} for selection.")

            # Calculate stats for all pools using saved data
            with stats_output:
                stats_output.clear_output()
                print("Calculating metrics for all pools...")
                metrics_list = []
                for _, row in df.iterrows():
                    chain = row['Chain']
                    dex_name = row['DEX']
                    address = row['Address']
                    tvl = row['TVL']
                    pair = row['Pair']

                    # A pair that is not exactly "TOKEN0/TOKEN1" is skipped so
                    # that one odd row does not abort the metrics of all pools.
                    symbols = pair.split('/')
                    if len(symbols) != 2:
                        print(f"Cannot parse pair {pair} on {chain}/{dex_name}. Skipping metrics.")
                        continue
                    token0_symbol, token1_symbol = symbols

                    # Load from data folder
                    data_folder = os.path.join('data', chain, dex_name, f'{token0_symbol}_{token1_symbol}')
                    base_usd_path = os.path.join(data_folder, 'base_usd.csv')
                    quote_usd_path = os.path.join(data_folder, 'quote_usd.csv')
                    cross_path = os.path.join(data_folder, 'cross.csv')

                    if not (os.path.exists(base_usd_path) and os.path.exists(quote_usd_path) and os.path.exists(cross_path)):
                        print(f"Data not found for {pair} on {chain}/{dex_name}. Skipping metrics.")
                        continue

                    # The quote-USD file must exist but its content is not used
                    # by the metrics call, so it is not read.
                    base_df = pd.read_csv(base_usd_path)
                    cross_df = pd.read_csv(cross_path)

                    # NOTE(review): calculate_pool_metrics is neither defined nor
                    # imported in this cell; it must already exist in the notebook
                    # namespace, otherwise the NameError is reported below.
                    metrics = calculate_pool_metrics(
                        tvl=tvl,
                        base_df=base_df,
                        cross_price_df=cross_df,
                        days_interval=days.value
                    )
                    metrics['Chain'] = chain
                    metrics['DEX'] = dex_name
                    metrics['Pair'] = pair
                    metrics['Address'] = address
                    metrics['TVL'] = tvl
                    metrics_list.append(metrics)

                if metrics_list:
                    stats_df = pd.DataFrame(metrics_list)
                    print("Pool Metrics:")
                    display(stats_df)
                else:
                    print("No metrics calculated.")
        except Exception as e:
            # Top-level UI boundary: report the failure in the output widget
            print(f"Error updating pool options or calculating stats: {e}")
            traceback.print_exc()

fetch_button.on_click(update_pool_options_and_calculate_stats)

# Analyze correlations
def analyze_correlations(b):
    """Compute and plot the price correlation matrix of the chosen pools.

    Uses every entry of ``pool_select`` when 'Use All Pools for Correlations' is
    checked, otherwise only the selected entries. Each entry has the form
    ``<chain>/<dex>/<token0>/<token1> (<address prefix>...)``; its cached
    ``cross.csv`` is loaded from ``data/<chain>/<dex>/<token0>_<token1>/`` and
    passed to ``Analyzer.compute_correlation_matrix_from_dataframes``. Results
    and errors are printed into ``corr_output``.

    Args:
        b: The clicked button (unused); supplied by ``Button.on_click``.
    """
    with corr_output:
        corr_output.clear_output()
        print("Starting correlation analysis...")
        selected_pools = pool_select.options if use_all_pools_checkbox.value else pool_select.value

        if not selected_pools:
            print('Please select at least one pool or check "Use All Pools".')
            return

        try:
            data_list = []
            used_labels = set()
            for pool_str in selected_pools:
                print(f"Processing pool: {pool_str}")
                # Parse chain, DEX and pair from the option string
                parts = pool_str.split('/')
                if len(parts) < 3:
                    print(f"Invalid pool format: {pool_str}. Skipping.")
                    continue
                chain, dex_name = parts[0], parts[1]

                # Everything after the DEX is "TOKEN0/TOKEN1 (addr...)"; cut the
                # trailing address part from the right so symbols stay intact.
                pair = '/'.join(parts[2:]).rsplit(' (', 1)[0].strip()
                symbols = pair.split('/')
                if len(symbols) != 2:
                    print(f"Invalid pool format: {pool_str}. Skipping.")
                    continue
                token0_symbol, token1_symbol = symbols

                # Load from data folder for cross prices (for correlation, we use close prices)
                data_folder = os.path.join('data', chain, dex_name, f'{token0_symbol}_{token1_symbol}')
                cross_path = os.path.join(data_folder, 'cross.csv')

                if not os.path.exists(cross_path):
                    print(f"Cross data not found for {pair}. Skipping.")
                    continue

                # Label by base token as before, but disambiguate when several
                # pools share it so their series stay distinguishable.
                # NOTE(review): presumably the analyzer uses the label as the
                # matrix row/column name - confirm duplicates were not intended.
                label = token0_symbol
                if label in used_labels:
                    label = f'{pair} ({chain}/{dex_name})'
                unique_label, suffix = label, 2
                while unique_label in used_labels:
                    unique_label = f'{label} #{suffix}'
                    suffix += 1
                used_labels.add(unique_label)

                cross_df = pd.read_csv(cross_path)
                data_list.append((unique_label, cross_df))
                print(f"Loaded {len(cross_df)} bars for {unique_label} from saved data.")

            if not data_list:
                print('No price data available for selected pools.')
                return

            corr_matrix = Analyzer.compute_correlation_matrix_from_dataframes(data_list)
            if not corr_matrix.empty:
                print('Correlation Matrix:')
                display(corr_matrix)

                # Visualize correlation matrix
                plt.figure(figsize=(8, 6))
                plt.imshow(corr_matrix, cmap='coolwarm', interpolation='nearest')
                plt.colorbar(label='Correlation')
                plt.xticks(range(len(corr_matrix)), corr_matrix.columns, rotation=45)
                plt.yticks(range(len(corr_matrix)), corr_matrix.index)
                plt.title('Price Correlation Matrix')
                plt.tight_layout()
                plt.show()
            else:
                print('No correlation data available.')
        except Exception as e:
            # Top-level UI boundary: report the failure in the output widget
            print(f'Error analyzing correlations: {e}')
            traceback.print_exc()

analyze_button.on_click(analyze_correlations)

# Run the refresh once when the cell executes so the pool selector and metrics
# are populated from an already existing pools CSV; the function prints a hint
# instead when no such CSV is found. The argument is not used by the function.
update_pool_options_and_calculate_stats(None)

VBox(children=(SelectMultiple(description='Chains:', index=(0,), options=('solana', 'eth', 'bsc', 'polygon_pos…

VBox(children=(Checkbox(value=True, description='Use All Pools for Correlations'), SelectMultiple(description=…

## Configuration
Select the blockchain, DEX, and filtering parameters using interactive widgets.

In [None]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML
import matplotlib.pyplot as plt
from src.clients.geckoterminal import GeckoTerminalClient
from src.constants import Network, NETWORK_CONFIG
from src.utils.analyzer import Analyzer
from datetime import datetime, timedelta
import os
import glob
import numpy as np
import traceback

# Inject CSS that caps elements of class "output" at 500px and lets them scroll vertically
display(HTML('<style>.output {max-height: 500px; overflow-y: auto;}</style>'))

# Networks selectable in the UI, in display order
chains = [
    network.value
    for network in (
        Network.SOLANA,
        Network.ETHEREUM,
        Network.BSC,
        Network.POLYGONPOS,
        Network.ARBITRUM,
        Network.SUI,
        Network.BASE,
    )
]
# DEX list of every selectable network, read from the project network configuration
dexes_by_chain = {name: NETWORK_CONFIG[name]['dexes'] for name in chains}

# Multi-select lists: the first chain is preselected, no DEX is preselected
chain_dropdown = widgets.SelectMultiple(
    options=chains,
    value=[chains[0]],
    rows=5,
    description='Chains:',
)
dex_dropdown = widgets.SelectMultiple(
    options=dexes_by_chain[chains[0]],
    value=[],
    rows=5,
    description='DEXes:',
)

# Numeric pool filters
min_tvl = widgets.FloatText(description='Min TVL ($):', value=10000)
max_tvl = widgets.FloatText(description='Max TVL ($):', value=10000000000.0)
min_volume = widgets.FloatText(description='Min Volume ($):', value=5000)

# Boolean pool filters and the cache override
no_pivots = widgets.Checkbox(description='Exclude Pivot Tokens', value=False)
no_stables = widgets.Checkbox(description='Exclude Stablecoins', value=True)
utility_pairs = widgets.Checkbox(description='Utility Pairs Only', value=False)
force_redownload = widgets.Checkbox(description='Force Re-Download Data', value=False)

# Trigger button and the output area the fetch handlers print into
fetch_button = widgets.Button(button_style='success', description='Fetch Pools')
output = widgets.Output()

# Keep the DEX list in sync with the chain selection (union of DEXes over all selected chains)
def update_dex_dropdown(change):
    """Rebuild the DEX options from the chains found in ``change['new']``.

    The options become the sorted union of the configured DEXes of every
    selected chain; a chain missing from ``dexes_by_chain`` contributes nothing.
    """
    per_chain_dexes = (dexes_by_chain.get(name, []) for name in change['new'])
    dex_dropdown.options = sorted(set().union(*per_chain_dexes))


chain_dropdown.observe(update_dex_dropdown, names='value')

# Render the fetch controls as one vertical stack, with the log output area last
display(widgets.VBox([
    chain_dropdown,
    dex_dropdown,
    min_tvl,
    max_tvl,
    min_volume,
    no_pivots,
    no_stables,
    utility_pairs,
    force_redownload,
    fetch_button,
    output,
]))

# Minimum history a pool needs in order to be kept: 14 days of 1-hour bars
_MIN_POOL_AGE_HOURS = 14 * 24


def _fetch_hourly_bars(client, pool_address, currency, token):
    """Request up to 1000 one-hour price bars for a pool from the client."""
    return client.get_price_bars(
        pool_address=pool_address,
        timeframe='hour',
        aggregate=1,
        currency=currency,
        token=token,
        limit=1000
    )


def _load_or_fetch_cross_bars(client, pool, pair, data_folder):
    """Return the cross-price bars of a pool, or None when fetching failed.

    The cached ``cross.csv`` is reused when all three CSVs (base_usd, quote_usd,
    cross) exist in ``data_folder`` and 'Force Re-Download Data' is unchecked.
    Otherwise the three bar series are requested from the client and, if all
    three results are truthy, written to ``data_folder`` before returning.
    """
    base_usd_path = os.path.join(data_folder, 'base_usd.csv')
    quote_usd_path = os.path.join(data_folder, 'quote_usd.csv')
    cross_path = os.path.join(data_folder, 'cross.csv')

    if not force_redownload.value and all(
        os.path.exists(path) for path in (base_usd_path, quote_usd_path, cross_path)
    ):
        print(f"Loading existing data for {pair} from {data_folder}")
        # Only the cross prices are needed for the age check below, so the
        # cached base/quote files are not read here.
        return pd.read_csv(cross_path)

    print(f"Fetching data for {pair}")
    base_usd_bars = _fetch_hourly_bars(client, pool.address, 'usd', 'base')
    quote_usd_bars = _fetch_hourly_bars(client, pool.address, 'usd', 'quote')
    # Cross price: base token priced in the quote token
    cross_bars = _fetch_hourly_bars(client, pool.address, 'token', 'base')

    if not (base_usd_bars and quote_usd_bars and cross_bars):
        print(f"Failed to fetch data for {pair}. Skipping.")
        return None

    cross_df = cross_bars.data
    base_usd_bars.data.to_csv(base_usd_path, index=False)
    quote_usd_bars.data.to_csv(quote_usd_path, index=False)
    cross_df.to_csv(cross_path, index=False)
    print(f"Saved data for {pair} to {data_folder}")
    return cross_df


def _has_min_history(cross_df, pair):
    """Return True when the bars cover at least ``_MIN_POOL_AGE_HOURS`` hours."""
    if len(cross_df) < _MIN_POOL_AGE_HOURS:
        print(f"Pool {pair} is too young (less than 2 weeks of data). Discarding.")
        return False

    # Frames reloaded with pd.read_csv carry the timestamp column as plain
    # text/numbers, so subtracting them and calling .total_seconds() would
    # raise. Converting first makes cached and freshly fetched data behave alike.
    # NOTE(review): assumes timestamps are datetimes or datetime strings; if the
    # client stores integer epochs, a unit= argument is needed here - confirm.
    timestamps = pd.to_datetime(cross_df['timestamp'])
    time_span_hours = (timestamps.max() - timestamps.min()).total_seconds() / 3600
    if time_span_hours < _MIN_POOL_AGE_HOURS:
        print(f"Pool {pair} data spans only {time_span_hours:.2f} hours (< 2 weeks). Discarding.")
        return False
    return True


# Define fetch_pools function
def fetch_pools(b):
    """Fetch, filter, save and plot pools for the current widget selection.

    For every selected chain and DEX (all DEXes of a chain when none is
    selected) the pools matching the TVL/volume/checkbox filters are requested,
    their hourly price bars are loaded from ``data/<chain>/<dex>/<pair>/`` or
    downloaded, and pools with less than two weeks of history are dropped.
    The surviving pools are displayed, written to a timestamped
    ``pools_multi_*.csv`` file and summarised in two histograms. All messages
    and errors are printed into the ``output`` widget.

    Args:
        b: The clicked button (unused); supplied by ``Button.on_click``.
    """
    with output:
        output.clear_output()
        print('Starting fetch_pools function...')
        try:
            selected_chains = list(chain_dropdown.value)
            selected_dexes = set(dex_dropdown.value)  # Use set for faster lookup

            if not selected_chains:
                print("No chains selected.")
                return

            all_pools = []
            for chain in selected_chains:
                print(f"Processing chain: {chain}")
                chain_dexes = dexes_by_chain.get(chain, [])

                # If no specific DEXes selected, fetch from all DEXes for the chain
                dexes_to_fetch = [dex for dex in chain_dexes if dex in selected_dexes] if selected_dexes else chain_dexes

                if not dexes_to_fetch:
                    print(f"No DEXes to fetch for chain {chain}. Skipping.")
                    continue

                for dex_name in dexes_to_fetch:
                    print(f"Fetching from DEX: {dex_name} on chain {chain}")

                    # Initialize client with DEX name as string
                    client = GeckoTerminalClient(
                        network=chain,
                        dex=dex_name
                    )

                    pools = client.fetch_liquidity_pools(
                        all_pages=True,
                        min_tvl=min_tvl.value,
                        max_tvl=max_tvl.value,
                        min_volume=min_volume.value,
                        no_pivots=no_pivots.value,
                        no_stables=no_stables.value,
                        utility_pairs=utility_pairs.value
                    )

                    # Keep only pools that are at least 2 weeks old, caching their data
                    filtered_pools = []
                    for pool in pools:
                        pool.chain = chain
                        pool.dex = dex_name

                        # NOTE(review): the folder is keyed by symbols only, so two
                        # pools of the same pair on one DEX share cached files.
                        pair = f'{pool.token0_symbol}_{pool.token1_symbol}'
                        data_folder = os.path.join('data', chain, dex_name, pair)
                        os.makedirs(data_folder, exist_ok=True)

                        cross_df = _load_or_fetch_cross_bars(client, pool, pair, data_folder)
                        if cross_df is None or not _has_min_history(cross_df, pair):
                            continue

                        filtered_pools.append(pool)
                        print(f"Added pool {pair} (age OK)")

                    all_pools.extend(filtered_pools)
                    print(f"Filtered {len(filtered_pools)} pools (age >= 2 weeks) from {dex_name} on {chain}")

            if not all_pools:
                print('No pools found matching the criteria.')
                return

            print(f"Total filtered pools: {len(all_pools)}")
            # Convert to DataFrame with added chain and DEX columns
            df = pd.DataFrame([
                {
                    'Chain': pool.chain,
                    'DEX': pool.dex,
                    'Pair': f'{pool.token0_symbol}/{pool.token1_symbol}',
                    'TVL': pool.tvl,
                    'Volume': pool.volume,
                    'Address': pool.address
                } for pool in all_pools
            ])

            # Display results
            print(f'Displaying {len(all_pools)} pools:')
            display(df)

            # Save to CSV
            output_file = f'pools_multi_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
            df.to_csv(output_file, index=False)
            print(f'Saved results to {output_file}')

            # Visualize TVL and Volume
            plt.figure(figsize=(10, 6))
            plt.subplot(1, 2, 1)
            plt.hist(df['TVL'], bins=20, color='blue', alpha=0.7)
            plt.title('TVL Distribution')
            plt.xlabel('TVL ($)')
            plt.ylabel('Count')
            plt.subplot(1, 2, 2)
            plt.hist(df['Volume'], bins=20, color='green', alpha=0.7)
            plt.title('Volume Distribution')
            plt.xlabel('Volume ($)')
            plt.ylabel('Count')
            plt.tight_layout()
            plt.show()

        except Exception as e:
            # Top-level UI boundary: report the failure in the output widget
            print(f'Error fetching pools: {e}')
            traceback.print_exc()

# Register button click handler
fetch_button.on_click(fetch_pools)

# Controls for the price correlation section; the pool list starts empty
pool_select = widgets.SelectMultiple(
    options=[],
    rows=10,
    description='Select Pools:',
)
days = widgets.IntSlider(description='Days:', value=7, min=1, max=30)
use_all_pools_checkbox = widgets.Checkbox(description='Use All Pools for Correlations', value=True)
analyze_button = widgets.Button(button_style='info', description='Analyze Correlations')

# Separate output areas for the correlation results and the per-pool metrics
corr_output = widgets.Output()
stats_output = widgets.Output()

# Render the correlation controls as one vertical stack
display(widgets.VBox([
    use_all_pools_checkbox,
    pool_select,
    days,
    analyze_button,
    corr_output,
    stats_output,
]))

# Update pool options and calculate stats after fetching
def update_pool_options_and_calculate_stats(change):
    """Refresh the pool selector and the metrics table from the newest pools CSV.

    Picks the ``pools_multi_*.csv`` file with the greatest ``os.path.getctime``,
    fills ``pool_select`` with one entry per row, then computes metrics for each
    pool whose cached price CSVs exist under ``data/<chain>/<dex>/<pair>/`` and
    displays them in ``stats_output``. Progress and errors are printed into the
    ``output`` widget.

    Args:
        change: Unused; present so the function can serve as a button callback.
    """
    with output:
        print("Updating pool options and calculating stats...")
        try:
            # Look for the latest CSV
            csv_files = glob.glob('pools_multi_*.csv')
            if not csv_files:
                print("No pools CSV found. Please fetch pools first.")
                return
            latest_csv = max(csv_files, key=os.path.getctime)
            df = pd.read_csv(latest_csv)
            pool_select.options = [f'{row["Chain"]}/{row["DEX"]}/{row["Pair"]} ({row["Address"][:8]}...)' for _, row in df.iterrows()]
            print(f"Loaded {len(pool_select.options)} pools from {latest_csv} for selection.")

            # Calculate stats for all pools using saved data
            with stats_output:
                stats_output.clear_output()
                print("Calculating metrics for all pools...")
                metrics_list = []
                for _, row in df.iterrows():
                    chain = row['Chain']
                    dex_name = row['DEX']
                    address = row['Address']
                    tvl = row['TVL']
                    pair = row['Pair']

                    # A pair that is not exactly "TOKEN0/TOKEN1" is skipped so
                    # that one odd row does not abort the metrics of all pools.
                    symbols = pair.split('/')
                    if len(symbols) != 2:
                        print(f"Cannot parse pair {pair} on {chain}/{dex_name}. Skipping metrics.")
                        continue
                    token0_symbol, token1_symbol = symbols

                    # Load from data folder
                    data_folder = os.path.join('data', chain, dex_name, f'{token0_symbol}_{token1_symbol}')
                    base_usd_path = os.path.join(data_folder, 'base_usd.csv')
                    quote_usd_path = os.path.join(data_folder, 'quote_usd.csv')
                    cross_path = os.path.join(data_folder, 'cross.csv')

                    if not (os.path.exists(base_usd_path) and os.path.exists(quote_usd_path) and os.path.exists(cross_path)):
                        print(f"Data not found for {pair} on {chain}/{dex_name}. Skipping metrics.")
                        continue

                    # The quote-USD file must exist but its content is not used
                    # by the metrics call, so it is not read.
                    base_df = pd.read_csv(base_usd_path)
                    cross_df = pd.read_csv(cross_path)

                    # NOTE(review): calculate_pool_metrics is neither defined nor
                    # imported in this cell; it must already exist in the notebook
                    # namespace, otherwise the NameError is reported below.
                    metrics = calculate_pool_metrics(
                        tvl=tvl,
                        base_df=base_df,
                        cross_price_df=cross_df,
                        days_interval=days.value
                    )
                    metrics['Chain'] = chain
                    metrics['DEX'] = dex_name
                    metrics['Pair'] = pair
                    metrics['Address'] = address
                    metrics['TVL'] = tvl
                    metrics_list.append(metrics)

                if metrics_list:
                    stats_df = pd.DataFrame(metrics_list)
                    print("Pool Metrics:")
                    display(stats_df)
                else:
                    print("No metrics calculated.")
        except Exception as e:
            # Top-level UI boundary: report the failure in the output widget
            print(f"Error updating pool options or calculating stats: {e}")
            traceback.print_exc()

fetch_button.on_click(update_pool_options_and_calculate_stats)

# Analyze correlations
def analyze_correlations(b):
    """Build and plot a price correlation matrix from saved cross-price CSVs.

    Click handler for the analyze button. For every selected pool (or every
    pool option when "Use All Pools" is checked) it loads
    ``data/<chain>/<dex>/<token0>_<token1>/cross.csv``, hands the collected
    frames to ``Analyzer.compute_correlation_matrix_from_dataframes`` and
    renders the resulting matrix as a table and a heat map. All output goes
    to ``corr_output``.

    Pool options that cannot be parsed, or whose CSV is missing, are skipped
    with a message instead of aborting the whole analysis.

    Args:
        b: The clicked button (unused).
    """
    with corr_output:
        corr_output.clear_output()
        print("Starting correlation analysis...")
        selected_pools = pool_select.options if use_all_pools_checkbox.value else pool_select.value

        if not selected_pools:
            print('Please select at least one pool or check "Use All Pools".')
            return

        try:
            data_list = []
            for pool_str in selected_pools:
                print(f"Processing pool: {pool_str}")
                # The option string is parsed as "<chain>/<dex>/<pair> (<address>)",
                # where <pair> itself is "<token0>/<token1>".
                parts = pool_str.split('/')
                if len(parts) < 3:
                    print(f"Invalid pool format: {pool_str}. Skipping.")
                    continue
                chain = parts[0]
                dex_name = parts[1]
                pair_address_str = '/'.join(parts[2:])
                pair = pair_address_str.split('(')[0].strip()

                # The pair must split into exactly two symbols; otherwise skip
                # this pool rather than let an unpacking error abort the loop.
                symbols = pair.split('/')
                if len(symbols) != 2:
                    print(f"Invalid pool format: {pool_str}. Skipping.")
                    continue
                token0_symbol, token1_symbol = symbols

                # The first token labels the series in the correlation matrix.
                # NOTE(review): pools sharing the same first token produce
                # duplicate labels - confirm how Analyzer handles that.
                token_symbol = token0_symbol

                # Load from data folder for cross prices (for correlation, we use close prices)
                data_folder = os.path.join('data', chain, dex_name, f'{token0_symbol}_{token1_symbol}')
                cross_path = os.path.join(data_folder, 'cross.csv')

                if os.path.exists(cross_path):
                    cross_df = pd.read_csv(cross_path)
                    data_list.append((token_symbol, cross_df))
                    print(f"Loaded {len(cross_df)} bars for {token_symbol} from saved data.")
                else:
                    print(f"Cross data not found for {pair}. Skipping.")

            if not data_list:
                print('No price data available for selected pools.')
                return

            corr_matrix = Analyzer.compute_correlation_matrix_from_dataframes(data_list)
            if not corr_matrix.empty:
                print('Correlation Matrix:')
                display(corr_matrix)

                # Visualize correlation matrix
                plt.figure(figsize=(8, 6))
                plt.imshow(corr_matrix, cmap='coolwarm', interpolation='nearest')
                plt.colorbar(label='Correlation')
                plt.xticks(range(len(corr_matrix)), corr_matrix.columns, rotation=45)
                plt.yticks(range(len(corr_matrix)), corr_matrix.index)
                plt.title('Price Correlation Matrix')
                plt.tight_layout()
                plt.show()
            else:
                print('No correlation data available.')
        except Exception as e:
            # Top-level UI boundary: report the failure inside the output widget.
            print(f'Error analyzing correlations: {e}')
            traceback.print_exc()

# Run the correlation analysis on every analyze-button click.
analyze_button.on_click(analyze_correlations)

# Automatically try to update pool options and calculate stats if CSV exists.
# The handler is invoked once at cell execution, with None standing in for the button.
update_pool_options_and_calculate_stats(None)

VBox(children=(SelectMultiple(description='Chains:', index=(0,), options=('solana', 'eth', 'bsc', 'polygon_pos…

VBox(children=(Checkbox(value=True, description='Use All Pools for Correlations'), SelectMultiple(description=…

## Fetch Liquidity Pools
Fetch liquidity pools based on the selected parameters and display the results.

## Analyze Price Correlations
Select pools to fetch price data and compute a correlation matrix.

In [None]:
from datetime import datetime, timedelta

# Widgets for price correlation analysis
# Multi-select list of pools; starts empty and is filled by update_pool_options.
pool_select = widgets.SelectMultiple(
    options=[],
    description='Select Pools:',
    rows=10
)
# Look-back window in days (1-30, default 7) read by analyze_correlations.
days = widgets.IntSlider(value=7, min=1, max=30, description='Days:')
# Button whose click triggers the correlation analysis.
analyze_button = widgets.Button(description='Analyze Correlations', button_style='info')
# Output area that captures the analysis log, the matrix and the plot.
corr_output = widgets.Output()

# Update pool options after fetching
def update_pool_options(change):
    """Reload the pool selector's options from the saved pools CSV.

    Reads ``pools_<chain>_<dex>.csv`` and turns each row into a
    ``"<Pair> (<Address>)"`` option for ``pool_select``. On any failure the
    error is printed and the selector is emptied. Messages are written to
    ``output``.

    Args:
        change: Event payload from the triggering widget (unused).
    """
    with output:
        print("Updating pool options...")
        try:
            # NOTE(review): the filename interpolates the raw widget values; the
            # dropdowns are SelectMultiple widgets, so these are tuples - confirm
            # this matches the name the fetch step writes.
            csv_name = f'pools_{chain_dropdown.value}_{dex_dropdown.value}.csv'
            pools = pd.read_csv(csv_name)

            labels = []
            for _, pool in pools.iterrows():
                labels.append(f'{pool["Pair"]} ({pool["Address"]})')

            pool_select.options = labels
            print(f"Loaded {len(pool_select.options)} pools for selection.")
        except Exception as err:
            print(f"Error loading pools CSV: {err}. Please fetch pools first.")
            pool_select.options = []

# Refresh the pool selector on every fetch-button click.
fetch_button.on_click(update_pool_options)

# Analyze correlations
def analyze_correlations(b):
    """Fetch price bars for the selected pools and show their correlation matrix.

    Click handler for the analyze button. A ``GeckoTerminalClient`` is created
    from the current chain/DEX widget values, price bars are requested for
    every selected pool (hourly timeframe aggregated by 4, up to 1000 bars),
    and the collected frames are passed to
    ``Analyzer.compute_correlation_matrix_from_dataframes``. The matrix is
    displayed as a table and as a heat map. All output goes to ``corr_output``.

    Args:
        b: The clicked button (unused).
    """
    with corr_output:
        corr_output.clear_output()
        print("Starting correlation analysis...")
        if not pool_select.value:
            print('Please select at least one pool.')
            return

        try:
            print(f"Selected chain: {chain_dropdown.value}, DEX: {dex_dropdown.value}")

            # The DEX widget value is handed to the client as-is.
            gecko = GeckoTerminalClient(network=chain_dropdown.value, dex=dex_dropdown.value)
            print("GeckoTerminalClient initialized.")

            labelled_bars = []
            for option in pool_select.value:
                print(f"Processing pool: {option}")

                # Options have the form "<pair> (<address>)".
                pieces = option.split('(')
                pool_address = pieces[1][:-1].split(')')[0]
                pair_name = pieces[0].strip()
                symbol = pair_name.split('/')[0]  # first token labels the series

                # NOTE(review): this date window is only printed; it is not
                # passed to get_price_bars below - confirm whether the Days
                # slider is meant to limit the fetched range.
                window_start = (datetime.now() - timedelta(days=days.value)).strftime('%Y-%m-%d')
                window_end = datetime.now().strftime('%Y-%m-%d')
                print(f"Fetching price bars for {symbol} from {window_start} to {window_end}")

                bars = gecko.get_price_bars(
                    pool_address=pool_address,
                    timeframe='hour',
                    aggregate=4,  # For 4h intervals
                    limit=1000,
                )
                if not bars or bars.data.empty:
                    print(f"No price bars fetched for {symbol}")
                    continue

                labelled_bars.append((symbol, bars.data))
                print(f"Fetched {len(bars.data)} bars for {symbol}")

            if not labelled_bars:
                print('No price data available for selected pools.')
                return

            matrix = Analyzer.compute_correlation_matrix_from_dataframes(labelled_bars)
            if matrix.empty:
                print('No correlation data available.')
                return

            print('Correlation Matrix:')
            display(matrix)

            # Heat map of the matrix with one tick per row/column.
            ticks = range(len(matrix))
            plt.figure(figsize=(8, 6))
            plt.imshow(matrix, cmap='coolwarm', interpolation='nearest')
            plt.colorbar(label='Correlation')
            plt.xticks(ticks, matrix.columns, rotation=45)
            plt.yticks(ticks, matrix.index)
            plt.title('Price Correlation Matrix')
            plt.tight_layout()
            plt.show()
        except Exception as err:
            # Top-level UI boundary: report the failure inside the output widget.
            print(f'Error analyzing correlations: {err}')
            import traceback
            traceback.print_exc()

# Run the correlation analysis on every analyze-button click.
analyze_button.on_click(analyze_correlations)
# Render the pool selector, day slider, button and output area stacked vertically.
display(widgets.VBox([pool_select, days, analyze_button, corr_output]))

# Automatically try to update pool options if CSV exists.
# The handler is invoked once at cell execution, with None standing in for the event.
update_pool_options(None)

VBox(children=(SelectMultiple(description='Select Pools:', options=(), rows=10, value=()), IntSlider(value=7, …

## Notes
- Use the widgets above to select parameters and fetch pools.
- Results are saved to a CSV file for further analysis.
- Select multiple pools to compute price correlations over the specified number of days.
- Ensure API keys are set in the `.env` file for CoinGecko and Moralis.