In [9]:
# Import Required Libraries for Alpha1 Factor Analysis with Alphalens
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import alphalens
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import requests
import time
import json
import ccxt
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
from scipy import stats

# Configure plotting
%matplotlib inline
plt.style.use('default')
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (20, 16)
plt.rcParams['font.size'] = 12
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 50)

print("✅ All libraries imported successfully!")
print("📊 Ready for Alpha1 factor analysis with Alphalens")
print("🧮 Alpha1 Factor: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)")

✅ All libraries imported successfully!
📊 Ready for Alpha1 factor analysis with Alphalens
🧮 Alpha1 Factor: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)


In [10]:
# Crypto Data Fetching Functions (Direct from Exchanges) + Alpha1 Factor Calculation

def fetch_crypto_data_from_exchanges(cryptocurrencies=['BTC', 'ETH', 'SOL', 'ADA', 'DOT', 'LINK', 'MATIC', 'AVAX'], 
                                   exchanges=['okx'], 
                                   timeframe='1h', 
                                   days_back=365):
    """
    Fetch cryptocurrency data directly from exchanges using ccxt.
    
    Args:
        cryptocurrencies: List of cryptocurrencies to fetch
        exchanges: List of exchanges to use
        timeframe: Data timeframe ('1h', '4h', '1d')
        days_back: Number of days of historical data to fetch
    
    Returns:
        Dictionary with crypto data organized by crypto and exchange
    """
    print(f"🔄 Fetching crypto data directly from exchanges...")
    print(f"🪙 Cryptocurrencies: {cryptocurrencies}")
    print(f"📊 Exchanges: {exchanges}")
    print(f"⏱️ Timeframe: {timeframe}")
    print(f"📅 Days back: {days_back}")
    
    crypto_data = {}
    
    # Initialize exchanges
    exchange_instances = {}
    for exchange_name in exchanges:
        try:
            if exchange_name == 'binance':
                exchange_instances[exchange_name] = ccxt.binance({
                    'enableRateLimit': True,
                    'sandbox': False
                })
            elif exchange_name == 'okx':
                exchange_instances[exchange_name] = ccxt.okx({
                    'enableRateLimit': True,
                    'sandbox': False
                })
            elif exchange_name == 'bybit':
                exchange_instances[exchange_name] = ccxt.bybit({
                    'enableRateLimit': True,
                    'sandbox': False
                })
            print(f"✅ {exchange_name.upper()} exchange initialized")
        except Exception as e:
            print(f"❌ Failed to initialize {exchange_name}: {e}")
    
    # Calculate time range
    end_time = datetime.now()
    start_time = end_time - timedelta(days=days_back)
    since = int(start_time.timestamp() * 1000)
    
    print(f"📅 Fetching data from {start_time.date()} to {end_time.date()}")
    
    # Fetch data for each cryptocurrency and exchange
    for crypto in cryptocurrencies:
        crypto_data[crypto] = {}
        print(f"\n📈 Fetching {crypto} data...")
        
        for exchange_name, exchange in exchange_instances.items():
            print(f"  📊 From {exchange_name.upper()}...")
            
            try:
                # Define symbol (most exchanges use this format)
                symbol = f'{crypto}/USDT'
                
                # Check if symbol exists on exchange
                markets = exchange.load_markets()
                if symbol not in markets:
                    print(f"    ⚠️ {symbol} not available on {exchange_name}")
                    continue
                
                # Fetch OHLCV data in chunks
                all_ohlcv = []
                current_since = since
                max_limit = 1000  # Most exchanges limit to 1000 candles per request
                
                while True:
                    try:
                        ohlcv = exchange.fetch_ohlcv(symbol, timeframe, current_since, max_limit)
                        
                        if not ohlcv:
                            break
                        
                        all_ohlcv.extend(ohlcv)
                        
                        # Update timestamp for next batch
                        current_since = ohlcv[-1][0] + 1
                        
                        # Progress indicator
                        current_date = datetime.fromtimestamp(ohlcv[-1][0] / 1000)
                        print(f"    📊 Progress: {current_date.strftime('%Y-%m-%d')} ({len(all_ohlcv)} records)", end='\r')
                        
                        # Stop if we've reached current time
                        if current_date >= end_time:
                            break
                        
                        # Rate limiting
                        time.sleep(exchange.rateLimit / 1000)
                        
                    except Exception as e:
                        print(f"    ❌ Error fetching batch: {e}")
                        break
                
                # Convert to DataFrame
                if all_ohlcv:
                    df = pd.DataFrame(all_ohlcv, columns=['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])
                    df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='ms')
                    df.set_index('Timestamp', inplace=True)
                    
                    # Remove duplicates and sort
                    df = df.drop_duplicates().sort_index()
                    
                    # Filter to requested date range
                    df = df[(df.index >= start_time) & (df.index <= end_time)]
                    
                    # Store data
                    crypto_data[crypto][f'{exchange_name}_spot'] = df
                    print(f"    ✅ {crypto} from {exchange_name}: {len(df)} records")
                else:
                    print(f"    ❌ No data retrieved for {crypto} from {exchange_name}")
                    
            except Exception as e:
                print(f"    ❌ Failed to fetch {crypto} from {exchange_name}: {e}")
    
    return crypto_data

def signed_power(x, exponent):
    """
    Apply signed power: maintains sign while applying power
    """
    return np.sign(x) * np.power(np.abs(x), exponent)

def ts_argmax(series, window):
    """
    Time-series ArgMax: returns the index of the maximum value in rolling window
    """
    return series.rolling(window=window).apply(lambda x: np.argmax(x) if len(x) == window else np.nan, raw=True)

def calculate_alpha1_factor(crypto_data, crypto_list=None):
    """
    Calculate Alpha1 factor based on the expression:
    (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)
    
    Args:
        crypto_data: Dictionary with crypto data
        crypto_list: List of cryptos to analyze (if None, use all)
    
    Returns:
        Combined DataFrame with Alpha1 factors for Alphalens analysis
    """
    if crypto_list is None:
        crypto_list = list(crypto_data.keys())
    
    print(f"🧮 Calculating Alpha1 factors for: {crypto_list}")
    print(f"📊 Alpha1 Expression: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)")
    
    # Step 1-4: Calculate ts_argmax for all cryptos first
    all_ts_argmax = {}
    
    for crypto in crypto_list:
        if crypto not in crypto_data:
            print(f"❌ No data for {crypto}, skipping...")
            continue
        
        print(f"🔧 Processing {crypto} Alpha1 factor steps 1-4...")
        
        # Use primary data source (prefer spot data for consistency)
        primary_key = None
        for key_priority in ['binance_spot', 'okx_spot', 'bybit_spot', 'binance_futures', 'okx_futures']:
            if key_priority in crypto_data[crypto]:
                primary_key = key_priority
                break
        
        if primary_key is None:
            print(f"❌ No suitable data found for {crypto}")
            continue
        
        df = crypto_data[crypto][primary_key].copy()
        print(f"    📊 Using {primary_key} data: {len(df)} records")
        
        # Calculate base components
        close_prices = df['Close']
        returns = close_prices.pct_change()
        
        print(f"    🔢 Step 1: Calculating returns and rolling stddev...")
        # Calculate rolling standard deviation of returns (20 periods)
        rolling_stddev = returns.rolling(window=20).std()
        
        print(f"    🔀 Step 2: Applying conditional logic...")
        # Conditional: if returns < 0, use stddev(returns, 20), else use close
        conditional_series = np.where(returns < 0, rolling_stddev, close_prices)
        conditional_series = pd.Series(conditional_series, index=close_prices.index)
        
        print(f"    ⚡ Step 3: Applying SignedPower with exponent 2...")
        # Apply SignedPower with exponent 2
        signed_power_series = signed_power(conditional_series, 2.0)
        
        print(f"    🎯 Step 4: Calculating Ts_ArgMax over 5 periods...")
        # Apply Ts_ArgMax over 5 periods
        ts_argmax_series = ts_argmax(signed_power_series, 5)
        
        # Store for cross-sectional ranking
        all_ts_argmax[crypto] = ts_argmax_series
        print(f"    ✅ {crypto}: {len(ts_argmax_series.dropna())} ts_argmax observations")
    
    # Step 5: Cross-sectional ranking across all assets
    print(f"\n📊 Step 5: Cross-sectional ranking across all assets...")
    if all_ts_argmax:
        # Create DataFrame for cross-sectional ranking
        ts_argmax_df = pd.DataFrame(all_ts_argmax)
        ts_argmax_df = ts_argmax_df.dropna()  # Remove rows with any NaN
        
        print(f"    📊 Ts_ArgMax DataFrame shape: {ts_argmax_df.shape}")
        print(f"    📊 Assets: {list(ts_argmax_df.columns)}")
        
        # Cross-sectional ranking: rank across assets (columns) at each date (row)
        ranked_df = ts_argmax_df.rank(axis=1, pct=True)
        
        print(f"    ✅ Cross-sectional ranking complete")
        
        # Step 6: Center around 0
        print(f"⚖️ Step 6: Centering around 0...")
        alpha1_df = ranked_df - 0.5
        
        # Convert to multi-index format for Alphalens
        factor_data_list = []
        for date in alpha1_df.index:
            for asset in alpha1_df.columns:
                if not pd.isna(alpha1_df.loc[date, asset]):
                    factor_data_list.append({
                        'date': date,
                        'asset': asset,
                        'factor_value': alpha1_df.loc[date, asset]
                    })
        
        if factor_data_list:
            combined_factors = pd.DataFrame(factor_data_list)
            combined_factors = combined_factors.sort_values(['date', 'asset']).reset_index(drop=True)
            
            print(f"\n✅ Alpha1 Factor calculation complete!")
            print(f"📊 Total Alpha1 observations: {len(combined_factors):,}")
            print(f"📈 Unique assets: {combined_factors['asset'].nunique()}")
            print(f"📅 Date range: {combined_factors['date'].min()} to {combined_factors['date'].max()}")
            
            return combined_factors
        else:
            print("❌ No Alpha1 factors calculated")
            return None
    else:
        print("❌ No ts_argmax data calculated")
        return None

def prepare_crypto_prices_for_alphalens(crypto_data, crypto_list=None):
    """
    Prepare price data for Alphalens analysis.
    
    Args:
        crypto_data: Dictionary with crypto data
        crypto_list: List of cryptos to include
    
    Returns:
        DataFrame with prices (rows=dates, columns=assets)
    """
    if crypto_list is None:
        crypto_list = list(crypto_data.keys())
    
    print(f"💰 Preparing price data for: {crypto_list}")
    
    all_prices = {}
    
    for crypto in crypto_list:
        if crypto not in crypto_data:
            continue
        
        # Use primary data source (same priority as factor calculation)
        primary_key = None
        for key_priority in ['binance_spot', 'okx_spot', 'bybit_spot', 'binance_futures', 'okx_futures']:
            if key_priority in crypto_data[crypto]:
                primary_key = key_priority
                break
        
        if primary_key is None:
            continue
        
        df = crypto_data[crypto][primary_key]
        all_prices[crypto] = df['Close']
        print(f"  📊 {crypto}: Using {primary_key} data")
    
    if all_prices:
        prices_df = pd.DataFrame(all_prices)
        prices_df = prices_df.dropna(how='all')  # Remove rows where all assets are NaN
        
        print(f"✅ Price data prepared!")
        print(f"💰 Shape: {prices_df.shape}")
        print(f"📅 Date range: {prices_df.index.min()} to {prices_df.index.max()}")
        
        return prices_df
    else:
        print("❌ No price data prepared")
        return None

print("✅ Alpha1 factor calculation functions defined!")
print("🔧 Functions available:")
print("  - fetch_crypto_data_from_exchanges(): Fetch crypto data directly from exchanges")
print("  - calculate_alpha1_factor(): Calculate Alpha1 factor with proper cross-sectional ranking")
print("  - prepare_crypto_prices_for_alphalens(): Prepare price data")
print("  - signed_power(): Apply signed power transformation")
print("  - ts_argmax(): Time-series ArgMax calculation")

✅ Alpha1 factor calculation functions defined!
🔧 Functions available:
  - fetch_crypto_data_from_exchanges(): Fetch crypto data directly from exchanges
  - calculate_alpha1_factor(): Calculate Alpha1 factor with proper cross-sectional ranking
  - prepare_crypto_prices_for_alphalens(): Prepare price data
  - signed_power(): Apply signed power transformation
  - ts_argmax(): Time-series ArgMax calculation


In [11]:
# Fetch Crypto Data and Prepare Alpha1 Factor & Price Datasets
print("🚀 Starting Alpha1 Factor Analysis")
print("="*60)
print("🧮 Alpha1 Expression: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)")
print("="*60)

# Define cryptocurrencies to analyze - Top 10 "Real" Crypto (No Stablecoins)
target_cryptos = ['BTC', 'ETH', 'BNB', 'SOL', 'XRP', 'DOGE', 'ADA', 'AVAX', 'DOT', 'LINK']  # Top 10 real cryptocurrencies
target_exchanges = ['okx']  # Using only OKX for cleaner factor analysis

print(f"🪙 Target cryptocurrencies - Top 10 Real Crypto ({len(target_cryptos)}): {target_cryptos}")
print(f"📊 Target exchanges: {target_exchanges}")

# Fetch crypto data directly from exchanges
print(f"\n🔄 Fetching crypto data directly from exchanges...")

crypto_data = fetch_crypto_data_from_exchanges(
    cryptocurrencies=target_cryptos,
    exchanges=target_exchanges,
    timeframe='1d',  # Daily data for cleaner analysis
    days_back=365    # Full 1 year of daily data (365 data points)
)

# Display summary of fetched data
print(f"\n📊 CRYPTO DATA SUMMARY:")
print("="*50)

successful_cryptos = [crypto for crypto in crypto_data.keys() if crypto_data[crypto]]

print(f"🪙 Total cryptocurrencies requested: {len(target_cryptos)}")
print(f"🪙 Successfully fetched: {len(successful_cryptos)}")
print(f"📈 Available cryptos: {successful_cryptos}")

if len(successful_cryptos) < len(target_cryptos):
    failed_cryptos = set(target_cryptos) - set(successful_cryptos)
    print(f"❌ Failed to fetch: {list(failed_cryptos)}")

# =============================================================================
# 1. PREPARE PRICE DATA: DataFrame with rows=dates, columns=assets
# =============================================================================
print(f"\n💰 PREPARING PRICE DATA (rows=dates, columns=assets):")
print("="*60)

price_data = {}

for crypto in successful_cryptos:
    if crypto_data[crypto]:
        # Use first available exchange data
        exchange_key = list(crypto_data[crypto].keys())[0]
        df = crypto_data[crypto][exchange_key]
        price_data[crypto] = df['Close']
        print(f"📊 {crypto}: {len(df)} price records from {exchange_key}")

# Create price DataFrame: rows=dates, columns=assets
if price_data:
    prices_df = pd.DataFrame(price_data)
    prices_df = prices_df.dropna()  # Remove rows with any NaN values
    
    print(f"\n✅ PRICE DATA PREPARED!")
    print(f"📊 Shape: {prices_df.shape} (rows=dates, columns=assets)")
    print(f"📅 Date range: {prices_df.index.min()} to {prices_df.index.max()}")
    
    print(f"\n📈 FIRST 5 ROWS OF PRICE DATA:")
    print("-" * 50)
    display(prices_df.head())
else:
    print("❌ No price data prepared")
    prices_df = None

# =============================================================================
# 2. CALCULATE ALPHA1 FACTORS: Multi-Index Pandas Series (Cross-Sectional Ranking)
# =============================================================================
print(f"\n🧮 CALCULATING ALPHA1 FACTORS (Cross-Sectional Ranking):")
print("="*60)
print(f"📊 Alpha1 Formula: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)")
print(f"🎯 Using TRUE cross-sectional ranking: assets ranked against each other at each date")

# Use the corrected Alpha1 factor calculation function
alpha1_factor_data = calculate_alpha1_factor(crypto_data, successful_cryptos)

# Create Multi-Index Series for Alpha1 factors
if alpha1_factor_data is not None:
    # Create multi-index: (date, asset) - only 2 levels
    factor_series = alpha1_factor_data.set_index(['date', 'asset'])['factor_value']
    factor_series.name = 'alpha1_factor'
    
    print(f"\n✅ ALPHA1 FACTOR DATA PREPARED!")
    print(f"📊 Total Alpha1 observations: {len(factor_series):,}")
    print(f"📈 Factor structure: Multi-Index Series (date, asset)")
    print(f"🎯 Unique assets: {factor_series.index.get_level_values('asset').nunique()}")
    print(f"🎯 Unique dates: {factor_series.index.get_level_values('date').nunique()}")
    print(f"🧮 Factor range: [{factor_series.min():.4f}, {factor_series.max():.4f}]")
    print(f"📊 Factor mean: {factor_series.mean():.4f}, std: {factor_series.std():.4f}")
    
    print(f"\n📊 FIRST 10 OBSERVATIONS OF ALPHA1 FACTOR DATA:")
    print("-" * 50)
    display(factor_series.head(10))
    
    print(f"\n📊 ALPHA1 FACTOR DATA STRUCTURE INFO:")
    print("-" * 40)
    print(f"Index levels: {factor_series.index.names}")
    print(f"Data type: {type(factor_series)}")
    print(f"Value type: {factor_series.dtype}")
    
    # Show cross-sectional ranking validation
    print(f"\n🔍 CROSS-SECTIONAL RANKING VALIDATION:")
    print("-" * 40)
    sample_date = factor_series.index.get_level_values('date')[50]  # Pick a sample date
    sample_factors = factor_series.xs(sample_date, level='date').sort_values()
    print(f"📅 Sample date: {sample_date}")
    print(f"📊 Factor values (sorted):")
    for asset, value in sample_factors.items():
        print(f"   {asset}: {value:.4f}")
    print(f"🎯 Range: [{sample_factors.min():.4f}, {sample_factors.max():.4f}] ✓ Centered around 0")
else:
    print("❌ No Alpha1 factor data calculated")
    factor_series = None

# =============================================================================
# 3. DATA VALIDATION & SUMMARY
# =============================================================================
print(f"\n🔍 ALPHA1 DATA VALIDATION SUMMARY:")
print("="*50)

if prices_df is not None and factor_series is not None:
    # Check date alignment
    price_dates = set(prices_df.index)
    factor_dates = set(factor_series.index.get_level_values('date'))
    common_dates = price_dates & factor_dates
    
    print(f"📅 Price data dates: {len(price_dates):,}")
    print(f"📅 Alpha1 factor dates: {len(factor_dates):,}")
    print(f"📅 Common dates: {len(common_dates):,}")
    print(f"📊 Date overlap: {len(common_dates)/max(len(price_dates), len(factor_dates))*100:.1f}%")
    
    # Check asset alignment
    price_assets = set(prices_df.columns)
    factor_assets = set(factor_series.index.get_level_values('asset'))
    common_assets = price_assets & factor_assets
    
    print(f"🪙 Price data assets: {sorted(price_assets)}")
    print(f"🪙 Alpha1 factor assets: {sorted(factor_assets)}")
    print(f"🪙 Common assets: {sorted(common_assets)}")
    
    print(f"\n✅ ALPHA1 DATASETS READY FOR ALPHALENS ANALYSIS!")
    print(f"💰 Price DataFrame: {prices_df.shape}")
    print(f"🧮 Alpha1 Factor Series: {len(factor_series):,} observations")
    print(f"🎯 Cross-sectional ranking: Assets compared to each other at each date ✓")
else:
    print("❌ Alpha1 data preparation failed")

print(f"\n🎯 Next steps: Use Alpha1 factor for Alphalens analysis")
print(f"🧮 Alpha1 measures complex momentum/volatility interaction with proper cross-sectional ranking")

🚀 Starting Alpha1 Factor Analysis
🧮 Alpha1 Expression: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)
🪙 Target cryptocurrencies - Top 10 Real Crypto (10): ['BTC', 'ETH', 'BNB', 'SOL', 'XRP', 'DOGE', 'ADA', 'AVAX', 'DOT', 'LINK']
📊 Target exchanges: ['okx']

🔄 Fetching crypto data directly from exchanges...
🔄 Fetching crypto data directly from exchanges...
🪙 Cryptocurrencies: ['BTC', 'ETH', 'BNB', 'SOL', 'XRP', 'DOGE', 'ADA', 'AVAX', 'DOT', 'LINK']
📊 Exchanges: ['okx']
⏱️ Timeframe: 1d
📅 Days back: 365
✅ OKX exchange initialized
📅 Fetching data from 2024-08-01 to 2025-08-01

📈 Fetching BTC data...
  📊 From OKX...
    ✅ BTC from okx: 365 records5 records)

📈 Fetching ETH data...
  📊 From OKX...
    ✅ BTC from okx: 365 records5 records)

📈 Fetching ETH data...
  📊 From OKX...
    ✅ ETH from okx: 365 records5 records)

📈 Fetching BNB data...
  📊 From OKX...
    ✅ ETH from okx: 365 records0 records)

📈 Fetching BNB data...
  📊 From OKX...
    ✅ BN

Unnamed: 0_level_0,BTC,ETH,BNB,SOL,XRP,DOGE,ADA,AVAX,DOT,LINK
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-08-02,61489.9,2989.61,543.1,152.74,0.5606,0.11155,0.3636,23.607,5.126,11.92
2024-08-03,60710.0,2903.79,529.5,142.52,0.5559,0.10813,0.3647,22.67,5.043,11.683
2024-08-04,58160.1,2689.15,496.9,138.32,0.5229,0.10375,0.344,21.25,4.687,10.887
2024-08-05,54018.0,2419.73,464.3,129.78,0.4889,0.09434,0.3118,19.539,4.197,9.486
2024-08-06,56022.0,2461.21,484.9,144.45,0.506,0.09643,0.331,20.898,4.51,10.066



🧮 CALCULATING ALPHA1 FACTORS (Cross-Sectional Ranking):
📊 Alpha1 Formula: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)
🎯 Using TRUE cross-sectional ranking: assets ranked against each other at each date
🧮 Calculating Alpha1 factors for: ['BTC', 'ETH', 'BNB', 'SOL', 'XRP', 'DOGE', 'ADA', 'AVAX', 'DOT', 'LINK']
📊 Alpha1 Expression: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)
🔧 Processing BTC Alpha1 factor steps 1-4...
    📊 Using okx_spot data: 365 records
    🔢 Step 1: Calculating returns and rolling stddev...
    🔀 Step 2: Applying conditional logic...
    ⚡ Step 3: Applying SignedPower with exponent 2...
    🎯 Step 4: Calculating Ts_ArgMax over 5 periods...
    ✅ BTC: 342 ts_argmax observations
🔧 Processing ETH Alpha1 factor steps 1-4...
    📊 Using okx_spot data: 365 records
    🔢 Step 1: Calculating returns and rolling stddev...
    🔀 Step 2: Applying conditional logic...
    ⚡ Step 3: Appl

date        asset
2024-08-25  ADA      0.10
            AVAX     0.10
            BNB     -0.35
            BTC      0.50
            DOGE    -0.35
            DOT      0.10
            ETH      0.10
            LINK     0.10
            SOL      0.10
            XRP      0.10
Name: alpha1_factor, dtype: float64


📊 ALPHA1 FACTOR DATA STRUCTURE INFO:
----------------------------------------
Index levels: ['date', 'asset']
Data type: <class 'pandas.core.series.Series'>
Value type: float64

🔍 CROSS-SECTIONAL RANKING VALIDATION:
----------------------------------------
📅 Sample date: 2024-08-30 00:00:00
📊 Factor values (sorted):
   SOL: -0.4000
   BNB: -0.1500
   ETH: -0.1500
   LINK: -0.1500
   XRP: -0.1500
   ADA: 0.1500
   BTC: 0.1500
   AVAX: 0.4000
   DOGE: 0.4000
   DOT: 0.4000
🎯 Range: [-0.4000, 0.4000] ✓ Centered around 0

🔍 ALPHA1 DATA VALIDATION SUMMARY:
📅 Price data dates: 365
📅 Alpha1 factor dates: 342
📅 Common dates: 342
📊 Date overlap: 93.7%
🪙 Price data assets: ['ADA', 'AVAX', 'BNB', 'BTC', 'DOGE', 'DOT', 'ETH', 'LINK', 'SOL', 'XRP']
🪙 Alpha1 factor assets: ['ADA', 'AVAX', 'BNB', 'BTC', 'DOGE', 'DOT', 'ETH', 'LINK', 'SOL', 'XRP']
🪙 Common assets: ['ADA', 'AVAX', 'BNB', 'BTC', 'DOGE', 'DOT', 'ETH', 'LINK', 'SOL', 'XRP']

✅ ALPHA1 DATASETS READY FOR ALPHALENS ANALYSIS!
💰 Price DataFra

In [12]:
# Alphalens Analysis for Alpha1 Factor
from alphalens.utils import get_clean_factor_and_forward_returns

print(f"🔬 Running Alphalens analysis for Alpha1 factor...")
print(f"🧮 Alpha1 Expression: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)")
print(f"📊 Forward return periods: (1, 5, 10, 20) days")
print(f"📊 Quantiles: 5 (for factor ranking)")

# Prepare factor and forward returns data
ret = get_clean_factor_and_forward_returns(
    factor_series, 
    prices_df, 
    periods=(1, 5, 10, 20),  # 1D, 5D, 10D, 20D forward returns
    quantiles=5,             # Divide into quintiles
    groupby=None             # No grouping
)

print(f"\n✅ Alpha1 factor and forward returns data prepared!")
print(f"📊 Shape: {ret.shape}")
print(f"📈 Columns: {list(ret.columns)}")
print(f"🧮 Alpha1 Factor Summary:")
print(f"   - Range: [{ret['factor'].min():.4f}, {ret['factor'].max():.4f}]")
print(f"   - Mean: {ret['factor'].mean():.4f}")
print(f"   - Std: {ret['factor'].std():.4f}")

# Display first few rows
print(f"\n📊 FIRST 10 ROWS OF PREPARED DATA:")
display(ret.head(10))

# Create comprehensive tear sheet for Alpha1 analysis
print(f"\n📈 Generating comprehensive Alpha1 factor analysis...")
alphalens.tears.create_full_tear_sheet(ret)

print(f"\n🎯 Alpha1 Factor Analysis Complete!")
print(f"🧮 This factor combines:")
print(f"   - Conditional volatility vs price logic")
print(f"   - Signed power transformation")
print(f"   - Time-series momentum patterns")
print(f"   - Cross-sectional ranking")

🔬 Running Alphalens analysis for Alpha1 factor...
🧮 Alpha1 Expression: (rank(Ts_ArgMax(SignedPower(((returns < 0) ? stddev(returns, 20) : close), 2.), 5)) - 0.5)
📊 Forward return periods: (1, 5, 10, 20) days
📊 Quantiles: 5 (for factor ranking)
Dropped 99.7% entries from factor data: 5.8% in forward returns computation and 93.9% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).


MaxLossExceededError: max_loss (35.0%) exceeded 99.7%, consider increasing it.