# Multi-Source Fundamentals: Sharadar + Custom LSEG

**Simplified Example**: Focusing on 3 common metrics

This notebook demonstrates combining Sharadar SF1 with custom LSEG data using metrics available in both sources:
- **ROE** (Return on Equity)
- **P/E Ratio** (Price to Earnings)
- **D/E Ratio** (Debt to Equity)

**What This Shows:**
1. Load the same metrics from both sources side by side in a single pipeline
2. Build a consensus score when both sources agree
3. Run a working backtest using both datasets

**Test Universe**: Small set of stocks (AAPL, MSFT, GOOGL, etc.) that exist in your custom database

## Setup

In [None]:
import sys
import pandas as pd
import numpy as np
from pathlib import Path

# Add custom_data to path
sys.path.insert(0, '/app/examples/custom_data')

from zipline import run_algorithm
from zipline.api import (
    attach_pipeline,
    pipeline_output,
    order_target_percent,
    record,
    schedule_function,
    date_rules,
    time_rules,
)
from zipline.pipeline import Pipeline
from zipline.pipeline.data.sharadar import SharadarFundamentals
from zipline.pipeline.filters import StaticAssets
from zipline.data.bundles import load as load_bundle, register
from zipline.data.bundles.sharadar_bundle import sharadar_bundle
from zipline.data.custom import CustomSQLiteLoader
from zipline.pipeline.data.db import Database, Column

# Register bundle
# The name 'sharadar' registered here is the same name later passed to
# load_bundle() in make_pipeline() and to run_algorithm(bundle=...).
register('sharadar', sharadar_bundle())

print("✓ Imports complete")

## Define Custom Fundamentals Database

In [None]:
class CustomFundamentals(Database):
    """Custom LSEG fundamentals - matching columns to Sharadar.

    Declares the columns of the custom fundamentals database so they can be
    referenced in a pipeline (e.g. ``CustomFundamentals.ROE.latest``).
    The first three columns mirror metrics that are also read from
    ``SharadarFundamentals`` in ``make_pipeline``; the remaining columns are
    declared but not used by the strategy in this notebook.
    """
    # Database code; the same string is passed to CustomSQLiteLoader in
    # setup_custom_loader().
    CODE = "fundamentals"
    # NOTE(review): presumably the number of trailing rows/days the loader
    # looks back when resolving `.latest` -- confirm against Database docs.
    LOOKBACK_WINDOW = 240
    
    # Common metrics (exist in both Sharadar and LSEG)
    ROE = Column(float)              # Same as Sharadar 'roe'
    PERatio = Column(float)          # Same as Sharadar 'pe'
    DebtToEquity = Column(float)     # Same as Sharadar 'de'
    
    # Additional LSEG metrics
    Revenue = Column(float)
    NetIncome = Column(float)
    
print("✓ Custom database defined")

## Setup Custom Loader (Required for Backtest)

In [None]:
def setup_custom_loader():
    """Build the column -> loader mapping handed to ``run_algorithm``.

    Every attribute of ``CustomFundamentals`` that exposes a ``dataset``
    attribute is registered against a single ``CustomSQLiteLoader`` that is
    constructed for the "fundamentals" database under
    ``~/.zipline/data/custom``.

    Returns:
        A ``dict`` subclass whose ``get`` falls back to matching columns by
        (dataset label, column name) when the exact key is not present.
    """

    def _column_identity(column):
        """Return (dataset label, column name), or None if not column-like."""
        if not (hasattr(column, 'dataset') and hasattr(column, 'name')):
            return None
        # Only the text before the first '<' of the dataset's string form is
        # compared.
        dataset_label = str(column.dataset).split('<')[0]
        return dataset_label, column.name

    class LoaderDict(dict):
        def get(self, key, default=None):
            # Fast path: the exact column object was registered.
            if key in self:
                return self[key]

            # Fallback: match on dataset label + column name.
            # NOTE(review): presumably needed because the pipeline can hand
            # back column objects that are not the ones registered -- confirm.
            wanted = _column_identity(key)
            if wanted is not None:
                for candidate, candidate_loader in self.items():
                    if _column_identity(candidate) == wanted:
                        return candidate_loader

            # NOTE(review): `default` is ignored and KeyError is raised,
            # unlike dict.get; presumably the caller relies on the exception
            # to detect unregistered columns -- confirm before changing.
            raise KeyError(key)

    registry = LoaderDict()
    db_dir = Path.home() / '.zipline' / 'data' / 'custom'
    sqlite_loader = CustomSQLiteLoader("fundamentals", db_dir=db_dir)

    # Register every class attribute that looks like a column.
    members = (
        getattr(CustomFundamentals, member_name)
        for member_name in dir(CustomFundamentals)
    )
    registry.update(
        (member, sqlite_loader)
        for member in members
        if hasattr(member, 'dataset')
    )

    print(f"✓ Custom loader registered with {len(registry)} columns")
    return registry

custom_loader = setup_custom_loader()

## Define Multi-Source Strategy

In [None]:
# Strategy configuration
# Number of top-scoring stocks selected at each rebalance (see rebalance()).
TOP_N_STOCKS = 5
# Tickers looked up in the 'sharadar' bundle by make_pipeline() to form the
# static universe used as the pipeline screen.
UNIVERSE_TICKERS = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'NVDA', 'META', 'JPM', 'V', 'WMT', 'XOM', 'TSLA']

def make_pipeline():
    """Pipeline using both Sharadar and Custom LSEG data.

    Resolves ``UNIVERSE_TICKERS`` against the 'sharadar' bundle, then builds
    a pipeline screened to those assets. It exposes the latest ROE, P/E and
    D/E from both ``SharadarFundamentals`` (``s_*`` columns) and
    ``CustomFundamentals`` (``l_*`` columns).

    Tickers that the bundle cannot resolve are skipped (best effort) and
    reported in a single warning line instead of being dropped silently.

    Returns:
        Pipeline: columns ``s_roe``, ``s_pe``, ``s_de``, ``l_roe``, ``l_pe``,
        ``l_de``, screened to the resolved universe.
    """
    # Local import keeps this cell self-contained; these are the lookup
    # failures we deliberately tolerate.
    from zipline.errors import MultipleSymbolsFound, SymbolNotFound

    # Load universe from bundle
    bundle_data = load_bundle('sharadar')
    asset_finder = bundle_data.asset_finder

    # Get assets for our tickers
    assets = []
    skipped = []
    for ticker in UNIVERSE_TICKERS:
        try:
            asset = asset_finder.lookup_symbol(ticker, as_of_date=None)
        except (SymbolNotFound, MultipleSymbolsFound):
            # Only symbol-resolution failures are tolerated; anything else
            # (e.g. KeyboardInterrupt, a broken bundle) should surface.
            skipped.append(ticker)
            continue
        assets.append(asset)

    if skipped:
        print(f"⚠ Skipped tickers not resolvable in bundle: {', '.join(skipped)}")

    universe = StaticAssets(assets)

    return Pipeline(
        columns={
            # Sharadar metrics
            's_roe': SharadarFundamentals.roe.latest,
            's_pe': SharadarFundamentals.pe.latest,
            's_de': SharadarFundamentals.de.latest,
            # Custom LSEG metrics
            'l_roe': CustomFundamentals.ROE.latest,
            'l_pe': CustomFundamentals.PERatio.latest,
            'l_de': CustomFundamentals.DebtToEquity.latest,
        },
        screen=universe,
    )

print("✓ Pipeline factory defined")

## Strategy Logic

In [None]:
def initialize(context):
    """Attach the pipeline, schedule rebalancing, and print a run banner.

    Args:
        context: Algorithm context; ``stocks_held`` is initialised to an
            empty list on it.
    """
    pipeline = make_pipeline()
    attach_pipeline(pipeline, 'multi_source')

    # Run rebalance() at each month start, one hour after the market opens.
    rebalance_date = date_rules.month_start()
    rebalance_time = time_rules.market_open(hours=1)
    schedule_function(rebalance, rebalance_date, rebalance_time)

    context.stocks_held = []

    rule = "=" * 80
    banner_lines = [
        "\n" + rule,
        "Multi-Source Fundamentals Strategy",
        rule,
        f"Universe: {len(UNIVERSE_TICKERS)} stocks",
        "Common metrics: ROE, P/E, D/E",
        f"Top N: {TOP_N_STOCKS}",
        rule + "\n",
    ]
    print("\n".join(banner_lines))

def before_trading_start(context, data):
    """Store the current 'multi_source' pipeline output on the context.

    ``rebalance`` later reads it back from ``context.pipeline_data``.
    """
    latest_output = pipeline_output('multi_source')
    context.pipeline_data = latest_output

def rebalance(context, data):
    """Monthly rebalancing with consensus scoring.

    Scoring (per asset, from ``context.pipeline_data``):
      * +1 if Sharadar ROE > 0.15
      * +1 if Sharadar P/E is in (0, 25)
      * +1 if Sharadar D/E < 2
      * +2 if Sharadar ROE and LSEG ROE are BOTH > 0.15 (the two sources
        agree on the ROE signal)

    The top ``TOP_N_STOCKS`` assets by score are equal-weighted; any held
    position that is no longer a target is closed. Does nothing when the
    pipeline output is empty.

    Args:
        context: Algorithm context carrying ``pipeline_data`` and
            ``portfolio``.
        data: Bar data object used for ``can_trade`` checks.
    """
    df = context.pipeline_data.copy()

    if df.empty:
        return

    # Comparisons against NaN evaluate to False, so missing fundamentals never
    # earn points and no explicit notna() checks are needed.
    # NOTE(review): both ROE columns are compared to 0.15, which assumes both
    # sources express ROE as a ratio (not a percentage) -- confirm for LSEG.
    sharadar_roe_ok = df['s_roe'] > 0.15
    sharadar_pe_ok = (df['s_pe'] > 0) & (df['s_pe'] < 25)
    sharadar_de_ok = df['s_de'] < 2
    lseg_roe_ok = df['l_roe'] > 0.15

    # The bonus requires the Sharadar signal to pass too; previously a
    # merely non-null Sharadar ROE was enough, so LSEG could earn the
    # "agreement" bonus even when the sources disagreed.
    sources_agree = sharadar_roe_ok & lseg_roe_ok

    df['score'] = (
        sharadar_roe_ok.astype(int)
        + sharadar_pe_ok.astype(int)
        + sharadar_de_ok.astype(int)
        + 2 * sources_agree.astype(int)
    )

    # Stable sort so that ties in score are broken deterministically.
    ranked = df.sort_values('score', ascending=False, kind='mergesort')
    selected = ranked.head(TOP_N_STOCKS)
    target_stocks = selected.index.tolist()
    target_set = set(target_stocks)

    # Equal weight
    weight = 1.0 / len(target_stocks) if target_stocks else 0

    for stock in target_stocks:
        if data.can_trade(stock):
            order_target_percent(stock, weight)

    # Close positions that dropped out of the target list.
    for stock in context.portfolio.positions:
        if stock not in target_set and data.can_trade(stock):
            order_target_percent(stock, 0)

    # Log
    lseg_confirmed = selected['l_roe'].notna().sum()
    print(f"[{context.datetime.date()}] {len(target_stocks)} stocks, {lseg_confirmed} with LSEG data")

def analyze(context, perf):
    """Print a short performance summary and return ``perf`` unchanged.

    Reports the total return (last vs. first ``portfolio_value`` row, in
    percent) and the Sharpe ratio computed as mean/std of ``returns`` scaled
    by sqrt(252). When the standard deviation is zero or undefined the
    Sharpe line reads ``Sharpe: N/A``.

    Args:
        context: Algorithm context (unused).
        perf: DataFrame with ``returns`` and ``portfolio_value`` columns.

    Returns:
        The ``perf`` object that was passed in.
    """
    returns = perf['returns']
    portfolio_value = perf['portfolio_value']
    total_return = (portfolio_value.iloc[-1] / portfolio_value.iloc[0] - 1) * 100

    # NaN (e.g. a single observation) and 0 both fail the `> 0` check.
    volatility = returns.std()
    if volatility > 0:
        sharpe_text = f"{returns.mean() / volatility * np.sqrt(252):.2f}"
    else:
        sharpe_text = "N/A"

    print("\n" + "="*80)
    print("Backtest Complete")
    print("="*80)
    print(f"Total Return: {total_return:.2f}%")
    # Always keep the "Sharpe:" label, even when the value is unavailable.
    print(f"Sharpe: {sharpe_text}")
    print("="*80)
    return perf

print("✓ Strategy functions defined")

## Run Backtest

In [None]:
# Backtest window, expressed as timezone-aware (UTC) timestamps.
START = pd.Timestamp('2023-01-01', tz='UTC')
END = pd.Timestamp('2024-11-01', tz='UTC')

print(f"Running backtest: {START.date()} to {END.date()}\n")

# rebalance() is not passed here; it is scheduled from initialize().
# Any failure is printed with its traceback and `results` is set to None so
# later cells can check whether the run succeeded.
try:
    results = run_algorithm(
        start=START,
        end=END,
        initialize=initialize,
        before_trading_start=before_trading_start,
        analyze=analyze,
        capital_base=100000,
        bundle='sharadar',
        custom_loader=custom_loader,  # KEY: Pass custom loader here
    )
    print("\n✓ Backtest successful!")
except Exception as e:
    print(f"\n❌ Error: {e}")
    import traceback
    traceback.print_exc()
    results = None

## Summary

This notebook demonstrated:

### ✅ Multi-Source Data Integration
- Loaded both Sharadar and custom LSEG fundamentals
- Focused on 3 common metrics: ROE, P/E, D/E
- Used proper custom loader setup for backtesting

### ✅ Consensus Scoring
- Base points from Sharadar metrics
- Bonus points when LSEG confirms (both sources agree)
- Top N stocks by combined score

### ✅ Working Backtest
- Small universe (11 stocks with custom data)
- Monthly rebalancing
- Performance summary (total return and annualized Sharpe ratio)

**Key Takeaway**: By focusing on common metrics and using the proper loader setup, we can successfully combine multiple fundamental data sources in production backtests.