# Test Sharadar Metadata Filters

This notebook tests the Sharadar ticker metadata filters using the enriched fundamentals database.

In [3]:
import pandas as pd
import numpy as np
from zipline.pipeline import Pipeline
from zipline.pipeline.engine import SimplePipelineEngine

# Import custom data loaders and utilities
import sys
sys.path.insert(0, '/app/examples/utils')
from backtest_helpers import setup_auto_loader, load_bundle

# Import Sharadar filters
sys.path.insert(0, '/app/examples/strategies')
from sharadar_filters import (
    SharadarTickers,
    ExchangeFilter,
    CategoryFilter,
    ADRFilter,
    SectorFilter,
    ScaleMarketCapFilter,
    create_sharadar_universe,
)

print("✅ Imports successful")

✅ Imports successful


## 1. Set up Pipeline Engine

In [None]:
# Build the loader that will serve every pipeline column for the 'sharadar' bundle.
print("Setting up auto loader...")
custom_loader = setup_auto_loader(
    bundle_name='sharadar',
    custom_db_dir='/data/custom_databases',
    enable_sid_translation=True,
)

# Load bundle
print("Loading bundle...")
bundle_data = load_bundle('sharadar')

# Create pipeline engine.
# NOTE: zipline releases that provide `zipline.pipeline.domain` define
# SimplePipelineEngine(get_loader, asset_finder, default_domain=..., ...) and
# accept no `calendar` keyword; passing one raises TypeError. The sessions to
# compute over are taken from the domain declared on each Pipeline instead.
print("Creating pipeline engine...")
engine = SimplePipelineEngine(
    # The same loader is returned regardless of which column is requested.
    get_loader=lambda column: custom_loader,
    asset_finder=bundle_data.asset_finder,
)

print("✅ Pipeline engine ready")

## 2. Test Individual Filters

In [None]:
# Test pipeline with individual filters
from zipline.pipeline.domain import US_EQUITIES

def make_test_pipeline():
    """Build a pipeline that shows raw Sharadar metadata next to filter outputs.

    Returns
    -------
    Pipeline
        A pipeline on the ``US_EQUITIES`` domain with eight columns: the
        ``.latest`` value of five ``SharadarTickers`` fields (``exchange``,
        ``category``, ``is_adr``, ``sector``, ``scalemarketcap``) followed by
        the outputs of ``ExchangeFilter()``, ``CategoryFilter()`` and
        ``ADRFilter()`` built with their default arguments.
    """
    tickers = SharadarTickers

    # Raw metadata, so each filter decision can be checked against its input.
    metadata_columns = {
        'exchange': tickers.sharadar_exchange.latest,
        'category': tickers.sharadar_category.latest,
        'is_adr': tickers.sharadar_is_adr.latest,
        'sector': tickers.sharadar_sector.latest,
        'scalemarketcap': tickers.sharadar_scalemarketcap.latest,
    }

    # Filters under test, each instantiated with its defaults.
    filter_columns = {
        'exchange_filter': ExchangeFilter(),
        'category_filter': CategoryFilter(),
        'adr_filter': ADRFilter(),
    }

    return Pipeline(
        columns={**metadata_columns, **filter_columns},
        domain=US_EQUITIES,
    )

# Evaluate the pipeline for a single session: start and end are the same date.
test_date = pd.Timestamp('2024-01-05')
print(f"Running pipeline for {test_date}...")

pipeline = make_test_pipeline()
result = engine.run_pipeline(pipeline, test_date, test_date)

# Report how many rows came back, then preview the first 20 of them.
n_rows = len(result)
preview = result.head(20)
print(f"\n✅ Pipeline executed: {n_rows} assets")
print("\nSample data:")
print(preview)

## 3. Check Filter Results

In [None]:
# Check how many assets pass each filter
def _pass_rate_line(label, passed, total):
    """Format one summary line as ``<label>: <count> (<pct>%)``.

    Parameters
    ----------
    label : str
        Text printed before the colon.
    passed : int
        Number of rows for which the filter column is True.
    total : int
        Number of rows in the pipeline result.

    Returns
    -------
    str
        The formatted line. The percentage is reported as 0.0 when ``total``
        is zero, so an empty pipeline result does not trigger a division by
        zero.
    """
    pct = passed / total * 100 if total else 0.0
    return f"{label}: {passed:,} ({pct:.1f}%)"


print("=" * 80)
print("FILTER RESULTS")
print("=" * 80)

total_assets = len(result)
print(f"Total assets: {total_assets:,}")
print()

# Summing a boolean column counts the rows where the filter is True.

# Exchange filter
exchange_pass = result['exchange_filter'].sum()
print(_pass_rate_line("Exchange Filter (NYSE/NASDAQ/NYSEMKT)", exchange_pass, total_assets))

# Category filter
category_pass = result['category_filter'].sum()
print(_pass_rate_line("Category Filter (Domestic Common Stock)", category_pass, total_assets))

# ADR filter
adr_count = result['adr_filter'].sum()
print(_pass_rate_line("Non-ADR count", adr_count, total_assets))

# Combined filter: rows that pass all three filters at once.
combined_filter = result['exchange_filter'] & result['category_filter'] & result['adr_filter']
combined_pass = combined_filter.sum()
print(_pass_rate_line("\nCombined Filter (all three)", combined_pass, total_assets))

print()
print("=" * 80)