# Zipline-Reloaded CustomData - Quick Start

This notebook demonstrates the new CustomData functionality for creating and using custom datasets in Zipline pipelines.

## What You'll Learn

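1. Creating custom datasets with `CustomData()`
2. Generating and visualizing sample data for the dataset
3. Building pipelines with custom data
4. Screening for value stocks with pandas (database persistence for custom data is covered in `02_database_storage.ipynb`)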

## Setup

In [None]:
# Numeric / tabular / plotting stack used to generate and chart the sample data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Zipline pieces: CustomData builds the dataset, Pipeline holds the columns and screen
from zipline.pipeline.data import CustomData
from zipline.pipeline import Pipeline
# NOTE(review): MultiColumnDataFrameLoader is not referenced by any visible cell — confirm it is still needed
from zipline.pipeline.loaders import MultiColumnDataFrameLoader

# Only reached when every import above resolved
print("✓ Imports successful!")

## 1. Creating a Custom Dataset

Let's create a custom dataset for fundamental data:

In [None]:
# Column name -> Python type for every field of the dataset
fundamental_columns = dict(
    pe_ratio=float,
    market_cap=float,
    revenue_growth=float,
    debt_ratio=float,
)

# Build the dataset from its name, column spec, and doc string
FundamentalData = CustomData(
    'FundamentalData',
    columns=fundamental_columns,
    doc="Custom fundamental metrics for value investing",
)

print(f"Created dataset: {FundamentalData}")
# NOTE(review): `_column_names` is a private attribute of the dataset — confirm there is no public accessor
print(f"Columns: {sorted(FundamentalData._column_names)}")

## 2. Generate Sample Data

Create some sample fundamental data:

In [None]:
# Business-day index and the ten integer security ids shared by every metric
dates = pd.bdate_range('2020-01-01', '2023-12-31')
sids = list(range(1, 11))  # 10 stocks

# Seed NumPy's global generator so each run draws the same values
np.random.seed(42)


def _uniform_frame(low, high):
    """Return a DataFrame (rows=dates, columns=sids) of uniform draws between low and high."""
    draws = np.random.uniform(low, high, (len(dates), len(sids)))
    return pd.DataFrame(draws, index=dates, columns=sids)


# Call order matters: it decides which stretch of the seeded stream each metric receives
pe_ratios = _uniform_frame(5, 30)
market_caps = _uniform_frame(1e9, 1e11)
revenue_growth = _uniform_frame(-0.1, 0.3)
debt_ratios = _uniform_frame(0, 2)

print(f"Generated data for {len(dates)} dates and {len(sids)} stocks")
print("\nSample PE Ratios:")
print(pe_ratios.head())

## 3. Visualize the Data

In [None]:
# One 2x2 figure: a panel per metric, each showing the first five stocks
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
first_five = sids[:5]

# (axis, data to plot, panel title, y-axis label, optional horizontal reference line)
panels = [
    (axes[0, 0], pe_ratios[first_five],
     'PE Ratios (First 5 Stocks)', 'PE Ratio', None),
    (axes[0, 1], market_caps[first_five] / 1e9,
     'Market Cap (First 5 Stocks)', 'Market Cap (Billions)', None),
    (axes[1, 0], revenue_growth[first_five] * 100,
     'Revenue Growth % (First 5 Stocks)', 'Growth %', {'y': 0}),
    (axes[1, 1], debt_ratios[first_five],
     'Debt Ratio (First 5 Stocks)', 'Debt/Equity Ratio',
     {'y': 1.0, 'label': '1.0 threshold'}),
]

for panel_ax, panel_data, panel_title, panel_ylabel, ref_line in panels:
    panel_data.plot(ax=panel_ax, title=panel_title)
    panel_ax.set_ylabel(panel_ylabel)
    if ref_line is not None:
        # Dashed red guide line, drawn before the legend is built
        panel_ax.axhline(color='r', linestyle='--', alpha=0.3, **ref_line)
    panel_ax.legend(title='Stock ID', loc='best')

plt.tight_layout()
plt.show()

## 4. Create a Pipeline with Custom Data

In [None]:
# Latest value of each fundamental, bound once and reused below
pe_latest = FundamentalData.pe_ratio.latest
cap_latest = FundamentalData.market_cap.latest
growth_latest = FundamentalData.revenue_growth.latest
debt_latest = FundamentalData.debt_ratio.latest

# Individual value criteria
undervalued_filter = pe_latest < 15
growing_filter = growth_latest > 0.1
stable_filter = debt_latest < 1.0

# Output columns: raw fundamentals first, then the derived boolean flags
pipeline_columns = {
    'pe_ratio': pe_latest,
    'market_cap': cap_latest,
    'revenue_growth': growth_latest,
    'debt_ratio': debt_latest,
    'is_undervalued': undervalued_filter,
    'is_growing': growing_filter,
    'is_stable': stable_filter,
    # Combined value score: all three criteria at once
    'is_value_stock': undervalued_filter & growing_filter & stable_filter,
}

# The screen uses looser thresholds than the flags above
pipeline_screen = (pe_latest < 20) & (growth_latest > 0.05)

# Create a value investing pipeline
value_pipeline = Pipeline(columns=pipeline_columns, screen=pipeline_screen)

print("Value investing pipeline created!")
print(f"Columns: {list(value_pipeline.columns)}")

## 5. Summary Statistics

In [None]:
# Calculate summary statistics over every observation (all dates x all stocks).
# The previous version kept only `.iloc[:, 0]` of each describe() table, so the
# printed summary silently covered the first stock alone.
stat_rows = ['mean', 'std', 'min', '25%', '50%', '75%', 'max']

# Display label -> frame to summarise; market cap is rescaled to billions
metric_frames = {
    'PE Ratio': pe_ratios,
    'Market Cap (B)': market_caps / 1e9,
    'Revenue Growth': revenue_growth,
    'Debt Ratio': debt_ratios,
}

# Flatten each frame into one long sample before describing it, so each column
# of `summary` reflects the whole metric rather than a single stock.
summary = pd.DataFrame({
    label: pd.Series(frame.to_numpy().ravel()).describe().loc[stat_rows]
    for label, frame in metric_frames.items()
})

print("\nSummary Statistics:")
print(summary.round(2))

## 6. Find Value Stocks

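The pipeline above is only defined, not run, in this notebook, so let's apply the same value criteria (PE ratio < 15, revenue growth > 10%, debt ratio < 1.0) directly to the most recent day of sample data with pandas: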

In [None]:
# Most recent business day in the sample index
latest_date = dates[-1]

# One row per stock: that day's value of each metric
latest_values = {
    'PE Ratio': pe_ratios.loc[latest_date],
    'Market Cap (B)': market_caps.loc[latest_date] / 1e9,
    'Revenue Growth': revenue_growth.loc[latest_date],
    'Debt Ratio': debt_ratios.loc[latest_date],
}

# Value criteria, evaluated per stock
undervalued = latest_values['PE Ratio'] < 15
growing = latest_values['Revenue Growth'] > 0.1
stable = latest_values['Debt Ratio'] < 1.0

# Metrics first, then the boolean flags, in the same column order as before
latest_metrics = pd.DataFrame({
    **latest_values,
    'Is Undervalued': undervalued,
    'Is Growing': growing,
    'Is Stable': stable,
    'Is Value Stock': undervalued & growing & stable,
})

print(f"\nLatest Metrics (as of {latest_date.date()}):")
print(latest_metrics.round(2))

# Keep only the stocks that pass all three criteria
value_stocks = latest_metrics.loc[latest_metrics['Is Value Stock']]
print(f"\n\n✓ Found {len(value_stocks)} value stocks:")
if value_stocks.empty:
    print("No stocks meet all value criteria with current random data.")
else:
    print(value_stocks[['PE Ratio', 'Revenue Growth', 'Debt Ratio']].round(2))

## Next Steps

Try these other notebooks:

1. **`02_database_storage.ipynb`** - Learn about persistent database storage
2. **`03_advanced_pipelines.ipynb`** - Build complex pipelines with custom factors
3. **`04_real_data_example.ipynb`** - Use real market data

## Learn More

- [CustomData Documentation](../docs/CUSTOM_DATA.md)
- [Database Storage Guide](../docs/CUSTOM_DATA_DATABASE.md)
- [Examples Directory](../examples/)