In [12]:
import pandas as pd
import numpy as np
from collections import OrderedDict

import zipline
import alphalens
import pyfolio

from zipline.data import bundles
from zipline.pipeline import Pipeline
from zipline.utils.calendars import get_calendar
from zipline.pipeline.engine import SimplePipelineEngine
from zipline.pipeline.factors import CustomFactor, SimpleBeta, DailyReturns, AverageDollarVolume

from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities

from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.loaders import USEquityPricingLoader
from zipline.assets._assets import Equity
from zipline.api import symbol

In [10]:
"""
Custom Bundle for Loading the SEP daily stock dataset from Sharadar, from a dump.

Created by Peter Harrington (pbharrin) on 3/8/18.
"""


import pandas as pd
from zipline.utils.calendars import get_calendar
import sys


METADATA_HEADERS = ['start_date', 'end_date', 'auto_close_date',
                    'symbol', 'exchange', 'asset_name']


def check_for_abnormal_returns(df, thresh=3.0):
    """Checks to see if any days have abnormal returns"""
    returns = df['close'].pct_change()
    abnormal_rets = returns[returns > thresh]
    if abnormal_rets.shape[0] > 0:
        sys.stderr.write('Abnormal returns for: {}\n'.format(df.ix[0]['ticker']))
        sys.stderr.write('{}\n'.format(str(abnormal_rets)))


def from_sep_dump(file_name, start=None, end=None):
    """
    ticker,date,open,high,low,close,volume,dividends,lastupdated
    A,2008-01-02,36.67,36.8,36.12,36.3,1858900.0,0.0,2017-11-01

    To use this make your ~/.zipline/extension.py look similar this:

    from zipline.data.bundles import register
    from alphacompiler.data.loaders.sep_quandl import from_sep_dump

    register("sep",
         from_sep_dump("/path/to/your/SEP/dump/SHARADAR_SEP_69.csv"),)

    """
    us_calendar = get_calendar("NYSE").all_sessions
    ticker2sid_map = {}

    def ingest(environ,
               asset_db_writer,
               minute_bar_writer,  # unused
               daily_bar_writer,
               adjustment_writer,
               calendar,
               cache,
               show_progress,
               output_dir,
               # pass these as defaults to make them 'nonlocal' in py2
               start=start,
               end=end):

        print("starting ingesting data from: {}".format(file_name))

        # read in the whole dump (will require ~7GB of RAM)
        df = pd.read_csv(file_name, index_col='date',
                         parse_dates=['date'], na_values=['NA'])

        # drop unused columns, dividends will be used later
        df = df.drop(['lastupdated', 'dividends', 'closeunadj'], axis=1)

        # counter of valid securites, this will be our primary key
        sec_counter = 0
        data_list = []  # list to send to daily_bar_writer
        metadata_list = []  # list to send to asset_db_writer (metadata)

        # iterate over all the unique securities and pack data, and metadata
        # for writing
        for tkr, df_tkr in df.groupby('ticker'):
            df_tkr = df_tkr.sort_index()

            row0 = df_tkr.ix[0]  # get metadata from row

            print(" preparing {}".format(row0["ticker"]))
            check_for_abnormal_returns(df_tkr)

            # check to see if there are missing dates in the middle
            this_cal = us_calendar[(us_calendar >= df_tkr.index[0]) & (us_calendar <= df_tkr.index[-1])]
            if len(this_cal) != df_tkr.shape[0]:
                print('MISSING interstitial dates for: %s using forward fill' % row0["ticker"])
                print('number of dates missing: {}'.format(len(this_cal) - df_tkr.shape[0]))
                df_desired = pd.DataFrame(index=this_cal.tz_localize(None))
                df_desired = df_desired.join(df_tkr)
                df_tkr = df_desired.fillna(method='ffill')

            # update metadata; 'start_date', 'end_date', 'auto_close_date',
            # 'symbol', 'exchange', 'asset_name'
            metadata_list.append((df_tkr.index[0],
                                  df_tkr.index[-1],
                                  df_tkr.index[-1] + pd.Timedelta(days=1),
                                  row0["ticker"],
                                  "SEP",  # all have exchange = SEP
                                  row0["ticker"]  # TODO: can we delete this?
                                  )
                                 )

            # drop metadata columns
            df_tkr = df_tkr.drop(['ticker'], axis=1)

            # pack data to be written by daily_bar_writer
            data_list.append((sec_counter, df_tkr))
            ticker2sid_map[tkr] = sec_counter  # record the sid for use later
            sec_counter += 1

        print("writing data for {} securities".format(len(metadata_list)))
        daily_bar_writer.write(data_list, show_progress=False)

        # write metadata
        asset_db_writer.write(equities=pd.DataFrame(metadata_list,
                                                    columns=METADATA_HEADERS))
        print("a total of {} securities were loaded into this bundle".format(
            sec_counter))

        # read in Dividend History
        dfd = pd.read_csv(file_name, index_col='date',
                         parse_dates=['date'], na_values=['NA'])
        # drop rows where dividends == 0.0
        dfd = dfd[dfd["dividends"] != 0.0]
        dfd = dfd.dropna()

        dfd.loc[:, 'ex_date'] = dfd.loc[:, 'record_date'] = dfd.index
        dfd.loc[:, 'declared_date'] = dfd.loc[:, 'pay_date'] = dfd.index
        dfd.loc[:, 'sid'] = dfd.loc[:, 'ticker'].apply(lambda x: ticker2sid_map[x])
        dfd = dfd.rename(columns={'dividends': 'amount'})
        dfd = dfd.drop(['open', 'high', 'low', 'close', 'volume', 'lastupdated', 'ticker', 'closeunadj'], axis=1)

        # # format dfd to have sid
        adjustment_writer.write(dividends=dfd)

    return ingest


In [37]:
zipline.assets.Equity(8554,'NYSE',symbol='SPY')

Equity(8554 [SPY])

In [2]:
from zipline.pipeline import CustomFactor, Pipeline  
from zipline.pipeline.data import USEquityPricing  
from zipline.pipeline.engine import PipelineEngine 

In [13]:

from zipline.pipeline.engine import SimplePipelineEngine
  # Required for USEquityPricing
from zipline.pipeline import Pipeline

def register_data(start_date, end_date, bundle_name, address):

    start_session = pd.Timestamp(start_date, tz='utc')
    end_session = pd.Timestamp(end_date, tz='utc')

    register(bundle_name, csvdir_equities(['daily'],address,),
    calendar_name='NYSE', start_session=start_session,
    end_session=end_session)


class PricingLoader(object):
    def __init__(self, bundle_data):
        self.loader = USEquityPricingLoader(
            bundle_data.equity_daily_bar_reader,
            bundle_data.adjustment_reader)

    def get_loader(self, column):
        if column not in USEquityPricing.columns:
            raise Exception('Column not in USEquityPricing')
        return self.loader

def build_pipeline_engine(bundle_data, trading_calendar):
    pricing_loader = PricingLoader(bundle_data)

    engine = SimplePipelineEngine(
        get_loader=pricing_loader.get_loader,
        calendar=trading_calendar.all_sessions,
        asset_finder=bundle_data.asset_finder)

    return engine

# Loading stock list from file
def stock_list(file_name):
    all_stocks = []
    with open(file_name, 'r') as f:
        for line in f:
            # remove linebreak which is the last character of the string
            currentPlace = line[:-1]
            # add item to the list
            all_stocks.append(currentPlace)
        return all_stocks

def get_universe_tickers(engine, universe, end_date):
    universe_end_date = pd.Timestamp(end_date, tz='UTC')

    universe_tickers = engine \
        .run_pipeline(
        Pipeline(screen=universe),
        universe_end_date,
        universe_end_date) \
        .index.get_level_values(1) \
        .values.tolist()

    return universe_tickers

In [30]:
universe = AverageDollarVolume(window_length=120).top(50)

In [4]:
import os 
load_extensions( default=True, extensions=[], strict=True, environ=os.environ, )

NameError: name 'load_extensions' is not defined

In [8]:
help(bundles.register)

Help on curry in module zipline.data.bundles.core:

register(name='__no__default__', f='__no__default__', calendar_name='NYSE', start_session=None, end_session=None, minutes_per_day=390, create_writers=True)
    Register a data bundle ingest function.
    
    Parameters
    ----------
    name : str
        The name of the bundle.
    f : callable
        The ingest function. This function will be passed:
    
          environ : mapping
              The environment this is being run with.
          asset_db_writer : AssetDBWriter
              The asset db writer to write into.
          minute_bar_writer : BcolzMinuteBarWriter
              The minute bar writer to write into.
          daily_bar_writer : BcolzDailyBarWriter
              The daily bar writer to write into.
          adjustment_writer : SQLiteAdjustmentWriter
              The adjustment db writer to write into.
          calendar : trading_calendars.TradingCalendar
              The trading calendar to ingest for.

In [14]:
universe = AverageDollarVolume(window_length=120).top(1500)
trading_calendar = get_calendar('NYSE') 
ingest_func = bundles.csvdir.csvdir_equities(['daily'], 'sep')
bundles.register('sep', from_sep_dump('.'))
bundle_data = bundles.load('sep')
engine = build_pipeline_engine(bundle_data, trading_calendar)

  after removing the cwd from sys.path.


In [32]:
def make_pipeline(factors, universe):
    factors_pipe = OrderedDict()
        
    for name, f in factors.items():
        factors_pipe[name] = f
                    
    pipe = Pipeline(screen=universe, columns=factors_pipe)
    
    return pipe


def make_factors():
    
    def BetaToSPY(): 
        return SimpleBeta(target=Equity(8554,'NYSE'),regression_length=260) 

    class AverageMarketReturn(CustomFactor):
            inputs = [DailyReturns()]
            window_length = 1
            window_safe = True
            mask = universe

            def compute(self, today, assets, out, returns):
                # returns are days in rows, assets across columns
                out[:] = np.nanmean(returns)

    def Market_style():
        return BetaToSPY() * AverageMarketReturn()
    
    #def Market_style():
    #    return DailyReturns()
    
    all_factors = {
        'Market_style': Market_style(),
    }
    
    return all_factors

In [20]:
factor_start_date = pd.Timestamp('2014-01-03', tz='UTC')
universe_end_date = pd.Timestamp('2020-02-03', tz='UTC')

In [33]:
universe_tickers = engine\
    .run_pipeline(
        Pipeline(screen=universe),
        universe_end_date,
        universe_end_date)\
    .index.get_level_values(1)\
    .values.tolist()
    
universe_tickers

[Equity(32 [AAPL]),
 Equity(240 [ADBE]),
 Equity(770 [AMD]),
 Equity(877 [AMZN]),
 Equity(1465 [AVGO]),
 Equity(1590 [BA]),
 Equity(1593 [BABA]),
 Equity(1595 [BAC]),
 Equity(1996 [BKNG]),
 Equity(2092 [BMY]),
 Equity(2244 [BRK.B]),
 Equity(2442 [BYND]),
 Equity(2456 [C]),
 Equity(3345 [CMCSA]),
 Equity(3772 [CRM]),
 Equity(3848 [CSCO]),
 Equity(4073 [CVX]),
 Equity(4378 [DHR]),
 Equity(4412 [DIS]),
 Equity(5536 [FB]),
 Equity(6316 [GE]),
 Equity(6642 [GOOG]),
 Equity(6643 [GOOGL]),
 Equity(7027 [HD]),
 Equity(7971 [INTC]),
 Equity(8342 [JNJ]),
 Equity(8364 [JPM]),
 Equity(9283 [MA]),
 Equity(9433 [MCD]),
 Equity(10021 [MRK]),
 Equity(10085 [MSFT]),
 Equity(10196 [MU]),
 Equity(10511 [NFLX]),
 Equity(10700 [NOW]),
 Equity(10915 [NVDA]),
 Equity(11726 [PFE]),
 Equity(11758 [PG]),
 Equity(12471 [PYPL]),
 Equity(12499 [QCOM]),
 Equity(13021 [ROKU]),
 Equity(13674 [SHOP]),
 Equity(14647 [T]),
 Equity(15389 [TSLA]),
 Equity(15584 [UBER]),
 Equity(15712 [UNH]),
 Equity(15875 [V]),
 Equity(16

In [36]:
pipe= make_pipeline(make_factors(),universe=universe)
engine.run_pipeline(pipeline=pipe, start_date=factor_start_date, end_date=universe_end_date)

NonExistentAssetInTimeFrame: The target asset 'Equity(8554)' does not exist for the entire timeframe between 2012-12-21 00:00:00+00:00 and 2020-02-03 00:00:00+00:00.

In [54]:
bundle_data = bundles.load('quantopian-quandl')
trading_calendar = get_calendar('NYSE')
engine = build_pipeline_engine(bundle_data, trading_calendar)

In [24]:
Equity(8554,'NYSE')

Equity(8554)