In [2]:
# imports
# https://classroom.udacity.com/nanodegrees/nd880/parts/9a3a9589-7bc6-4694-81e0-8c3cb1aea251/modules/1976c245-f4ec-42bf-9611-180753a3a4df/lessons/eb063d38-d314-46bc-aeb5-12288aa5249c/concepts/11914781-4bb1-4c91-a0a3-b124159f9ff2
import pandas as pd
import numpy as np
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from zipline.pipeline.data import USEquityPricing
from quantopian.pipeline.filters import Q1500US

from quantopian.pipeline.factors import AverageDollarVolume
from zipline.utils.calendars import get_calendar

from quantopian.pipeline.factors import Returns
from zipline.pipeline.factors import DailyReturns
from quantopian.pipeline.data import morningstar as mstar

# Calendar object returned by get_calendar for the name 'NYSE'
# (not referenced again in the cells shown in this notebook excerpt).
trading_calendar = get_calendar('NYSE')

In [3]:
# Screen: equities whose most recent Morningstar market cap is strictly above $20B
MIN_MARKET_CAP = 20 * 10**9
have_market_cap = mstar.valuation.market_cap.latest > MIN_MARKET_CAP

In [4]:
# Start from a pipeline with no columns; the market-cap filter acts as its screen
pipeline = Pipeline(screen=have_market_cap)

In [5]:
# Daily-returns factor, fed with the close-price column
close_price = USEquityPricing.close
daily_ret = DailyReturns(inputs=[close_price])

### Add factors and filters to the pipeline
In the code below, we add the `daily_ret` factor to the pipeline created above.

In [6]:
# Register the daily-returns factor as the 'daily_return' column.
# overwrite=True keeps this cell re-runnable: without it, executing the cell a
# second time raises because a column with that name already exists.
pipeline.add(daily_ret, 'daily_return', overwrite=True)

# Date range (UTC) over which the pipeline is evaluated
start_date = pd.Timestamp('2014-01-01', tz='utc')
end_date = pd.Timestamp('2019-07-26', tz='utc')

# Run our pipeline for the given start and end dates
output = run_pipeline(pipeline, start_date, end_date)

# Display the first rows of the pipeline output
output.head()



Unnamed: 0,Unnamed: 1,daily_return
2014-01-02 00:00:00+00:00,Equity(24 [AAPL]),0.012011
2014-01-02 00:00:00+00:00,Equity(62 [ABT]),-0.001562
2014-01-02 00:00:00+00:00,Equity(64 [GOLD]),0.029206
2014-01-02 00:00:00+00:00,Equity(114 [ADBE]),0.005542
2014-01-02 00:00:00+00:00,Equity(128 [ADM]),-0.006409


In [7]:
# Pivot the (date, asset)-indexed 'daily_return' column into one row per date
# and one column per asset; combinations with no value are filled with 0.
returns_df = (
    output['daily_return']
    .unstack()
    .fillna(0)
)
returns_df.head()

Unnamed: 0,Equity(2 [ARNC]),Equity(24 [AAPL]),Equity(53 [ABMD]),Equity(62 [ABT]),Equity(64 [GOLD]),Equity(67 [ADSK]),Equity(76 [TAP]),Equity(114 [ADBE]),Equity(122 [ADI]),Equity(128 [ADM]),...,Equity(52592 [LIN]),Equity(52709 [TME]),Equity(52747 [DELL]),Equity(52968 [FOXA]),Equity(52969 [FOX]),Equity(52991 [DOW]),Equity(53023 [LYFT]),Equity(53095 [ZM]),Equity(53158 [UBER]),Equity(53196 [CTVA])
2014-01-02 00:00:00+00:00,0.0,0.012011,0.0,-0.001562,0.029206,0.0,0.0,0.005542,0.0,-0.006409,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014-01-03 00:00:00+00:00,0.0,-0.014131,0.0,-0.002608,0.03916,0.0,0.0,-0.009855,0.0,-0.009445,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014-01-06 00:00:00+00:00,0.0,-0.022034,0.0,0.010199,-0.009285,0.0,0.0,-0.002024,0.0,0.004186,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014-01-07 00:00:00+00:00,0.0,0.005377,0.0,0.013461,0.012128,0.0,0.0,-0.017918,0.0,0.002547,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2014-01-08 00:00:00+00:00,0.0,-0.007205,0.0,-0.007663,-0.004357,0.0,0.0,0.014802,0.0,-0.010164,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
from quantopian.pipeline.data.morningstar import asset_classification, valuation
# NOTE: the `Sector` imported on the next line is rebound by the class defined below.
from quantopian.pipeline.classifiers.morningstar import Sector
# Base class for the classifier below; it must be imported for this cell to run
# on a fresh kernel.
from quantopian.pipeline.classifiers import Classifier
#from zipline.utils.numpy_utils import int64_dtype


class Sector(Classifier):
    """Classifier that looks values up in a frame captured at construction time.

    On construction the notebook-level ``output`` frame is stored on
    ``self.data``. ``_compute`` indexes that frame with ``assets`` and keeps the
    result wherever ``mask`` is true, substituting ``missing_value`` elsewhere.

    NOTE(review): ``output`` is the pipeline result holding the float
    ``daily_return`` column, while this class declares an ``int64`` dtype and is
    named ``Sector``. ``self.data[assets]`` therefore looks unlikely to yield
    sector codes -- confirm which data this classifier is meant to wrap.
    """

    dtype = 'int64'
    window_length = 0
    inputs = ()
    # Value emitted wherever `mask` is false.
    missing_value = -1

    def __init__(self):
        # Reads the module-level `output`; the cell that runs the pipeline must
        # have been executed before this class is instantiated.
        self.data = output

    def _compute(self, arrays, dates, assets, mask):
        """Return ``self.data[assets]`` where ``mask`` holds, else ``missing_value``.

        ``arrays`` and ``dates`` are accepted but not used.
        """
        return np.where(
            mask,
            self.data[assets],
            self.missing_value,
        )

In [31]:
# from quantopian.pipeline.data.morningstar import asset_classification, valuation
# from quantopian.pipeline.classifiers.morningstar import Sector

# # These produce the same data, but Sector has symbolic constants and hand-written docs:
# sector_generic = asset_classification.morningstar_sector_code
# #sector_builtin = Sector()
# sector = Sector()

In [32]:
# Instantiate the Sector classifier defined in this notebook and echo its repr
sector = Sector()
sector

Sector([], 0)

In [33]:
# Number of rows held in the frame attached to the classifier
len(sector.data)

564122

In [35]:
# Preview only the first rows instead of asking the notebook to render the
# whole frame (it holds several hundred thousand rows).
sector.data.head(10)

Unnamed: 0,Unnamed: 1,daily_return
2014-01-02 00:00:00+00:00,Equity(24 [AAPL]),0.012011
2014-01-02 00:00:00+00:00,Equity(62 [ABT]),-0.001562
2014-01-02 00:00:00+00:00,Equity(64 [GOLD]),0.029206
2014-01-02 00:00:00+00:00,Equity(114 [ADBE]),0.005542
2014-01-02 00:00:00+00:00,Equity(128 [ADM]),-0.006409
2014-01-02 00:00:00+00:00,Equity(157 [AEG]),0.006369
2014-01-02 00:00:00+00:00,Equity(161 [AEP]),0.002144
2014-01-02 00:00:00+00:00,Equity(168 [AET]),0.006457
2014-01-02 00:00:00+00:00,Equity(185 [AFL]),0.001574
2014-01-02 00:00:00+00:00,Equity(205 [AGN]),0.003342


In [29]:
# Tally how often each distinct value occurs in the classifier's data and print
# the (value, count) pairs as a two-column array.
# NOTE(review): this cell carries execution count 29, i.e. it was last run
# before the cells that precede it in the notebook -- re-run top to bottom to
# confirm the output.
unique, counts = np.unique(sector.data, return_counts=True)
value_count_pairs = np.column_stack((unique, counts))
print(value_count_pairs)

[[ -0.94728044   1.        ]
 [ -0.90074813   1.        ]
 [ -0.83963801   1.        ]
 ..., 
 [  0.93710339   1.        ]
 [  0.95406673   1.        ]
 [ 18.03812163   1.        ]]
