In [41]:
# Imports
import pandas as pd
import numpy as np
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from zipline.pipeline.data import USEquityPricing
from quantopian.pipeline.filters import Q1500US

from quantopian.pipeline.factors import AverageDollarVolume
from zipline.utils.calendars import get_calendar
#from pipline.engine import SimplePipelineEngine
#from zipline.pipline.loaders import USEquityPricingLoader


from quantopian.pipeline.factors import Returns
from zipline.pipeline.factors import DailyReturns

from quantopian.pipeline.data import morningstar as mstar

#from zipline.pipeline.factors import SimpleMovingAverage

# Name of the exchange calendar to look up.
CALENDAR_NAME = 'NYSE'
trading_calendar = get_calendar(CALENDAR_NAME)

In [42]:
#universe = AverageDollarVolume(window_length = 1).top(10)
##universe = AverageDollarVolume(window_length = 60) > 50000
#universe

In [67]:
# Market-cap floor for the screen: $20b.
market_cap_floor = 20 * 10**9

# Filter that passes equities whose most recent morningstar market cap
# is above the floor.
latest_market_cap = mstar.valuation.market_cap.latest
have_market_cap = latest_market_cap > market_cap_floor

In [68]:
#universe1 = Q1500US

In [69]:
# Create a pipeline screened by the market-cap filter defined above
# (have_market_cap); no columns are passed in at construction time.
# Alternative screen kept for reference:
#pipeline = Pipeline(screen=universe)
pipeline = Pipeline(screen=have_market_cap)

In [70]:
# Daily returns factor, fed by the US equity close-price column.
close_price = USEquityPricing.close
daily_ret = DailyReturns(inputs=[close_price])

### Add factors and filters to the pipeline
In the code below, add the factors and filters you created above to the pipeline.

In [71]:
# Add the daily returns factor to the pipeline under the column name
# 'daily_return'; this is the column that appears in the pipeline output below.
pipeline.add(daily_ret, 'daily_return')


In [72]:
#pipeline.add(universe, 'universe')

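This batch of code creates a datetime-indexed dataframe of daily returns for every stock that passes the pipeline screen (morningstar market cap above $20b) over the chosen date range. An earlier version of the notebook screened for the top 10 stocks with the highest dollar volume over a 1-day timeframe; that screen is commented out above.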

**Note:** Since average dollar volume is calculated daily as the stock price multiplied by the volume of shares traded, the population of stocks selected by a dollar-volume screen may vary from day to day.

In [75]:
# Date window for the pipeline run; both endpoints are built as UTC timestamps.
first_day, last_day = '2019-07-24', '2019-07-26'
start_date = pd.Timestamp(first_day, tz='utc')
end_date = pd.Timestamp(last_day, tz='utc')

# Evaluate the pipeline across that window.
output = run_pipeline(pipeline, start_date, end_date)

# Preview at most the first 50 rows of the result.
output.head(50)



Unnamed: 0,Unnamed: 1,daily_return
2019-07-24 00:00:00+00:00,Equity(24 [AAPL]),0.007745
2019-07-24 00:00:00+00:00,Equity(1091 [BRK_A]),0.007579
2019-07-24 00:00:00+00:00,Equity(4151 [JNJ]),0.001632
2019-07-24 00:00:00+00:00,Equity(5061 [MSFT]),0.006213
2019-07-24 00:00:00+00:00,Equity(8229 [WMT]),-0.006648
2019-07-24 00:00:00+00:00,Equity(8347 [XOM]),0.003929
2019-07-24 00:00:00+00:00,Equity(11100 [BRK_B]),0.008007
2019-07-24 00:00:00+00:00,Equity(16841 [AMZN]),0.004305
2019-07-24 00:00:00+00:00,Equity(25006 [JPM]),0.018201
2019-07-24 00:00:00+00:00,Equity(26578 [GOOG_L]),0.0079


In [76]:
# (rows, columns) of the pipeline output, which has one row per
# (date, equity) pair and a single daily_return column.
output.shape

(42, 1)

In [77]:
# Pivot the (date, equity)-indexed daily returns into a table with one row
# per date and one column per equity.
#
# The full date range is kept here: chaining .head() into the assignment
# would silently cap the data passed to the covariance calculation at the
# first five dates whenever the date window is widened.
#
# Missing returns are replaced with 0.
# NOTE(review): zero-filling treats a missing observation as a flat day,
# which can distort variance/covariance estimates -- confirm this is intended.
returns_df = output.daily_return.unstack().fillna(0)

# Preview only; returns_df itself is not truncated.
returns_df.head()

Unnamed: 0,Equity(24 [AAPL]),Equity(1091 [BRK_A]),Equity(4151 [JNJ]),Equity(5061 [MSFT]),Equity(8229 [WMT]),Equity(8347 [XOM]),Equity(11100 [BRK_B]),Equity(16841 [AMZN]),Equity(25006 [JPM]),Equity(26578 [GOOG_L]),Equity(35920 [V]),Equity(42950 [FB]),Equity(46631 [GOOG]),Equity(47740 [BABA])
2019-07-24 00:00:00+00:00,0.007745,0.007579,0.001632,0.006213,-0.006648,0.003929,0.008007,0.004305,0.018201,0.0079,0.001828,0.000445,0.007283,0.024425
2019-07-25 00:00:00+00:00,-0.000742,0.008599,0.007062,0.010266,-0.000892,0.0,0.007363,0.003294,0.003867,-0.006923,0.013545,0.011167,-0.007309,0.001515
2019-07-26 00:00:00+00:00,-0.007763,-0.00141,0.010712,-0.003766,0.00259,-0.005704,-0.000289,-0.013003,-0.008732,-0.002219,-0.008891,-0.01935,-0.003388,-0.00689


In [78]:
# (number of dates, number of equities) in the pivoted returns table.
returns_df.shape

(3, 14)

In [79]:
# Multiplier applied to the daily covariance to annualize it
# (presumably the number of trading days per year -- confirm).
ann_factor = 252

# np.cov treats each row as one variable by default, so transpose the
# dates x equities table to put the equities on the rows.
asset_rows = returns_df.T
cov_assets = np.cov(asset_rows) * ann_factor

In [80]:
# Display the scaled covariance matrix computed in the previous cell.
cov_assets

array([[ 0.01519819,  0.00844386, -0.00892614,  0.00919295, -0.00909661,
         0.00935762,  0.00788984,  0.0164407 ,  0.02636853,  0.01048769,
         0.00942115,  0.0180709 ,  0.01099613,  0.03104352],
       [ 0.00844386,  0.00764564, -0.00472993,  0.00984045, -0.00470577,
         0.00586708,  0.00632108,  0.01334306,  0.01485199,  0.0012079 ,
         0.01398058,  0.02076186,  0.00175588,  0.01437593],
       [-0.00892614, -0.00472993,  0.00526026, -0.00503185,  0.0053696 ,
        -0.00544402, -0.00448345, -0.00932924, -0.01547097, -0.00651805,
        -0.0048544 , -0.00978122, -0.00679606, -0.01845519],
       [ 0.00919295,  0.00984045, -0.00503185,  0.01314297, -0.00494453,
         0.00673055,  0.00787646,  0.01668728,  0.0162733 , -0.00105591,
         0.01971052,  0.02810752, -0.00032303,  0.01417737],
       [-0.00909661, -0.00470577,  0.0053696 , -0.00494453,  0.00548564,
        -0.0055221 , -0.00449399, -0.0093443 , -0.0157586 , -0.00682153,
        -0.00460816, -0.00

In [81]:
# Shape of the covariance matrix: one row and one column per equity
# column of returns_df.
cov_assets.shape

(14, 14)

In [15]:
# import seaborn as sns

In [16]:
# # view a heatmap of the covariance matrix
# sns.heatmap(cov_assets, cmap = 'Paired')
# # if the colors are not distinctive, please try a couple of these color schemes
# cmap = 'tab10'