In [1]:
# imports
# https://classroom.udacity.com/nanodegrees/nd880/parts/9a3a9589-7bc6-4694-81e0-8c3cb1aea251/modules/1976c245-f4ec-42bf-9611-180753a3a4df/lessons/de470330-cbcf-401c-9b5e-e72129b2e34d/concepts/cd2aef83-e1a4-4eac-865d-f90e87be0246
import pandas as pd
import numpy as np
from quantopian.pipeline import Pipeline
from quantopian.research import run_pipeline
from zipline.pipeline.data import USEquityPricing
from quantopian.pipeline.filters import Q1500US

from quantopian.pipeline.factors import AverageDollarVolume
from zipline.utils.calendars import get_calendar

from quantopian.pipeline.factors import Returns
from zipline.pipeline.factors import DailyReturns
from quantopian.pipeline.data import morningstar as mstar

# Look up the NYSE trading calendar (not referenced again in the cells visible here).
trading_calendar = get_calendar('NYSE')

In [2]:
# Filter: equities whose most recent Morningstar market cap is above $20 billion
market_cap_latest = mstar.valuation.market_cap.latest
have_market_cap = market_cap_latest > 20 * 10**9

In [3]:
# Create a pipeline that has no columns yet and uses have_market_cap as its screen
pipeline = Pipeline(screen=have_market_cap)

In [4]:
# Daily returns factor, computed from the closing price
close_price = USEquityPricing.close
daily_ret = DailyReturns(inputs=[close_price])

#### Add factors and filters to the pipeline
In the code below, we add the `daily_ret` factor to the pipeline created above.

In [5]:
# Add the daily returns factor to the pipeline as the 'daily_return' column.
# overwrite=True keeps this cell re-runnable: without it, running the cell a
# second time raises because the pipeline already has a column with that name.
pipeline.add(daily_ret, 'daily_return', overwrite=True)

# Set the start and end dates (UTC) for the pipeline run
start_date = pd.Timestamp('2014-01-01', tz='utc')
end_date = pd.Timestamp('2019-07-26', tz='utc')

# Run the pipeline for the given start and end dates
output = run_pipeline(pipeline, start_date, end_date)

# Display the first rows of the pipeline output
output.head()



Unnamed: 0,Unnamed: 1,daily_return
2014-01-02 00:00:00+00:00,Equity(24 [AAPL]),0.012011
2014-01-02 00:00:00+00:00,Equity(62 [ABT]),-0.001562
2014-01-02 00:00:00+00:00,Equity(64 [GOLD]),0.029206
2014-01-02 00:00:00+00:00,Equity(114 [ADBE]),0.005542
2014-01-02 00:00:00+00:00,Equity(128 [ADM]),-0.006409


Turn the output into a datetime indexed dataframe

In [7]:
# Pivot the (date, asset) indexed daily_return column into a table with one row
# per date and one column per asset, then replace missing values with 0
daily_return_by_asset = output['daily_return'].unstack()
returns_df = daily_return_by_asset.fillna(0)
returns_df

Unnamed: 0,Equity(2 [ARNC]),Equity(24 [AAPL]),Equity(53 [ABMD]),Equity(62 [ABT]),Equity(64 [GOLD]),Equity(67 [ADSK]),Equity(76 [TAP]),Equity(114 [ADBE]),Equity(122 [ADI]),Equity(128 [ADM]),...,Equity(52592 [LIN]),Equity(52709 [TME]),Equity(52747 [DELL]),Equity(52968 [FOXA]),Equity(52969 [FOX]),Equity(52991 [DOW]),Equity(53023 [LYFT]),Equity(53095 [ZM]),Equity(53158 [UBER]),Equity(53196 [CTVA])
2014-01-02 00:00:00+00:00,0.0,0.012011,0.0,-0.001562,0.029206,0.000000,0.0,0.005542,0.000000,-0.006409,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-03 00:00:00+00:00,0.0,-0.014131,0.0,-0.002608,0.039160,0.000000,0.0,-0.009855,0.000000,-0.009445,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-06 00:00:00+00:00,0.0,-0.022034,0.0,0.010199,-0.009285,0.000000,0.0,-0.002024,0.000000,0.004186,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-07 00:00:00+00:00,0.0,0.005377,0.0,0.013461,0.012128,0.000000,0.0,-0.017918,0.000000,0.002547,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-08 00:00:00+00:00,0.0,-0.007205,0.0,-0.007663,-0.004357,0.000000,0.0,0.014802,0.000000,-0.010164,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-09 00:00:00+00:00,0.0,0.006407,0.0,0.009009,-0.018600,0.000000,0.0,-0.001187,0.000000,-0.011435,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-10 00:00:00+00:00,0.0,-0.012861,0.0,0.001786,-0.011706,0.000000,0.0,0.003566,0.000000,-0.004485,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-13 00:00:00+00:00,0.0,-0.006673,0.0,0.007894,0.025381,0.000000,0.0,0.007445,0.000000,-0.004624,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-14 00:00:00+00:00,0.0,0.005048,0.0,-0.006352,0.000000,0.000000,0.0,-0.015536,0.000000,-0.010602,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000
2014-01-15 00:00:00+00:00,0.0,0.020127,0.0,0.011762,-0.020902,0.000000,0.0,0.029771,0.000000,0.019263,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000


#### Let's look at a two stock portfolio
Let's pretend we have a portfolio of 2 stocks. We'll pick Apple and Adobe in this example.

In [8]:
def _column_for_symbol(columns, symbol):
    """Return the one column label whose ``symbol`` attribute equals ``symbol``.

    Looking the column up by ticker instead of by position keeps the right
    stock selected when the set of columns changes (for example with a
    different date range or market-cap threshold).

    Raises:
        ValueError: if zero or more than one column matches ``symbol``.
    """
    matches = [col for col in columns if getattr(col, 'symbol', None) == symbol]
    if len(matches) != 1:
        raise ValueError(
            'expected exactly one column for symbol %r, found %d' % (symbol, len(matches))
        )
    return matches[0]


# Pick the Apple and Adobe columns by ticker symbol
aapl_col = _column_for_symbol(returns_df.columns, 'AAPL')
adbe_col = _column_for_symbol(returns_df.columns, 'ADBE')

# Daily returns of the two assets, renamed so the combined frame has readable columns
asset_return_1 = returns_df[aapl_col].rename('asset_return_aapl')
asset_return_2 = returns_df[adbe_col].rename('asset_return_adbe')

# Side-by-side frame: one row per date, one column per asset
asset_return_df = pd.concat([asset_return_1, asset_return_2], axis=1)
asset_return_df.head(2)

Unnamed: 0,asset_return_aapl,asset_return_adbe
2014-01-02 00:00:00+00:00,0.012011,0.005542
2014-01-03 00:00:00+00:00,-0.014131,-0.009855


#### Factor returns
Let's make up a factor by taking an average of all stocks in our list. You can think of this as an equal-weighted index of X stocks, kind of like a measure of the "market". We'll also make another factor by calculating the median of all stocks. These are mainly intended to help us generate some data to work with. We'll go into how some common risk factors are generated later in the lessons.

Also note that we're setting axis = 1 so that we calculate a value for each time period (row) instead of one value for each column (asset).

In [37]:
# Two made-up factors, each computed across the asset columns (axis=1) for every
# date: the mean return and the median return
factor_return_l = [returns_df.mean(axis=1), returns_df.median(axis=1)]
factor_return_1, factor_return_2 = factor_return_l
factor_return_l

[2014-01-02 00:00:00+00:00    0.002703
 2014-01-03 00:00:00+00:00   -0.006197
 2014-01-06 00:00:00+00:00   -0.000250
 2014-01-07 00:00:00+00:00   -0.001847
 2014-01-08 00:00:00+00:00    0.002682
 2014-01-09 00:00:00+00:00    0.000004
 2014-01-10 00:00:00+00:00   -0.000018
 2014-01-13 00:00:00+00:00    0.002897
 2014-01-14 00:00:00+00:00   -0.006029
 2014-01-15 00:00:00+00:00    0.005186
 2014-01-16 00:00:00+00:00    0.001543
 2014-01-17 00:00:00+00:00   -0.000632
 2014-01-21 00:00:00+00:00   -0.001832
 2014-01-22 00:00:00+00:00    0.001305
 2014-01-23 00:00:00+00:00    0.001113
 2014-01-24 00:00:00+00:00   -0.005343
 2014-01-27 00:00:00+00:00   -0.012185
 2014-01-28 00:00:00+00:00   -0.003111
 2014-01-29 00:00:00+00:00    0.004655
 2014-01-30 00:00:00+00:00   -0.005620
 2014-01-31 00:00:00+00:00    0.006179
 2014-02-03 00:00:00+00:00   -0.003338
 2014-02-04 00:00:00+00:00   -0.011407
 2014-02-05 00:00:00+00:00    0.004164
 2014-02-06 00:00:00+00:00   -0.000451
 2014-02-07 00:00:00+00:0

#### Factor Exposures
Factor exposures refer to how "exposed" a stock is to each factor. We'll get into this more later. For now, just think of this as one number for each stock, for each of the factors.

In [15]:
from sklearn.linear_model import LinearRegression

In [16]:
"""
for now, just assume that we're calculating a number for each
stock, for each factor, which represents how "exposed" each stock is 
to each factor.
We'll discuss how each factor exposure is calculated later in the lessons.
"""

def get_factor_exposures(factor_return_l, asset_return):
    """Regress one asset's returns on the factor returns and return the coefficients.

    Args:
        factor_return_l: list of factor return series, one entry per factor
            (assumed to be equal-length and aligned with ``asset_return``).
        asset_return: return series of a single asset; only ``.values`` is read.

    Returns:
        The fitted ``LinearRegression`` coefficients, one per factor, in the
        same order as ``factor_return_l``.
    """
    # Stack the factors and transpose so each factor becomes one regressor column
    factor_matrix = np.array(factor_return_l).T
    asset_values = np.array(asset_return.values)
    fitted = LinearRegression().fit(factor_matrix, asset_values)
    return fitted.coef_

In [17]:
# Estimate the factor exposures of every asset (one column of asset_return_df each)
factor_exposure_l = [
    get_factor_exposures(factor_return_l, asset_return_df[asset_name])
    for asset_name in asset_return_df.columns
]

# Stack into an array: one row per asset, one column per factor
factor_exposure_a = np.array(factor_exposure_l)

In [18]:
# Exposures of asset 1 (first column of asset_return_df) to factor 1 and factor 2
factor_exposure_a[0]

array([ 1.75569096,  0.35145681])

In [19]:
# Exposures of asset 2 (second column of asset_return_df) to factor 1 and factor 2
factor_exposure_a[1]

array([ 2.18677739,  0.13861425])

#### Variance of stock 1
$\textrm{Var}(r_1) = \beta_{1,1}^2\textrm{Var}(f_1) + \beta_{1,2}^2\textrm{Var}(f_2) + 2\beta_{1,1}\beta_{1,2}\textrm{Cov}(f_1,f_2) + \textrm{Var}(s_1)$

In [20]:
# Asset 1's exposures to factor 1 and factor 2
factor_exposure_1_1, factor_exposure_1_2 = factor_exposure_a[0]

# Part of asset 1's return explained by the two factors ...
common_return_1 = (factor_exposure_1_1 * factor_return_1) + (factor_exposure_1_2 * factor_return_2)
# ... and the remainder, which is specific to the asset
specific_return_1 = asset_return_1 - common_return_1

In [21]:
# Sample covariance matrix (ddof=1) of the two factor return series
covm_f1_f2 = np.cov(factor_return_1, factor_return_2, ddof=1)

# The diagonal holds the factor variances; the off-diagonal entry is their covariance
var_f1, var_f2 = covm_f1_f2.diagonal()
cov_f1_f2 = covm_f1_f2[1, 0]

# Sample variance of asset 1's specific return
var_s_1 = np.var(specific_return_1, ddof=1)

# Variance of asset 1: factor-1 term + factor-2 term + cross term + specific variance
var_asset_1 = (
    factor_exposure_1_1 ** 2 * var_f1
    + factor_exposure_1_2 ** 2 * var_f2
    + 2 * factor_exposure_1_1 * factor_exposure_1_2 * cov_f1_f2
    + var_s_1
)

var_asset_1

0.00023649037560283191

#### Variance of asset 2
$\textrm{Var}(r_2) = \beta_{2,1}^2\textrm{Var}(f_1) + \beta_{2,2}^2\textrm{Var}(f_2) + 2\beta_{2,1}\beta_{2,2}\textrm{Cov}(f_1,f_2) + \textrm{Var}(s_2)$

In [23]:
# Asset 2's exposures to factor 1 and factor 2
factor_exposure_2_1, factor_exposure_2_2 = factor_exposure_a[1]

# Part of asset 2's return explained by the two factors ...
common_return_2 = (factor_exposure_2_1 * factor_return_1) + (factor_exposure_2_2 * factor_return_2)
# ... and the remainder, which is specific to the asset
specific_return_2 = asset_return_2 - common_return_2

In [25]:
# The factor variances and covariance (var_f1, var_f2, cov_f1_f2) were already
# computed for asset 1 and are reused here.

# Sample variance of asset 2's specific return
var_s_2 = np.var(specific_return_2, ddof=1)

# Variance of asset 2: factor-1 term + factor-2 term + cross term + specific variance
var_asset_2 = (
    factor_exposure_2_1 ** 2 * var_f1
    + factor_exposure_2_2 ** 2 * var_f2
    + 2 * factor_exposure_2_1 * factor_exposure_2_2 * cov_f1_f2
    + var_s_2
)

var_asset_2

0.00027579873409666757

#### Covariance of stocks 1 and 2
$\textrm{Cov}(r_1,r_2) = \beta_{1,1}\beta_{2,1}\textrm{Var}(f_1) + \beta_{1,1}\beta_{2,2}\textrm{Cov}(f_1,f_2) + \beta_{1,2}\beta_{2,1}\textrm{Cov}(f_1,f_2) + \beta_{1,2}\beta_{2,2}\textrm{Var}(f_2)$

In [28]:
# Covariance of assets 1 and 2 through their shared factor exposures:
# each exposure of asset 1 is paired with each exposure of asset 2
cov_asset_1_2 = (
    factor_exposure_1_1 * factor_exposure_2_1 * var_f1
    + factor_exposure_1_1 * factor_exposure_2_2 * cov_f1_f2
    + factor_exposure_1_2 * factor_exposure_2_1 * cov_f1_f2
    + factor_exposure_1_2 * factor_exposure_2_2 * var_f2
)

cov_asset_1_2

9.1434212281581845e-05

#### Quiz 1: Calculate the portfolio variance
We'll choose stock weights for now (in a later lesson, you'll learn how to use portfolio optimization that uses alpha factors and a risk factor model to choose stock weights).

$
\textrm{Var}(r_p) = x_1^2\textrm{Var}(r_1) + x_2^2\textrm{Var}(r_2) + 2x_1x_2\textrm{Cov}(r_1, r_2)
$

In [29]:
# Portfolio weights of asset 1 and asset 2
weight_1 = 0.60
weight_2 = 0.40

# Portfolio variance: weighted own-variance of each asset plus the covariance term
weighted_var_1 = weight_1 ** 2 * var_asset_1
weighted_var_2 = weight_2 ** 2 * var_asset_2
weighted_cov_1_2 = 2 * weight_1 * weight_2 * cov_asset_1_2
var_portfolio = weighted_var_1 + weighted_var_2 + weighted_cov_1_2

var_portfolio

0.0001731527545676456

#### Quiz 2: Do it with matrices
create  matrices $\mathbf{F}$, $\mathbf{B}$, and $\mathbf{S}$, where:

$
\mathbf{F} = \begin{pmatrix}
\textrm{Var}(f1) & \textrm{Cov}(f1,f2) \\
\textrm{Cov}(f2,f1) & \textrm{Var}(f2)
\end{pmatrix}
$
is the covariance matrix for factors,

$
\mathbf{B} = \begin{pmatrix}
\beta_{1,1} & \beta_{1,2} \\
\beta_{2,1} & \beta_{2,2}
\end{pmatrix}
$
is the matrix of factor exposures, and

$
\mathbf{S} = \begin{pmatrix}
\textrm{Var}(s_i) & 0 \\
0 & \textrm{Var}(s_j)
\end{pmatrix}
$
is the matrix of specific variances.

$
\mathbf{X} = \begin{pmatrix}
x_1 \\
x_2
\end{pmatrix}
$


##### Concept Question
What are the dimensions of the portfolio variance $\textrm{Var}(r_p)$? Given this, when choosing whether to multiply a row vector or a column vector on the left and right sides of $\mathbf{BFB}^T$, which choice helps you get the dimensions of the portfolio variance term?

In other words: Given that $\mathbf{X}$ is a column vector, which makes more sense?

$
\mathbf{X}^T(\mathbf{BFB}^T + \mathbf{S})\mathbf{X}
$

or

$
\mathbf{X}(\mathbf{BFB}^T + \mathbf{S})\mathbf{X}^T ?
$

#### Answer 2
Since the portfolio variance is 1 by 1 (it's a scalar), we want the matrix multiplications to create a 1 by 1 output as well. This means we should put the row vector

$
\mathbf{X}^T = \begin{pmatrix}
x_{i} &  x_{j}
\end{pmatrix}
$

on the left, and put the column vector 

$
\mathbf{X} = \begin{pmatrix}
x_{i} \\
x_{j}
\end{pmatrix}
$

on the right

So we should use:
$
\mathbf{X}^T(\mathbf{BFB}^T + \mathbf{S})\mathbf{X}
$

### Quiz 3: Calculate portfolio variance using matrices

In [31]:
# F: covariance matrix of the two factors.
# Reuses covm_f1_f2 = np.cov(factor_return_1, factor_return_2, ddof=1) from earlier,
# where factor_return_1 is the mean and factor_return_2 the median of returns_df
# taken along axis=1.
F = covm_f1_f2
F

array([[  2.22970027e-05,   5.58946490e-06],
       [  5.58946490e-06,   3.55342221e-06]])

In [32]:
# B: matrix of factor exposures, one row per asset and one column per factor.
# Reuses factor_exposure_a, which was built earlier by calling
# get_factor_exposures(factor_return_l, ...) once per column of asset_return_df.

B = factor_exposure_a
B

array([[ 1.75569096,  0.35145681],
       [ 2.18677739,  0.13861425]])

In [33]:
# S: diagonal matrix of the specific variances of asset 1 and asset 2
specific_variances = [var_s_1, var_s_2]
S = np.diag(specific_variances)
S

array([[ 0.00016042,  0.        ],
       [ 0.        ,  0.00016572]])

In [34]:
# X: the stock weights as a 2x1 column vector
X = np.array([[weight_1], [weight_2]])
X

array([[ 0.6],
       [ 0.4]])

In [35]:
# Asset covariance matrix implied by the factor model: B F B^T + S
asset_cov = np.dot(np.dot(B, F), B.T) + S

# Portfolio variance X^T (B F B^T + S) X; the result is a 1x1 array, not a scalar
var_portfolio = np.dot(np.dot(X.T, asset_cov), X)
var_portfolio

array([[ 0.00017315]])