In [None]:
from gs_quant.session import Environment, GsSession
# Start a GS Quant session against the PROD environment.
# External users: replace the None placeholders below with your client id and secret.
# Internal JupyterHub users: please skip this step.
GsSession.use(
    Environment.PROD,
    client_id=None,
    client_secret=None,
)

# Factor Models

The GS Quant `FactorRiskModel` class allows users to access vendor factor models such as Barra. The `FactorRiskModel` interface supports date-based querying of the factor risk model outputs such as factor returns, covariance matrix and specific risk for assets.

In this tutorial, we’ll look at pulling available risk models, their coverage and how to access factor returns and the covariance matrix for each. We also show how to pull factor loadings, specific risk and total risk for assets.

### Risk Model Coverage

The third party risk models that have been onboarded onto Marquee for programmatic access are below.

| Risk Model Name | Risk Model Id | Description|
|-----------------|---------------|-------------|
| US Med          | BARRA_USMEDS  |Barra (MSCI) US Total Market Equity Model for Medium-Term Investors|
| US Long         | BARRA_USSLOWL |Barra (MSCI) US Total Market Equity Model for Long-Term Investors|
| World Long      | BARRA_GEMLTL  |Barra (MSCI) Global Total Market Equity Model for Long-Term Investors|
| US Long         | BARRA_USE4L   |Barra (MSCI) US Total Market Equity Model for Long-Term Investors with Volatility Regime Adjusted Specific Risk|
| US Fast         | BARRA_USFAST  |Barra (MSCI) US Trading Model For Short-Term Investors |

After selecting a risk model, we can create an instance of the risk model to pull information on the model coverage such as the available dates, asset coverage universe, available factors and model description. The `CoverageType` enum of the model indicates whether the scope of the universe is Global, Region or Country and the `Term` enum refers to the horizon of the model.

In [None]:
from gs_quant.models.risk_model import FactorRiskModel
model_id = 'BARRA_USMEDS'
factor_model = FactorRiskModel.get(model_id)

# Look at the dates the model has data for before choosing start and end dates
available_days = factor_model.get_dates()
first_day, last_day = available_days[0], available_days[-1]
print(f'Data available for {model_id} from {first_day} to {last_day}')

# Model metadata, one attribute per output line
print(
    f'{model_id}:\n'
    f' - Name: {factor_model.name}\n'
    f' - Description: {factor_model.description}\n'
    f' - Coverage: {factor_model.coverage.value}\n'
    f' - Horizon: {factor_model.term.value}'
)
print(f'For all info https://marquee.gs.com/v1/risk/models/{model_id}')

### Query Factor Model Data

The following parameters are used when querying factor data:

* `start_date` - date or datetime that is a business day
* `end_date` - date or datetime that is a business day. If an end date is not specified, it will default to the last available date
* `limit_factors` - A boolean to limit output to only exposed factors. Set to False when not querying data for a particular asset.

##### Get Available Factors

For each model, we can retrieve a list of factors available. Each factor has a `name`, `id`, `type` and `factorCategory`.

A factor's `factorCategory` can be one of the following:
* Style - balance sheet and market metrics
* Industry - an asset's line of business (e.g. Barra uses the GICS classification)
* Country - references an asset’s exchange country location


In [None]:
import datetime as dt

# Start dates are expected to be business days. 2020-01-04 is a Saturday, so
# begin on the following Monday (2020-01-06) instead.
factor_data_start = dt.date(2020, 1, 6)

# Index the factors by identifier so factor ids can be mapped to names later on.
available_factors = factor_model.get_factor_data(factor_data_start).set_index('identifier')

# Preview the last few rows once the factors are ordered by category.
available_factors.sort_values(by=['factorCategory']).tail()

##### Get All Factor Returns

The factor returns represent the regression outputs of the model for each day. The definitions of each factor vary depending on the model. More details can be found in the [Marquee Data Catalog](https://marquee.gs.com/s/developer/datasets/BARRA_USSLOWL).

To query factor returns, we can either use `get_factor_returns_by_name` to retrieve the returns with names or `get_factor_returns_by_id` to get the returns with factor ids. We can leverage [the timeseries package](https://developer.gs.com/docs/gsquant/data/data-analytics/timeseries/) to transform and visualize the results.

In [None]:
from gs_quant.timeseries import beta
import matplotlib.pyplot as plt

# Start dates are expected to be business days. 2020-01-04 is a Saturday, so
# begin on the following Monday (2020-01-06) instead.
factor_returns = factor_model.get_factor_returns_by_name(dt.date(2020, 1, 6))
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# Left panel: cumulative sum of the daily returns of three style factors.
factor_returns[['Growth', 'Momentum', 'Size']].cumsum().plot(title='Factor Performance over Time for Risk Model', ax=ax[0])

# Right panel: rolling beta over a 63-observation window; prices=False because
# the inputs are already returns rather than price levels.
factor_beta = beta(factor_returns['Growth'], factor_returns['Momentum'], 63, prices=False)
factor_beta.plot(title='3m Rolling Beta of Growth to Momentum', ax=ax[1])
plt.show()

##### Covariance Matrix

The covariance matrix represents an N-factor by N-factor matrix with the diagonal representing the variance of each factor for each day. The covariance matrix is in daily variance units.

In [None]:
# Covariance matrices over the query window, scaled by 100
cov_start, cov_end = dt.date(2021, 1, 4), dt.date(2021, 2, 26)
cov_data = factor_model.get_covariance_matrix(cov_start, cov_end) * 100

# Display the matrix for the last date of the window, rounded to 6 decimals
round(cov_data.loc['2021-02-26'], 6)

##### Factor Correlation and Volatility

The `Factor` Class allows for quick analytics for a specified factor to easily support comparing one factor across different models or to another factor.

The factor volatility and correlation functions use the covariance matrix for calculations:
* Volatility is the square root of the diagonal
* Correlation is derived from the covariance matrix by dividing the cov(x,y) by the vol(x) * vol(y)

In [None]:
import matplotlib.ticker as mtick
import numpy as np

from gs_quant.markets.factor import Factor

momentum = Factor(model_id, 'Momentum')
growth = Factor(model_id, 'Growth')

# Both series are queried over the same window
analysis_start = dt.date(2020, 1, 6)
analysis_end = dt.date(2021, 2, 26)
vol = momentum.volatility(analysis_start, analysis_end)
corr = momentum.correlation(growth, analysis_start, analysis_end)

# Correlation goes on the left-hand axis, volatility on a twin right-hand axis
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot(corr * 100, 'g-', label='Momentum vs Growth Correlation (LHS)')
for axis in (ax1, ax2):
    axis.yaxis.set_major_formatter(mtick.PercentFormatter())
ax2.plot(vol * 1e4, 'b-', label='Momentum Volatility (RHS)')

# Place an x tick at every 63rd observation
pos = np.arange(len(vol.index))
plt.xticks(pos[::63])
fig.legend(loc="lower right", bbox_to_anchor=(.75, -0.10))
plt.title('Momentum vs Growth Historical Factor Analysis')
plt.show()

### Query Asset Data

The factor model outputs regression results for a universe of assets each day. The residual or factor risk represents the beta coefficient that can be attributed to the model and specific risk refers to the error term that is not explained by the model.

The asset factor loadings are used to derive the following measures:


| Measure         | Definition    |
|-----------------|---------------|
| `Specific Risk`      | Annualized idiosyncratic risk or error term which is not attributable to factors in percent units |
| `Total Risk`        | Annualized risk which is the sum of specific and factor risk in percent units |
| `Historical Beta`          | The covariance of the residual returns relative to the model's estimation universe or benchmark (i.e. results of a one factor model)  |
| `Residual Variance`    | Daily error variance that is not explained by the model which is equal to $$\frac{\left(\frac{\text{Specific Risk}}{100}\right)^2}{252}$$ |
| `Universe Factor Exposure` | Z-score for each factor relative to the model's estimation universe |

We can retrieve an asset universe on a given date by passing in an empty list and a `UniverseIdentifier`. For each model, check the `universe_identifier` to see which identifiers are supported, as this can vary by vendor. For Barra models specifically, raw data can be queried using sedol.

##### Get Risk Model Universe Coverage

In [None]:
from gs_quant.models.risk_model import UniverseIdentifier, DataAssetsRequest

# An empty identifier list asks for the model's asset universe, keyed by gsid
data_asset_universe_request = DataAssetsRequest(UniverseIdentifier.gsid, [])
universe_on_date = factor_model.get_asset_universe(
    dt.date(2021, 1, 4),
    assets=data_asset_universe_request,
)
universe_on_date

##### Query Aggregated Risk

For asset data, we can query for a specific measure or pull data for a list of measures over a range of dates.

In [None]:
from gs_quant.markets.securities import SecurityMaster, AssetIdentifier

# Convert the asset from its Bloomberg id (bbid) to a gsid
asset_bbid = 'AAPL UW'
asset_gsid = SecurityMaster.get_asset(asset_bbid, AssetIdentifier.BLOOMBERG_ID).get_identifier(AssetIdentifier.GSID)

# Request risk for the single asset. Start dates are expected to be business
# days; 2020-01-04 is a Saturday, so begin on the following Monday (2020-01-06).
universe_for_request = DataAssetsRequest(UniverseIdentifier.gsid, [asset_gsid])
risk_start = dt.date(2020, 1, 6)
risk_end = dt.date(2021, 2, 24)
specific_risk = factor_model.get_specific_risk(risk_start, risk_end, universe_for_request)
total_risk = factor_model.get_total_risk(risk_start, risk_end, universe_for_request)

# NOTE(review): this treats total risk as the arithmetic sum of specific and
# factor risk so that the stacked areas add up to total risk. If the model
# combines risks in variance terms, factor risk would instead be
# sqrt(total_risk**2 - specific_risk**2) -- confirm against the vendor docs.
factor_risk = total_risk - specific_risk

# Stack specific and factor risk, with an x tick at every 63rd observation
pos = np.arange(len(total_risk.index))
plt.stackplot(specific_risk.index, specific_risk[asset_gsid], factor_risk[asset_gsid], labels=['Specific Risk','Factor Risk'])
plt.title(f'{asset_bbid} Risk')
plt.xticks(pos[::63])
plt.legend(loc='upper right')
plt.show()

##### Get Factor Loadings

When pulling the asset factor exposures, set `limit_factors` to True to receive only non-zero exposures.

In [None]:
import seaborn as sns

# Start dates are expected to be business days. 2020-01-04 is a Saturday, so
# begin on the following Monday (2020-01-06) instead.
exposure_start = dt.date(2020, 1, 6)
factor_exposures = factor_model.get_universe_factor_exposure(exposure_start, dt.date(2021, 2, 24), universe_for_request)

# Relabel the columns from factor identifiers to factor names
factor_exposures.columns = [available_factors.loc[factor_id, 'name'] for factor_id in factor_exposures.columns]

sns.boxplot(data=factor_exposures[['Beta', 'Momentum', 'Growth', 'Profitability']])
# Build the date in the title from the start date so the two cannot drift apart
plt.title(f'Distribution of {asset_bbid} Factor Exposures since {exposure_start.month}/{exposure_start.day}/{exposure_start:%y}')
plt.show()

##### Query Multiple Asset Measures

In [None]:
from gs_quant.models.risk_model import Measure

# Measures to pull together in one request
data_measures = [
    Measure.Universe_Factor_Exposure,
    Measure.Asset_Universe,
    Measure.Historical_Beta,
    Measure.Specific_Risk,
]
asset_risk_data = factor_model.get_data(
    data_measures,
    dt.date(2021, 1, 4),
    dt.date(2021, 2, 24),
    universe_for_request,
    limit_factors=True,
)