In [1]:
import numpy as np
import pandas as pd

# to access public datasets from the knoema.com
import knoema

import bamboolib

# 0. Theoretical background

$$r_t^{eqy} = E[r_t^{eqy}] + \beta_1^{eqy} INFL_t+\beta_2^{eqy} GR_t+ \beta_3^{eqy} FS_t + \alpha_t^{eqy}$$
$$r_t^{ust} = E[r_t^{ust}] + \beta_1^{ust} INFL_t+\beta_2^{ust} GR_t+ \beta_3^{ust} FS_t + \alpha_t^{ust}$$
$$\vdots$$
$$r_t^{fxcs} = E[r_t^{fxcs}] + \beta_1^{fxcs} INFL_t+\beta_2^{fxcs} GR_t+ \beta_3^{fxcs} FS_t + \alpha_t^{fxcs}$$

, where
- $INFL_t$ stands for an *inflation* macro-factor return at time t. Likewise, $GR$ for *growth* and $FS$ for *financial stress* factors.
- $r_t^{eqy}$ is an *excess* return of global equity markets at time t as one of the base assets. T-Bill 1M is used for the excess return calculation. A risk-free return such as $r_f$ is omitted for simplicity.
  - $ust$ stands for U.S 10Yr Treasury, and the rest of notation should be the same as that of $eqy$. The same naming rule applies for the rest of base assets in denoting $r_t^{asset}$.
  - A full list of base assets used in this model : Equities($eqy$), Treasuries($ust$), Credit($cre$), Inflation-Linked Bonds($ilb$), Gold($gold$), Industrial Metals($inm$), Energy commodity($eng$), U.S. Dollar($dxy$), Commodity vs safe haven currencies ($fxcs$). Abbreviations in ().
- $E[\cdot]$ is an expected excess return.
- $\beta_{\#}^{instrument}$ is a factor beta, or factor loading, for that $instrument$. This value is the **same across all periods** of time being modeled. Therefore, there is no subscript $t$. Instead, we have a digit subscript 1 for $INFL$ beta, 2 for $GR$ beta and 3 for $FS$ beta.
  - e.g. $\beta_1^{eqy}$ is the sensitivity of Equities to the $INFL$ factor.
- $E[\alpha^{asset}] = 0$ for all periods of time being modeled.

# 1. Load datasets

#### Set data frequency

In [2]:
# pandas frequency alias ('M' = monthly) passed to every `to_period(...)` call below.
_freq = 'M'

#### From Bloomberg
- For data of more recent years.

In [3]:
# Bloomberg extract of the base-asset series.
bbg_filename = '../../data/raw/base_assets_price_bbg_M.log'

# header=2: column names are read from row 2 (0-based); the rows before it
# are discarded. `date` is parsed to datetimes and becomes the index.
df_bbg = (
    pd.read_csv(bbg_filename, header=2, parse_dates=['date'])
    .set_index(['date'], drop=True)
)

In [4]:
df_bbg.tail()

Unnamed: 0_level_0,DMEQ_idx,GLT_idx,CRE_Baa_idx,CRE_Aaa_idx,TIPS_idx,GOLD_pr,INM_idx,ENGY_idx,DXY_idx,USDCAD_rate,USDNOK_rate,AUDUSD_rate,USDJPY_rate,USDCHF_rate,TBILL3M_idx,EMEQ_idx,ACEQ_idx
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2019-11-29,2292.26,2383.16,4206.804,2246.016,316.2039,1463.98,314.6,190.08,98.273,1.3282,9.2248,0.6763,109.49,1.0002,1.5672,1040.05,546.7
2019-12-31,2358.47,2369.78,4233.494,2244.171,317.4027,1517.27,324.21,206.6327,96.389,1.299,8.7855,0.7021,108.61,0.9666,1.5436,1114.66,565.24
2020-01-31,2342.41,2427.69,4332.237,2281.815,324.0671,1589.16,301.55,174.8693,97.39,1.3237,9.2011,0.6692,108.35,0.9634,1.5382,1062.34,558.62
2020-02-28,2141.12,2492.04,4372.826,2320.946,328.5524,1585.69,298.11,154.712,98.132,1.3407,9.3939,0.6515,107.89,0.9649,1.267,1005.52,512.76
2020-03-31,1852.73,2564.12,3920.704,2333.13,322.7761,1577.18,267.86,82.0629,99.048,1.4062,10.4028,0.6131,107.54,0.9611,0.061,848.58,442.35


#### From Global Financial Data (GFD)
- For data of older years; since 1850.

In [5]:
# Global Financial Data extract; `Date` is parsed to datetimes on load.
gfd_filename = '../../data/raw/base_assets_price_gfd_M.log'

df_gfd = (
    pd.read_csv(gfd_filename, header=0, parse_dates=['Date'])
    # NOTE(review): the frame is pivoted on a `Ticker` column in the next cell,
    # so the file is presumably long-format and these two names would be
    # Ticker *values*, not columns; if so this rename matches nothing. Confirm.
    .rename(columns={'CHFUSD': 'CHFUSD_rate', 'GBPUSD': 'GBPUSD_rate'})
)

#### We pivot this table.

In [6]:
# Long -> wide: one row per Date, one column per Ticker. The pivot leaves a
# two-level column index (value field on top, ticker underneath); dropping
# level 0 ("Close") keeps only the ticker names.
df_gfd = (
    df_gfd
    .pivot_table(index=['Date'], columns=['Ticker'])
    .droplevel(0, axis=1)
)

#### Add a suffix `_rate` to an FX rate column name.

In [7]:
# Any ticker containing "USD" is treated as an FX pair and tagged with `_rate`;
# every other ticker keeps its name unchanged.
df_gfd.columns = [
    col + "_rate" if "USD" in col else col
    for col in df_gfd.columns
]

# 2. Preprocessing

## Merge two datasets
- Changing `date` formats to properly join

In [8]:
df_gfd.index

DatetimeIndex(['1850-01-31', '1850-02-28', '1850-03-31', '1850-04-30',
               '1850-05-31', '1850-06-30', '1850-07-31', '1850-08-31',
               '1850-09-30', '1850-10-31',
               ...
               '2019-07-31', '2019-08-31', '2019-09-30', '2019-10-31',
               '2019-11-30', '2019-12-31', '2020-01-31', '2020-02-29',
               '2020-03-31', '2020-04-30'],
              dtype='datetime64[ns]', name='Date', length=2044, freq=None)

In [9]:
df_bbg.index

DatetimeIndex(['1959-12-31', '1960-01-29', '1960-02-29', '1960-03-31',
               '1960-04-29', '1960-05-31', '1960-06-30', '1960-07-29',
               '1960-08-31', '1960-09-30',
               ...
               '2019-06-28', '2019-07-31', '2019-08-30', '2019-09-30',
               '2019-10-31', '2019-11-29', '2019-12-31', '2020-01-31',
               '2020-02-28', '2020-03-31'],
              dtype='datetime64[ns]', name='date', length=724, freq=None)

#### Change each index type to `PeriodIndex` with **M** frequency

In [10]:
# Collapse both date-stamped indexes to periods at `_freq`, so that differing
# month-end stamps (e.g. 2020-02-29 in GFD vs 2020-02-28 in Bloomberg) share one key.
gfd_index, bbg_index = (
    pd.to_datetime(frame.index).to_period(_freq)
    for frame in (df_gfd, df_bbg)
)

In [11]:
# Re-key each frame on its period index (`gfd_index` / `bbg_index`).
df_gfd, df_bbg = df_gfd.set_index(gfd_index), df_bbg.set_index(bbg_index)

#### Joining `df_gfd` and `df_bbg` to create `df`

In [12]:
# Left join on the period index: every GFD row is kept and Bloomberg columns
# are attached where the period matches. Columns present in both frames
# (USDCAD_rate, USDNOK_rate, USDJPY_rate, USDCHF_rate) come out with a
# `_gfd` / `_bbg` suffix rather than their bare name.
df = df_gfd.merge(
    df_bbg,
    how='left',
    left_index=True,
    right_index=True,
    suffixes=('_gfd', '_bbg'),
)

Data range is:

In [13]:
df.index.min(), df.index.max()

(Period('1850-01', 'M'), Period('2020-04', 'M'))

Index name change:

In [14]:
# `Date` is the name of df's index (it became the index in the GFD pivot), not
# a column, so `rename(columns=...)` leaves it untouched. Rename the index
# axis itself so the index is called `date`.
df = df.rename_axis(index='date')

#### Drop the row whose key is the current month when `_freq == 'M'`.
- They are not really the end-of-month data, but within-month data.

In [15]:
if _freq == 'M':
    # The current calendar month is still in progress, so a row keyed on it
    # holds within-month data rather than a month-end observation: drop it.
    # Note this depends on the date the cell is run.
    this_month = pd.Timestamp.today().to_period(_freq)
    df = df.loc[df.index != this_month, :]

In [16]:
df_gfd.columns

Index(['CMWXMMM', 'CPUSAM', 'GBPUSD_rate', 'IDUSAD', 'ITUSA3CMD', 'NG_P_WTD',
       'TRUSACOM', 'TRUSG10M', 'USDBEF_rate', 'USDCAD_rate', 'USDCHF_rate',
       'USDDEM_rate', 'USDFRF_rate', 'USDITL_rate', 'USDJPY_rate',
       'USDNLG_rate', 'USDNOK_rate', 'USDSEK_rate', '_DJCBPD', '_SPXD',
       '__WTC_D', '__XAU_D'],
      dtype='object')

In [17]:
df_bbg.columns

Index(['DMEQ_idx', 'GLT_idx', 'CRE_Baa_idx', 'CRE_Aaa_idx', 'TIPS_idx',
       'GOLD_pr', 'INM_idx', 'ENGY_idx', 'DXY_idx', 'USDCAD_rate',
       'USDNOK_rate', 'AUDUSD_rate', 'USDJPY_rate', 'USDCHF_rate',
       'TBILL3M_idx', 'EMEQ_idx', 'ACEQ_idx'],
      dtype='object')

# 3. Generate base asset indices

#### `bf` is a DataFrame of base asset indices, with one column per base asset index.

In [19]:
# Starts with no columns but shares df's index, so each base-asset column
# assigned from df below aligns on the same keys.
bf = pd.DataFrame(index = df.index)

#### Index names in `gray boxes`

### 1) Equities

1. `DMEQ` (Equities): Log returns of an equity index.
  - Since January 1970, **DMEQ_idx**: MSCI World in USD.
  - Before that date, **_SPXD**: S&P (500)

This is less preferred:
```python
np.log(df['close']).diff()
```
because it breaks (`np.log` returns NaN) when `df['close']` is negative.

#### Take log returns

In [21]:
# One-period log return, log(P_t / P_{t-1}), for each equity source:
# DMEQ_idx feeds the recent leg and _SPXD the older leg.
for target, source in {'DMEQ_recent': 'DMEQ_idx', 'DMEQ_older': '_SPXD'}.items():
    level = df[source]
    df[target] = np.log(level / level.shift(1))

#### Merge two columns to create `DMEQ`

In [24]:
# Prefer the recent-source return; wherever it is missing, fall back to the
# older-source return for that period.
bf['DMEQ'] = df['DMEQ_recent'].fillna(df['DMEQ_older'])

### 2) Treasuries

2. `GLT` (Treasuries): Log returns of a long-term government bond index.
  - Since February 1973, **GLT_idx**: Bloomberg Barclays U.S. Treasury Total Return Unhedged USD
  - Before that date, **TRUSG10M**: GFD Indices USA 10-year Government Bond Total Return Index.

In [30]:
# Log returns log(P_t / P_{t-1}) of the two government-bond sources.
glt_level, ust10_level = df['GLT_idx'], df['TRUSG10M']
df['GLT_recent'] = np.log(glt_level.div(glt_level.shift(1)))
df['GLT_older'] = np.log(ust10_level.div(ust10_level.shift(1)))

# Recent source where available, older source otherwise.
bf['GLT'] = df['GLT_recent'].fillna(df['GLT_older'])

### 3) Credit

3. `CRE` (Credit): Log returns of Baa-rating index - log returns of Aaa-rating index.
  - **CRE_Baa_idx**: Bloomberg Barclays U.S. Credit Baa index vs. **CRE_Aaa_idx**: Bloomberg Barclays U.S. Credit Aaa index since February 1973.
  - **_DJCBPD**: Dow Jones Corporate Bond Price Index (new) vs. **TRUSACOM** GFD Indices USA Total Return AAA Corporate Bond Index before that date.

In [39]:
# Credit return = log return of the lower-grade leg minus log return of the
# Aaa/AAA leg, computed separately for the recent and the older source pair.
baa_ret = np.log(df['CRE_Baa_idx'] / df['CRE_Baa_idx'].shift(1))
aaa_ret = np.log(df['CRE_Aaa_idx'] / df['CRE_Aaa_idx'].shift(1))
df['CRE_recent'] = baa_ret - aaa_ret

djcb_ret = np.log(df['_DJCBPD'] / df['_DJCBPD'].shift(1))
gfd_aaa_ret = np.log(df['TRUSACOM'] / df['TRUSACOM'].shift(1))
df['CRE_older'] = djcb_ret - gfd_aaa_ret

# Recent pair where available, older pair otherwise.
bf['CRE'] = df['CRE_recent'].fillna(df['CRE_older'])

### 4) Inflation-Linked Bond

4. `ILB` (Inflation-Linked Bonds): Conceptually, it is *-(break-even inflation rates)*. It is defined as log returns of inflation-linked index ("*real return*") - log returns of government bond index ("*nominal return*").
  - A break-even inflation rate (**BEI**) = rate of returns of nominal bonds - rate of returns of TIPS
  - **BEI** is believed to be a leading indicator of CPI. Since we will get an `ILB` exposure through TIPS, we are assuming: $$\textrm{ILB} \propto -\textrm{BEI}.$$
  - Since April 1997, **TIPS_idx**: Log returns of Bloomberg Barclays U.S. TIPS ("*real return*") vs. **GLT_idx** Bloomberg Barclays U.S. Treasury Total Return Unhedged USD ("*nominal return*")
  - From 3Q 1981  to March 1997, -1 times average of U.S. Survey of Professional Forecasters' 1-year forecasted inflation and 10-year forecasted inflation, that is, ("*-1 times break-even return*").
    - 1-year forecasted inflation is defined as an average inflation over the four quarters following the quarter when we conducted the survey.
    - As the 10-year forecasted inflation is available since 1991 on an annual basis, an alternative source called *Livingston Survey* is used for a period of 4Q79 to 4Q90.
    - U.S. Survey of Professional Forecasters: https://www.philadelphiafed.org/-/media/research-and-data/real-time-center/survey-of-professional-forecasters/historical-data/inflation.xlsx?la=en
    - Livingston Survey: https://www.philadelphiafed.org/-/media/research-and-data/real-time-center/survey-of-professional-forecasters/historical-data/additional-cpie10.xlsx?la=en
    - See: page #37 at https://www.philadelphiafed.org/-/media/research-and-data/real-time-center/survey-of-professional-forecasters/spf-documentation.pdf?la=en
  - Before that date, $-\frac{1}{3}\left(\textrm{CPI}_{t-1} + \textrm{CPI}_t + \textrm{CPI}_{t+1}\right)$ as a proxy of break-even inflation, where CPI is **CPUSAM**: United States BLS Consumer Price Index Inflation Rate NSA.

  
  
5. `GOLD` (Gold): Log returns of gold prices.
  - **GOLD** : Gold (USD/ounce)
  
  
6. `INM` (Industrial Metals): Log returns of industrial metals index.
  - Since February 1977, **INM_idx**: S&P GSCI Industrial Metals.
  - Before that date, **CMWXMMM**: World Bank Metals and Minerals Commodity Price Index.
  
  
7. `ENGR` (Energy commodity)
  - Since February 1983, **ENGY_idx**: S&P GSCI Energy.
  - Before that date, an equally-weighted basket of **__WTC_D**: Crude Oil and **NG_P_WTD** Natural Gas.
  
  
8. `DXY` (U.S. Dollar)
  - Since February 1967, **DXY_idx**: U.S. Dollar Index
  - Before that date, an equally-weighted basket of G-10 currencies.
  
  
9. `FXCS` (Commodity vs. safe haven currencies)
  - Since January 1987, an equally-weighted basket of USDCAD, USDNOK and AUDUSD vs USDCHF and USDJPY.
  - Since January 1980, USDJPY is excluded from what's above.
  - Before that date, USDNOK is further excluded from what's above.

In [22]:
# NOTE(review): this subtracts raw index *levels*, whereas the ILB definition
# in the markdown above is a difference of *log returns* (TIPS minus nominal).
# Presumably a scratch inspection rather than the ILB series itself; confirm.
df_bbg.TIPS_idx - df_bbg.GLT_idx

date
1959-12          NaN
1960-01          NaN
1960-02          NaN
1960-03          NaN
1960-04          NaN
             ...    
2019-11   -2066.9561
2019-12   -2052.3773
2020-01   -2103.6229
2020-02   -2163.4876
2020-03   -2241.3439
Freq: M, Length: 724, dtype: float64

In [15]:
# Only the cell's last expression is displayed by default, so this bare `df`
# on its own shows nothing.
df
# bamboolib live code export
# NOTE(review): this rebinds `df` with its index moved into a regular column.
# `bf` was created from the earlier `df.index`, so any later
# `bf[...] = df[...]` assignment would presumably no longer align on the same
# keys. Confirm the reset is intended before relying on `df` after this cell.
df = df.reset_index()
df

Unnamed: 0,Date,CHFUSD,CMWXMMM,CPUSAM,GBPUSD,IDUSAD,ITUSA3CMD,NG_P_WTD,TRUSACOM,TRUSG10M,...,ENGY_idx,DXY_idx,USDCAD_rate,USDNOK_rate,AUDUSD_rate,USDJPY_rate,USDCHF_rate,TBILL3M_idx,EMEQ_idx,ACEQ_idx
0,1850-01,0.053528,,,23.047528,,,,1.051075,3.306525,...,,,,,,,,,,
1,1850-02,0.053811,,,23.428364,,,,1.055901,3.315689,...,,,,,,,,,,
2,1850-03,0.053698,,,23.360561,,,,1.052282,3.273533,...,,,,,,,,,,
3,1850-04,0.053186,,,23.091783,,,,1.057999,3.475150,...,,,,,,,,,,
4,1850-05,0.039322,,,23.911980,,,,1.081762,3.482482,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2038,2019-11,0.999800,76.215305,257.208,1.672875,,1.59,,24038.670000,10953.936305,...,190.0800,98.273,1.3282,9.2248,0.6763,109.49,1.0002,1.566108,1040.05,546.70
2039,2019-12,1.067163,77.469997,256.974,1.759655,,1.55,,23920.300000,10836.125049,...,206.6327,96.389,1.2990,8.7855,0.7021,108.61,0.9666,1.525863,1114.66,565.24
2040,2020-01,1.077882,77.702232,257.971,1.742672,,1.55,,24418.540000,11252.031439,...,174.8693,97.390,1.3237,9.2011,0.6692,108.35,0.9634,1.535940,1062.34,558.62
2041,2020-02,1.072827,73.015869,258.678,1.643800,,1.27,,24727.670000,11666.405078,...,154.7120,98.132,1.3407,9.3939,0.6515,107.89,0.9649,1.510747,1005.52,512.76
