In [1]:
import numpy as np
import pandas as pd

# to access public datasets from knoema.com
import knoema

import bamboolib

# 1. Load datasets

#### From Bloomberg

In [2]:
# Location of the raw Bloomberg export, relative to this notebook.
bbg_filename = '../../data/raw/base_assets_price_bbg.log'

# Commodity prices come from the World Bank dataset on knoema because the
# really old history is not available in Bloomberg.
wb_query = dict(
    frequency='M',
    Commodity='ALUMINUM;COPPER;LEAD;Zinc;CRUDE_DUBAI;NGAS_US',
    Measure='KN.NUD',
)
df_wb = knoema.get('WBCPD2015Oct', **wb_query)

# Everything else, including the Bloomberg Barclays indices, comes from the
# Bloomberg file. header=2 makes the third line of the file the header row;
# the parsed `date` column then becomes the index.
df_bbg = (
    pd.read_csv(bbg_filename, header=2, parse_dates=['date'])
    .set_index(['date'], drop=True)
)

#### From Global Financial Data (GFD)

In [None]:
# Path for the Global Financial Data (GFD) input. This cell has no execution
# count and the name is not used in the visible part of the notebook.
# NOTE(review): the path has no file extension — confirm the intended file name.
gfd_filename = '../../data/raw/base_assets'

#### Name the columns of `df_wb`

In [3]:
# Drop column levels 1 and 2 of the knoema result, then replace the remaining
# labels with short snake_case names.
# NOTE(review): the rename is positional, so it assumes the columns arrive in
# the same order as the `Commodity` request — confirm against the raw labels.
flat_columns = df_wb.columns.droplevel(level=[1, 2])
df_wb.columns = flat_columns
df_wb.columns = [
    'aluminum',
    'copper',
    'lead',
    'zinc',
    'crude',
    'natural_gas',
]

#### Set the index name

In [4]:
# Name the index `date` so it matches the Bloomberg frame's index name.
# rename_axis returns a new frame instead of mutating the Index object in
# place (an in-place rename would also affect any other object sharing that
# Index), and it keeps the cell safe to re-run.
df_wb = df_wb.rename_axis('date')

# 2. Preprocessing

### Changing `date` formats to properly join

#### A different format is being used in each index
- YYYY-MM-**01** vs YYYY-MM-**TheEndOfMonth**

In [5]:
# Inspect the World Bank index: the timestamps shown fall on the first day of each month.
df_wb.index

DatetimeIndex(['1960-01-01', '1960-02-01', '1960-03-01', '1960-04-01',
               '1960-05-01', '1960-06-01', '1960-07-01', '1960-08-01',
               '1960-09-01', '1960-10-01',
               ...
               '2019-06-01', '2019-07-01', '2019-08-01', '2019-09-01',
               '2019-10-01', '2019-11-01', '2019-12-01', '2020-01-01',
               '2020-02-01', '2020-03-01'],
              dtype='datetime64[ns]', name='date', length=723, freq=None)

In [6]:
# Inspect the Bloomberg index: the timestamps shown fall at the end of each month
# (e.g. 1960-01-29, 1960-02-29), so they never equal the World Bank timestamps.
df_bbg.index

DatetimeIndex(['1959-12-31', '1960-01-29', '1960-02-29', '1960-03-31',
               '1960-04-29', '1960-05-31', '1960-06-30', '1960-07-29',
               '1960-08-31', '1960-09-30',
               ...
               '2019-06-28', '2019-07-31', '2019-08-30', '2019-09-30',
               '2019-10-31', '2019-11-29', '2019-12-31', '2020-01-31',
               '2020-02-28', '2020-03-31'],
              dtype='datetime64[ns]', name='date', length=724, freq=None)

#### Change each to `PeriodIndex` with **M** frequency

In [7]:
# Collapse both indexes to monthly periods so that a first-of-month timestamp
# and an end-of-month timestamp for the same month map to the same key.
wb_index, bbg_index = (
    pd.to_datetime(frame.index).to_period('M') for frame in (df_wb, df_bbg)
)

In [8]:
# Replace each frame's index with its monthly PeriodIndex (`wb_index`, `bbg_index`).
# set_index returns new frames, which are bound back to the same names.
df_wb, df_bbg = df_wb.set_index(wb_index), df_bbg.set_index(bbg_index)

In [9]:
# Display the World Bank frame, now indexed by monthly period.
df_wb

Unnamed: 0_level_0,aluminum,copper,lead,zinc,crude,natural_gas
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1960-01,511.471832,715.40,206.10,260.80,1.63,0.1400
1960-02,511.471832,728.19,203.70,244.90,1.63,0.1400
1960-03,511.471832,684.94,210.30,248.70,1.63,0.1400
1960-04,511.471832,723.11,213.60,254.60,1.63,0.1400
1960-05,511.471832,684.75,213.40,253.80,1.63,0.1400
...,...,...,...,...,...,...
2019-11,1774.790000,5859.95,2021.15,2425.48,61.41,2.6505
2019-12,1771.380000,6077.06,1900.54,2272.54,64.41,2.2425
2020-01,1773.090000,6031.21,1923.93,2354.31,63.76,2.0305
2020-02,1688.100000,5687.75,1872.54,2113.24,54.51,1.9158


In [10]:
# Display the Bloomberg frame, now indexed by monthly period.
df_bbg

Unnamed: 0_level_0,WEQ,GLT,CRE_Baa,CRE_Aaa,ILB,GOLD,INM,ENGY,DXY,CADUSD,NOKUSD,AUDUSD,JPYUSD,CHFUSD,LIBOR1M,FED_RATE,FED_EFFRATE
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1959-12,,,,,,35.10,,,,,,,,,,,3.99
1960-01,,,,,,35.27,,,,,,,,,,,3.99
1960-02,,,,,,35.27,,,,,,,,,,,3.97
1960-03,,,,,,35.27,,,,,,,,,,,3.84
1960-04,,,,,,35.27,,,,,,,,,,,3.92
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-11,2292.26,2383.16,4206.804,2246.016,316.2039,1463.98,314.60,190.0800,98.273,0.7530,0.1084,0.6763,0.009133,0.9999,1.69713,1.75,1.55
2019-12,2358.47,2369.78,4233.494,2244.171,317.4027,1517.27,324.21,206.6327,96.389,0.7698,0.1139,0.7021,0.009205,1.0333,1.76250,1.75,1.55
2020-01,2342.41,2427.69,4332.237,2281.815,324.0671,1589.16,301.55,174.8693,97.390,0.7554,0.1087,0.6692,0.009228,1.0380,1.66188,1.75,1.55
2020-02,2141.12,2492.04,4372.826,2320.946,328.5524,1585.69,298.11,154.7120,98.132,0.7467,0.1064,0.6515,0.009250,1.0359,1.51525,1.75,1.58


#### Joining `df_wb` and `df_bbg` to create `df`

In [11]:
# Inner-join the two frames on the shared `date` index level, so only months
# present in both sources are kept; Bloomberg columns come first.
df = df_bbg.merge(df_wb, on='date', how='inner')

The date range of the joined data is:

In [12]:
# First and last monthly period covered by the joined frame.
df.index.min(), df.index.max()

(Period('1960-01', 'M'), Period('2020-03', 'M'))

# 3. Generate indices
- `CRE`: U.S. Credit Baa index vs. Aaa index
- `ILB`: A proxy for TIPS for the period before January 1997. Following Swinkels (2018), it is a spread return based on changes in estimated real yields.

In [15]:
# bamboolib live code export
# Move the `date` index into a regular column. The guard makes the cell
# idempotent: without it, re-running the cell would call reset_index() on the
# already-reset frame and insert a spurious `index` column. The original
# leading bare `df` was not the cell's last expression, so it displayed
# nothing and has been dropped.
if 'date' not in df.columns:
    df = df.reset_index()
df

Unnamed: 0_level_0,WEQ,GLT,CRE_Baa,CRE_Aaa,ILB,GOLD,INM,ENGY,DXY,CADUSD,...,CHFUSD,LIBOR1M,FED_RATE,FED_EFFRATE,aluminum,copper,lead,zinc,crude,natural_gas
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1960-01,,,,,,35.27,,,,,...,,,,3.99,511.471832,715.40,206.10,260.80,1.63,0.1400
1960-02,,,,,,35.27,,,,,...,,,,3.97,511.471832,728.19,203.70,244.90,1.63,0.1400
1960-03,,,,,,35.27,,,,,...,,,,3.84,511.471832,684.94,210.30,248.70,1.63,0.1400
1960-04,,,,,,35.27,,,,,...,,,,3.92,511.471832,723.11,213.60,254.60,1.63,0.1400
1960-05,,,,,,35.27,,,,,...,,,,3.85,511.471832,684.75,213.40,253.80,1.63,0.1400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-11,2292.26,2383.16,4206.804,2246.016,316.2039,1463.98,314.60,190.0800,98.273,0.7530,...,0.9999,1.69713,1.75,1.55,1774.790000,5859.95,2021.15,2425.48,61.41,2.6505
2019-12,2358.47,2369.78,4233.494,2244.171,317.4027,1517.27,324.21,206.6327,96.389,0.7698,...,1.0333,1.76250,1.75,1.55,1771.380000,6077.06,1900.54,2272.54,64.41,2.2425
2020-01,2342.41,2427.69,4332.237,2281.815,324.0671,1589.16,301.55,174.8693,97.390,0.7554,...,1.0380,1.66188,1.75,1.55,1773.090000,6031.21,1923.93,2354.31,63.76,2.0305
2020-02,2141.12,2492.04,4372.826,2320.946,328.5524,1585.69,298.11,154.7120,98.132,0.7467,...,1.0359,1.51525,1.75,1.58,1688.100000,5687.75,1872.54,2113.24,54.51,1.9158
