In [1]:
import numpy as np
import pandas as pd
import pandas_datareader as pdr
pd.options.display.float_format = "{:.4f}".format
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.lines import Line2D
plt.rcParams["figure.figsize"] = (16,10)
plt.style.use("~/.dracula.mplstyle")
import quandl
import functools, re
import statsmodels.api as sm
from collections import Counter

# QUANDL Config
import os
import sys

# Put the parent directory on the import path so that `common.config` resolves.
parent_path = os.path.abspath('..')
if parent_path not in sys.path:
    sys.path.append(parent_path)

# If you are not Philip, you don't have this config module
# unless you make your own.
from common.config import QUANDL_API_KEY

# Replace with your own API key if you are running this notebook.
quandl.ApiConfig.api_key = QUANDL_API_KEY

In [2]:
# Date window passed as start_date / end_date to every quandl.get call.
START_DATE = "2009-01-01"
END_DATE = "2021-12-31"

# FX data: database prefix and the currency codes appended to it.
FX_DB = "CUR/"
CURRENCIES = ["GBP", "ZAR", "THB", "PKR", "PHP"]

# Yield-curve data: database prefix and the curve codes appended to it.
# "GBR_ISSC" and "GBR" both collapse to the country label "GBR" once the
# column names are cleaned (only the part before "_" is kept).
YC_DB = "YC/"
COUNTRIES = ["GBR_ISSC", "GBR", "ZAF", "THA", "PAK", "PHL"]

In [3]:
# Build the full dataset codes (database prefix + code), then download the
# yield-curve and FX data over the configured date window.
yc_codes = [YC_DB + country for country in COUNTRIES]
fx_codes = [FX_DB + currency for currency in CURRENCIES]

par_raw = quandl.get(yc_codes, start_date=START_DATE, end_date=END_DATE)
fx_raw = quandl.get(fx_codes, start_date=START_DATE, end_date=END_DATE)

## Clean Data

Map the yield-curve par data's column names onto a friendly `(COUNTRY, TENOR)` `MultiIndex` for later slicing.

In [4]:
# Quandl tenor label -> tenor in years. Built once rather than on every call.
# Range buckets map to their midpoint ('3to5-Year' -> 4, '5to10-Year' -> 7.5)
# and '1-Month' uses the same 0.08 value as the '0.08Y' label.
_TENOR_YEARS = {
    '0.08Y': 0.08, '0.17Y': 0.17, '0.25Y': 0.25, '0.33Y': 0.33, '0.42Y': 0.42, '0.5Y': 0.5,
    '0.58Y': 0.58, '0.67Y': 0.67, '0.75Y': 0.75, '0.83Y': 0.83, '0.92Y': 0.92, '1.0Y': 1,
    '1.08Y': 1.08, '1.17Y': 1.17, '1.25Y': 1.25, '1.33Y': 1.33, '1.42Y': 1.42, '1.5Y': 1.5,
    '1.58Y': 1.58, '1.67Y': 1.67, '1.75Y': 1.75, '1.83Y': 1.83, '1.92Y': 1.92, '2.0Y': 2,
    '2.08Y': 2.08, '2.17Y': 2.17, '2.25Y': 2.25, '2.33Y': 2.33, '2.42Y': 2.42, '2.5Y': 2.5,
    '2.58Y': 2.58, '2.67Y': 2.67, '2.75Y': 2.75, '2.83Y': 2.83, '2.92Y': 2.92, '3.0Y': 3,
    '3.08Y': 3.08, '3.17Y': 3.17, '3.25Y': 3.25, '3.33Y': 3.33, '3.42Y': 3.42, '3.5Y': 3.5,
    '3.58Y': 3.58, '3.67Y': 3.67, '3.75Y': 3.75, '3.83Y': 3.83, '3.92Y': 3.92, '4.0Y': 4,
    '4.08Y': 4.08, '4.17Y': 4.17, '4.25Y': 4.25, '4.33Y': 4.33, '4.42Y': 4.42, '4.5Y': 4.5,
    '4.58Y': 4.58, '4.67Y': 4.67, '4.75Y': 4.75, '4.83Y': 4.83, '4.92Y': 4.92, '5.0Y': 5,
    '1-Month': 0.08, '3-Month': 0.25, '6-Month': 0.5, '9-Month': 0.75, '12-Month': 1,
    '1-Year': 1, '2-Year': 2, '3-Year': 3, '4-Year': 4, '5-Year': 5, '6-Year': 6,
    '7-Year': 7, '8-Year': 8, '9-Year': 9, '10-Year': 10, '15-Year': 15, '20-Year': 20,
    '25-Year': 25, '3to5-Year': 4, '5to10-Year': 7.5,
}


def clean_columns(s: str, pre=None):
    """Split a Quandl-formatted column name into ``[country, tenor]``.

    The result is meant to be used to construct a pd.MultiIndex, e.g.
    "YC/GBR_ISSC - 0.08Y" -> ['GBR', 0.08]

    Parameters
    ----------
    s : str
        Raw column name: database prefix, curve code, " - ", tenor label.
    pre : int, optional
        Number of leading characters (the database prefix) to drop from ``s``.
        Defaults to ``len(YC_DB)``, looked up when the function is called so
        that it follows the current value of ``YC_DB``.

    Returns
    -------
    list
        ``[country, tenor]`` where ``country`` is the curve code up to its
        first "_" and ``tenor`` is the tenor in years.

    Raises
    ------
    KeyError
        If the tenor label is not in the lookup table.
    """
    if pre is None:
        pre = len(YC_DB)

    # Both alternatives are capture groups, so the separators (or None) are kept
    # in the result; only the first and last pieces are used.
    # NOTE(review): the `[A-Z]\d{4}` alternative is not exercised by the column
    # names visible in this notebook -- confirm what it was meant to match.
    ret = re.split(r"([A-Z]\d{4})|( - )", s[pre:])
    coun = ret[0].split('_')[0]
    label = ret[-1]
    try:
        ten = _TENOR_YEARS[label]
    except KeyError:
        # Same exception type as a plain dict lookup, but says which column failed.
        raise KeyError(f"Unknown tenor label {label!r} in column {s!r}") from None
    return [coun, ten]

# Quick sanity check of the parser on the first raw column name.
test_col = par_raw.columns[0]
print(f"IN:\n{test_col}\nOUT:\n{clean_columns(test_col)}")

# NOTE: everything below rebinds `par_raw` / `fx_raw` from themselves, so this
# cell is not idempotent -- re-run the download cell before running it again.

# Keep the first column plus everything from position 60 onwards.
# NOTE(review): presumably positions 1-59 are the remaining GBR_ISSC tenors, so
# only its 0.08Y point survives -- confirm against the raw column order.
par_raw = pd.concat([par_raw.iloc[:, 0], par_raw.iloc[:, 60:]], axis=1)

# Parse each column name once and reuse the result for both index levels.
parsed_cols = [clean_columns(c) for c in par_raw.columns]
par_cols = pd.MultiIndex.from_arrays(
    [[pair[0] for pair in parsed_cols], [pair[1] for pair in parsed_cols]],
    names=("COUNTRY", "TENOR"))

par_raw.columns = par_cols
par_raw = par_raw / 100  # presumably percent -> decimal; confirm the source units

# Drop the database prefix and everything from " - " on, leaving the currency code.
fx_raw.columns = [c[len(FX_DB):].split(" - ")[0] for c in fx_raw.columns]

IN:
YC/GBR_ISSC - 0.08Y
OUT:
['GBR', 0.08]


## Turn `Spot Rates` to `Zero Rates`

The zero rates are needed to discount coupons from off-the-run bonds.

In [5]:
def compute_zcb_curve(spot_rates_curve):
    """Bootstrap continuously-compounded zero rates from coupon-bearing curves.

    Each non-NaN rate is treated as the annual coupon of a bond priced at 1
    that pays half the coupon every 0.5 years and ``1 + coupon/2`` at its
    tenor. Earlier coupons are discounted with zero rates linearly
    interpolated from the curve built so far, and the final cash flow is
    solved for the zero rate at the bond's own tenor.

    Parameters
    ----------
    spot_rates_curve : pd.DataFrame
        One column per curve, indexed by tenor in years, rates as decimals.
        np.interp expects increasing x-coordinates, so the index should be
        sorted by tenor. NaN entries are skipped.

    Returns
    -------
    pd.DataFrame
        A copy of the input with every rate whose tenor is above 0.1 replaced
        by its zero rate. Shorter tenors and NaNs are returned unchanged. The
        input frame is not modified.
    """
    zcb_rates = spot_rates_curve.copy()
    for curve in spot_rates_curve.columns:
        spot = spot_rates_curve[curve].dropna()
        # .items() replaces Series.iteritems(), which was removed in pandas 2.0.
        for tenor, spot_rate in spot.items():
            if tenor > 0.1:
                # Coupon dates before maturity: ..., tenor-1.0, tenor-0.5 (ascending).
                times = np.arange(tenor - 0.5, 0, step=-0.5)[::-1]
                coupon_half_yr = 0.5 * spot_rate
                # Curve built so far: tenors already processed hold zero rates,
                # later tenors still hold their input rates.
                known = zcb_rates[curve].dropna()
                z = np.interp(times, known.index.values, known.values)  # Linear interpolation
                preceding_coupons_val = (coupon_half_yr * np.exp(-z * times)).sum()
                # A single .loc write instead of chained `frame[col][row] = ...`,
                # which does not update the frame under pandas Copy-on-Write.
                zcb_rates.loc[tenor, curve] = (
                    -np.log((1 - preceding_coupons_val) / (1 + coupon_half_yr)) / tenor
                )
    return zcb_rates

zeros = []

# One bootstrap per observation date: reshape the (COUNTRY, TENOR) row into a
# TENOR x COUNTRY frame, convert it, then flatten it back into a Series.
for d, row in par_raw.iterrows():
    # pivot keeps the value column's name as an unnamed outer column level;
    # droplevel(None, axis=1) removes it so only COUNTRY remains.
    df = row.reset_index().pivot(index="TENOR", columns="COUNTRY").droplevel(None, axis=1)
    zero = compute_zcb_curve(df)
    # stack() yields a (TENOR, COUNTRY) Series; swap to (COUNTRY, TENOR).
    # Series.swaplevel has no axis argument (a third positional would be taken
    # as `copy`), so only the two levels are passed.
    zero = zero.stack().swaplevel(0, 1)
    zeros.append(zero)

In [6]:
# Each per-date Series becomes one column; transposing gives one row per date
# with (COUNTRY, TENOR) columns. `zeros` is rebound from a list to a DataFrame
# here, so this cell cannot be re-run without re-running the loop that fills it.
zeros = pd.concat(zeros, axis=1).transpose()

# The Series were appended in par_raw's row order, so its index labels the rows.
zeros.index = par_raw.index
zeros

COUNTRY,GBR,GBR,GBR,GBR,PAK,PAK,PAK,PAK,PAK,PAK,...,THA,THA,THA,ZAF,ZAF,ZAF,ZAF,ZAF,ZAF,ZAF
TENOR,0.0800,5.0000,10.0000,20.0000,0.2500,0.5000,1.0000,3.0000,5.0000,10.0000,...,9.0000,10.0000,15.0000,0.2500,0.5000,0.7500,1.0000,4.0000,7.5000,10.0000
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2009-01-02,0.0128,0.0268,0.0343,0.0416,,,,,,,...,,,,,,,,,,
2009-01-05,0.0120,0.0277,0.0355,0.0424,,,,,,,...,,,,,,,,,,
2009-01-06,0.0117,0.0281,0.0366,0.0436,,,,,,,...,,,,,,,,,,
2009-01-07,0.0111,0.0282,0.0369,0.0437,,,,,,,...,,,,,,,,,,
2009-01-08,0.0109,0.0273,0.0362,0.0433,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-27,,,,,,,,,,,...,,0.0195,0.0241,,,,,,,
2021-12-28,,,,,,,,,,,...,,0.0193,0.0239,,,,,,,
2021-12-29,,0.0078,0.0102,0.0127,,,,,,,...,,,,,,,,,,
2021-12-30,,0.0075,0.0099,0.0123,,,,,,,...,,,,,,,,,,
