# Parse Input Data

This notebook implements data parsing and data normalisation.

In [None]:
import pandas as pd
import QuantLib as ql
import numpy as np

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


## ECB Data

The ECB data comprise EUR yield curves and FX rates quoted against the euro.

In [None]:
# Directory prefix for the ECB input files; later cells build file paths as
# `path + file_name`, so the trailing slash is required.
path = "../data/input/ecb/"

### FX Rates

In [None]:
# Read the ECB FX export, drop the "TIME PERIOD" column and replace the
# verbose ECB series labels with short "<CCY>-EUR" column names.
file_name = "ECB Data Portal_20240512160936_fx.csv"
data = (
    pd.read_csv(path + file_name)
    .drop(columns=["TIME PERIOD"])
    .rename(
        columns={
            "Swiss franc/Euro (EXR.D.CHF.EUR.SP00.A)": "CHF-EUR",
            "Chinese yuan renminbi/Euro (EXR.D.CNY.EUR.SP00.A)": "CNY-EUR",
            "UK pound sterling/Euro (EXR.D.GBP.EUR.SP00.A)": "GBP-EUR",
            "Japanese yen/Euro (EXR.D.JPY.EUR.SP00.A)": "JPY-EUR",
            "US dollar/Euro (EXR.D.USD.EUR.SP00.A)": "USD-EUR",
        }
    )
)
# Convert the DATE column to pandas datetimes.
data["DATE"] = pd.to_datetime(data["DATE"])
data

In [None]:
# One stacked subplot per currency pair, each plotted against DATE.
pairs = ("USD-EUR", "GBP-EUR", "CHF-EUR", "JPY-EUR", "CNY-EUR")
fig = make_subplots(rows=len(pairs), cols=1, subplot_titles=pairs)
for row, pair in enumerate(pairs, start=1):
    fig.add_trace(go.Scatter(x=data["DATE"], y=data[pair]), row=row, col=1)
fig.update_layout(height=900, width=1600, showlegend=False)

fig.show()

### Interest Rates

In [None]:
# Read the ECB yield-curve export and drop the "TIME PERIOD" column.
file_name = "ECB Data Portal_20240512160755_yc.csv"
data = pd.read_csv(path + file_name).drop(columns=["TIME PERIOD"])

# Derive a short term label from every column after the first: take the last
# space-separated token, strip its first and last character, and keep the text
# after the final underscore.
# NOTE(review): presumably the labels end in a parenthesised series key such
# as "(..._10Y)" - confirm against the CSV header.
terms = [
    label.rsplit(" ", 1)[-1][1:-1].rsplit("_", 1)[-1]
    for label in data.columns[1:]
]
data = data.rename(columns=dict(zip(data.columns[1:], terms)))

# Keep DATE plus the selected terms, in this order.
cols = [
    "DATE",
    "3M", "6M", "9M",
    "1Y", "2Y", "3Y", "4Y", "5Y",
    "6Y", "7Y", "8Y", "9Y", "10Y",
    "12Y", "15Y", "20Y", "25Y", "30Y",
]
data = data[cols]
data["DATE"] = pd.to_datetime(data["DATE"])

In [None]:
# Display the current `data` frame as the cell output.
data

In [None]:
# Line chart of every column after the first, plotted against DATE.
px.line(data, x="DATE", y=data.columns[1:])

## UK Bank of England Data

In [None]:
# Directory prefix for the Bank of England input files; file names are
# appended by plain string concatenation, so the trailing slash is required.
path = "../data/input/boe/"

In [None]:
def boe_data(path, file_name, sheet_name):
    """Load one sheet of a Bank of England workbook as a normalised frame.

    Args:
        path: Directory prefix; concatenated directly with ``file_name``.
        file_name: Name of the Excel workbook.
        sheet_name: Name of the sheet to read.

    Returns:
        A DataFrame whose ``"years:"`` column is renamed to ``"DATE"`` and
        converted to datetimes, and whose remaining column labels are
        rewritten as month strings (label times 12, rounded, plus ``"M"``).
    """
    # header=3: the fourth row of the sheet supplies the column labels.
    frame = pd.read_excel(path + file_name, sheet_name=sheet_name, header=3)
    # Discard the row with index label 0.
    # NOTE(review): presumably a non-data row below the header - confirm.
    frame = frame.drop(index=0).rename(columns={"years:": "DATE"})
    frame["DATE"] = pd.to_datetime(frame["DATE"])
    # Columns after the first carry numeric labels; express them in months.
    month_labels = {
        years: str(round(12 * years)) + "M" for years in frame.columns[1:]
    }
    return frame.rename(columns=month_labels)

### Period 2000 - 2004, Short End

In [None]:
# Load the "3. nominal spot, short end" sheet of the 2000-2004 workbook
# through boe_data and display the result.
file_name = "GLC Nominal daily data_2000 to 2004.xlsx"
sheet_name = "3. nominal spot, short end"
data = boe_data(path, file_name, sheet_name)
data

### Period 2000 - 2004, Long End

In [None]:
# Load the "4. nominal spot curve" sheet of the 2000-2004 workbook through
# boe_data and display the result.
file_name = "GLC Nominal daily data_2000 to 2004.xlsx"
sheet_name = "4. nominal spot curve"
data = boe_data(path, file_name, sheet_name)
data

### Period 2005 - 2015, Short End

In [None]:
# Load the "3. spot, short end" sheet of the 2005-2015 workbook through
# boe_data and display the result.
file_name = "GLC Nominal daily data_2005 to 2015.xlsx"
sheet_name = "3. spot, short end"
data = boe_data(path, file_name, sheet_name)
data

### Period 2005 - 2015, Long End

In [None]:
# Load the "4. spot curve" sheet of the 2005-2015 workbook through boe_data
# and display the result.
file_name = "GLC Nominal daily data_2005 to 2015.xlsx"
sheet_name = "4. spot curve"
data = boe_data(path, file_name, sheet_name)
data

### Period 2016 - Present, Short End

In [None]:
# Load the "3. spot, short end" sheet of the 2016-to-present workbook through
# boe_data and display the result.
file_name = "GLC Nominal daily data_2016 to present.xlsx"
sheet_name = "3. spot, short end"
data = boe_data(path, file_name, sheet_name)
data

### Period 2016 - Present, Long End

In [None]:
# Load the "4. spot curve" sheet of the 2016-to-present workbook through
# boe_data and display the result.
file_name = "GLC Nominal daily data_2016 to present.xlsx"
sheet_name = "4. spot curve"
data = boe_data(path, file_name, sheet_name)
data

In [None]:
# Line chart of every column after the first of the current `data` frame,
# plotted against DATE.
px.line(data, x="DATE", y=data.columns[1:])

## US Treasury Data

In [None]:
# Directory prefix for the US Treasury input files; file names are appended
# by plain string concatenation, so the trailing slash is required.
path = "../data/input/us_treasury/"

In [None]:
def us_data(path, file_name):
    """Load a US Treasury yield-curve CSV as a normalised frame.

    Args:
        path: Directory prefix; concatenated directly with ``file_name``.
        file_name: Name of the CSV file.

    Returns:
        A DataFrame whose ``"Date"`` column is renamed to ``"DATE"`` and parsed
        as datetimes, and whose remaining column labels have their spaces
        removed and their last character dropped (e.g. ``"1 Mo"`` -> ``"1M"``).

    Raises:
        ValueError: If the dates match neither ``%m/%d/%y`` nor ``%m/%d/%Y``.
    """
    frame = pd.read_csv(path + file_name).rename(columns={"Date": "DATE"})

    # Shorten every label after the first column.
    short_labels = {}
    for label in frame.columns[1:]:
        short_labels[label] = label.replace(" ", "")[:-1]
    frame = frame.rename(columns=short_labels)

    # Try two-digit years first and fall back to four-digit years.
    raw_dates = frame["DATE"]
    try:
        parsed_dates = pd.to_datetime(raw_dates, format="%m/%d/%y")
    except ValueError:
        parsed_dates = pd.to_datetime(raw_dates, format="%m/%d/%Y")
    frame["DATE"] = parsed_dates
    return frame

In [None]:
# Load the 1990-2023 yield-curve CSV through us_data and display the result.
file_name = "yield-curve-rates-1990-2023.csv"
data = us_data(path, file_name)
data

In [None]:
# Load the daily Treasury rates CSV through us_data and display the result.
file_name = "daily-treasury-rates.csv"
data = us_data(path, file_name)
data