In [1]:
# ML_in_Finance-Autoencoders
# Author: Matthew Dixon
# Version: 1.0 (24.7.2019)
# License: MIT
# Email: matthew.dixon@iit.edu
# Notes: tested on Mac OS X with Python 3.6 and Tensorflow 1.3.0
# Citation: Please cite the following reference if this notebook is used for research purposes:
# Bilokon P., Dixon M.F. and I. Halperin, Machine Learning in Finance: From Theory to Practice, Springer Graduate textbook Series, 2020. 

# Introduction

The purpose of this notebook is to first review PCA for yield curve dimension reduction. Linear autoencoders are then compared with PCA. Finally we evaluate deep autoencoders for yield curve compression.

# PCA
Principal component analysis finds the unit-norm weight vector $w$ that maximizes the variance of the weighted data $w^TY$:
$$\max_{w:||w||=1}Var(w^TY) = \max_{w:||w||=1}w^TVar(Y)w$$

Setting $w=v$, with $v$ the eigenvector corresponding to the largest eigenvalue $\lambda$ of $C:=Var(Y)$, will yield an orthogonal projection which is an optimal solution. From the eigenvalue problem, we can obtain the eigenvalues of $C$:

$$C v=\lambda v$$
$$(C-\lambda I)v=0$$
$$ |C-\lambda I|=0$$

Plugging the eigenvalues into the second equation above gives the eigenvectors. The result can be written as:

$$\Lambda=P^TCP,$$

where $\Lambda$ is the diagonal matrix of descending eigenvalues and $P$ is the corresponding orthonormal matrix of eigenvectors. Rearranging gives the spectral decomposition of the covariance matrix:

$$C=P\Lambda P^T.$$

The transformation of Y onto the orthonormal basis spanned by the columns of $P$ is:
$$X=P^TY$$

# Libs & Defs

In [None]:
# %matplotlib inline
from keras.layers import Input, Dense
from keras import regularizers, models, optimizers
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA

In [None]:
# Linear Autoencoder
def LinearAE(y, dimension, learning_rate = 1e-4, regularization = 5e-4, epochs=10):
    """Build and train a linear autoencoder that reconstructs ``y``.

    The network is ``Dense(dimension) -> Dense(y.shape[1])`` with linear
    activations on both layers and an L2 penalty on both kernels.  It is
    compiled with the Adam optimizer and a mean-squared-error loss, then
    fitted on ``(y, y)`` with a batch size of 4 and shuffling enabled.

    Parameters
    ----------
    y : 2-D array-like
        Training data; ``y.shape[1]`` sets the input and output width.
    dimension : int
        Width of the bottleneck (encoding) layer.
    learning_rate : float, optional
        Step size passed to Adam.
    regularization : float, optional
        L2 penalty factor applied to the encoder and decoder kernels.
    epochs : int, optional
        Number of training epochs.

    Returns
    -------
    The fitted Keras ``Model`` mapping the input to its reconstruction.
    """
    n_features = y.shape[1]
    # Named `inputs` so the Python builtin `input` is not shadowed.
    inputs = Input(shape=(n_features,))
    encoded = Dense(dimension, activation='linear',
                    kernel_regularizer=regularizers.l2(regularization))(inputs)
    decoded = Dense(n_features, activation='linear',
                    kernel_regularizer=regularizers.l2(regularization))(encoded)
    autoencoder = models.Model(inputs, decoded)
    # Use the optimizer class `Adam`; the lowercase `adam` alias is not
    # available in newer Keras releases.
    # NOTE(review): newer Keras spells this argument `learning_rate`; `lr` is
    # kept to match the Keras version this notebook was written for - confirm.
    autoencoder.compile(optimizer=optimizers.Adam(lr=learning_rate), loss='mean_squared_error')
    autoencoder.fit(y, y, epochs=epochs, batch_size=4, shuffle=True)

    return autoencoder

# Load Dataset

In [None]:
# Read the raw yield-curve table.
df = pd.read_csv('yield_curve.csv', sep=',')

# Parse the 'Date' column. The `infer_datetime_format` flag is omitted because
# it is deprecated in recent pandas and only triggers a warning there.
df['Date'] = pd.to_datetime(df['Date'])

# Use the dates as an unnamed index and discard the 'Index' column.
# Reassignment (rather than inplace=True) keeps every step explicit.
df = df.set_index('Date', drop=True)
df.index.names = [None]
df = df.drop('Index', axis=1)

# Transposed copy: one column per date, one row per original column.
dt = df.transpose()

##### Uncomment to perform PCA on the daily yield changes $\Delta Y_0$.
#delta=df.diff(1)[1:]

# Visualizing the Dataset

In [None]:
plt.figure(figsize=(20,15))

# One line per column of df over the full date range.
plt.plot(df.index,df)
plt.xlim(df.index.min(), df.index.max())
plt.axhline(y=0,c="grey",linewidth=0.5,zorder=0)

# Thin vertical guide at the last observation before 1 January of each year.
for year in range(df.index.min().year, df.index.max().year+1):
    # pd.Timestamp replaces the pd.datetime alias, which recent pandas no
    # longer provides.
    pos = df.index.searchsorted(pd.Timestamp(year=year, month=1, day=1)) - 1
    # Clamp at 0: when no observation precedes 1 January, -1 would wrap
    # around to the final date of the series.
    plt.axvline(x=df.index[max(pos, 0)],
                c="grey", linewidth=0.5, zorder=0)

In [None]:
cols = 6

# One panel per calendar year, first and last year inclusive. Counting the
# years from the same range the loop iterates over guarantees that the grid
# always has enough rows for every panel.
years = range(df.index.min().year, df.index.max().year + 1)
num_years = len(years)
rows = math.ceil(num_years/cols)

plt.figure(figsize=(24,(24/cols)*rows))

# Axis limits and tick positions shared by every panel.
y_low, y_high = np.min(dt.values), np.max(dt.values)
tick_positions = range(len(dt.index))

for panel, year in enumerate(years):
    # Fill the grid row by row.
    rownum, colnum = divmod(panel, cols)

    plt.subplot2grid((rows,cols), (rownum,colnum), colspan=1, rowspan=1)
    plt.title('{0}'.format(year))
    plt.xlim(0, len(dt.index)-1)
    plt.ylim(y_low, y_high)
    plt.xticks(tick_positions, dt.index, size='small')

    # Select exactly the columns (dates) that fall in this calendar year.
    # A boolean mask avoids the removed `.ix` indexer and cannot run past the
    # end of the index for the final year.
    in_year = dt.columns.year == year
    plt.plot(dt.loc[:, in_year].values)

None

# Projection onto Principal Components

In [None]:
# Fit a three-component PCA on df (eigenvectors & eigenvalues of the
# covariance matrix); fit() hands back the estimator itself.
pcaA = PCA(n_components=3, copy=True, whiten=False).fit(df)

# Project the dataset onto the three fitted eigenvectors and label the rows
# with the original date index.
dpca = pd.DataFrame(pcaA.transform(df), index=df.index)

## Evaluate and plot the reconstruction error

In [None]:
# Work on plain ndarrays: depending on the pandas version, np.matmul applied
# to a DataFrame can return a DataFrame with positional column labels, which
# would not line up with df's columns in a later subtraction.
y_mean = np.asarray(np.mean(df, axis=0)).reshape(1, -1)  # -1: any number of columns
y_centered = np.asarray(df) - y_mean

# Scores on the principal components, then map back to the original space.
x = np.matmul(y_centered, pcaA.components_.T)
Y_recon_pca = np.matmul(x, pcaA.components_) + y_mean

In [None]:
plt.figure(figsize=(20,15))

# Reconstruction error per column; subtract position-wise so the result does
# not depend on how the reconstruction's columns happen to be labelled.
recon_error_pca = pd.DataFrame(np.asarray(Y_recon_pca) - np.asarray(df))
plt.plot(df.index, recon_error_pca)
plt.xlim(df.index.min(), df.index.max())
plt.axhline(y=0,c="grey",linewidth=0.5,zorder=0)

# Thin vertical guide at the last observation before 1 January of each year.
for year in range(df.index.min().year, df.index.max().year+1):
    # pd.Timestamp replaces the pd.datetime alias, which recent pandas no
    # longer provides.
    pos = df.index.searchsorted(pd.Timestamp(year=year, month=1, day=1)) - 1
    # Clamp at 0: when no observation precedes 1 January, -1 would wrap
    # around to the final date of the series.
    plt.axvline(x=df.index[max(pos, 0)],
                c="grey", linewidth=0.5, zorder=0)


### Evaluate the amount of variance explained by each component

In [None]:
# Print, numbered from 1, the percentage of variance explained by each
# fitted principal component.
for rank, ratio in enumerate(pcaA.explained_variance_ratio_, start=1):
    line = '{0}.\t{1:2.2f}%'.format(rank, ratio*100.0)
    print(line)

### Plot the loading vectors

In [None]:
fig = plt.figure(figsize=(16,10))

# Number of fitted components (rows of components_).
n_components = pcaA.components_.shape[0]
plt.title('First {0} PCA components'.format(n_components))

# Draw one line per fitted component so the plot always agrees with the
# count reported in the title, whatever n_components was used for the fit.
for rank, loading in enumerate(pcaA.components_, start=1):
    plt.plot(loading, label='{0}. PC'.format(rank))
plt.xticks(range(len(dt.index)), dt.index, size='small')

plt.legend()
None

# Linear Autoencoders

### Fit the linear auto-encoder

In [None]:
# Train the linear autoencoder with a 3-unit bottleneck and no L2 penalty.
ae = LinearAE(y=df, dimension=3, regularization=0, epochs=300)

# The model has two Dense layers, so get_weights() yields four arrays:
# kernel and bias of the first layer, then kernel and bias of the second.
w1, b1, w2, b2 = ae.get_weights()

### Extract the first singular vectors of the decoder weight matrix
The left singular vectors approximate the PCA loading vectors (up to a sign)

In [None]:
# PCA by applying SVD to linear autoencoder weights: keep only the matrix of
# left singular vectors (first element of the SVD result).
p_linear_ae = np.linalg.svd(w2.T, full_matrices=False)[0]

In [None]:
# Column-wise mean of the data.
mu = df.mean(axis=0)
# Project the mean-centered data onto the left singular vectors.
x_hat = np.matmul(df.sub(mu, axis='columns'), p_linear_ae)

In [None]:
# Wrap the projected data in a frame that carries df's date index.
dae = pd.DataFrame(x_hat, index=df.index)

Diagonalize the sample covariance matrix with the m-loading vectors:
$$\text{P}_m^T \text{Y}_0 \text{Y}_0^T \text{P}_m$$

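The matrix $C$ computed below is the scatter matrix of the mean-centered data, i.e. the sample covariance matrix up to a constant normalization factor; the factor cancels in the explained-variance ratios computed later.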

In [None]:
# Mean-centered data, then its cross-product (transpose times itself).
centered = df - mu
C = np.matmul(centered.T, centered)

### Compare the transformed sample covariances
Begin with PCA

In [None]:
# Transform C with the PCA components: components_ * C * components_^T,
# evaluated right-to-left in two steps.
C_times_loadings = np.matmul(C, pcaA.components_.T)
Lambda = np.matmul(pcaA.components_, C_times_loadings)

In [None]:
# Annotated heatmap of the current Lambda; the title distinguishes this
# figure from the other Lambda heatmaps in the notebook.
ax = sns.heatmap(Lambda, annot=True)
ax.set_title('C transformed by the PCA loading vectors')
plt.show()

Show the explained variances by each component (check with above)

In [None]:
# Percentage share of each diagonal entry of Lambda in the diagonal total.
diag_entries = np.diag(Lambda)
100*diag_entries/np.sum(diag_entries)

Now perform the transformation of the covariance matrix using the decoder weights. Note that the matrix $\Lambda$ is no longer diagonal.

In [None]:
# Transform C with the raw decoder kernel: w2 * C * w2^T, evaluated
# right-to-left in two steps.
C_times_w2T = np.matmul(C, w2.T)
Lambda = np.matmul(w2, C_times_w2T)

In [None]:
# Annotated heatmap of the current Lambda; the title distinguishes this
# figure from the other Lambda heatmaps in the notebook.
ax = sns.heatmap(Lambda, annot=True)
ax.set_title('C transformed by the decoder weights')
plt.show()

Finally, perform the transformation of the covariance matrix using the left singular vectors of the decoder weights. Note that the matrix $\Lambda$ is diagonal.

In [None]:
# Transform C with the left singular vectors of the decoder kernel:
# P^T * C * P, evaluated right-to-left in two steps.
C_times_P = np.matmul(C, p_linear_ae)
Lambda = np.matmul(p_linear_ae.T, C_times_P)

In [None]:
# Annotated heatmap of the current Lambda; the title distinguishes this
# figure from the other Lambda heatmaps in the notebook.
ax = sns.heatmap(Lambda, annot=True)
ax.set_title('C transformed by the left singular vectors of the decoder weights')
plt.show()

Show the explained variances by each diagonal component and compare with PCA.

In [None]:
# Percentage share of each diagonal entry of Lambda in the diagonal total.
diag_entries = np.diag(Lambda)
100*diag_entries/np.sum(diag_entries)

## Visualize the data projected onto the principal components
First show the data on the principal components obtained by PCA. Then show the data using the left singular vectors from the decoder weight matrix.

In [None]:
# plot the result
merged_years = 1
pc1 = 0
pc2 = 1
fig = plt.figure(figsize=(16,12))
plt.title('Projection on {0}. and {1}. PC'.format(pc1+1,pc2+1))
plt.axhline(y=0,c="grey",linewidth=1.0,zorder=0)
plt.axvline(x=0,c="grey",linewidth=1.0,zorder=0)
    
sc = plt.scatter(dpca.loc[:,pc1],dpca.loc[:,pc2], c=[d.year for d in dpca.index], cmap='rainbow')
cb = plt.colorbar(sc)
cb.set_ticks(ticks=np.unique([d.year for d in dpca.index])[::1])
cb.set_ticklabels(np.unique([d.year for d in dpca.index])[::1])

for year in range(dpca.index.min().year,dpca.index.max().year+1,merged_years):
    year_start = dpca.index[dpca.index.searchsorted(pd.datetime(year,1,1))]
    year_end = dpca.index[dpca.index.searchsorted(pd.datetime(year+merged_years-1,12,31))]
    
    plt.annotate('{0}'.format(year), xy=(dpca.loc[year_start,pc1],dpca.loc[year_start,pc2]), xytext=(dpca.loc[year_start,pc1],dpca.loc[year_start,pc2]))

None

Now show the data using the left singular vectors from the decoder weight matrix. Note that the sign of the first principal component has been changed for ease of comparison.

In [None]:
# Scatter of the data projected on two columns of `dae`, coloured by year.
# The first plotted coordinate is negated, as in the PCA comparison text.
merged_years = 1
pc1 = 0
pc2 = 1
fig = plt.figure(figsize=(16,12))
plt.title('Projection on {0}. and {1}. PC'.format(pc1+1,pc2+1))
plt.axhline(y=0,c="grey",linewidth=1.0,zorder=0)
plt.axvline(x=0,c="grey",linewidth=1.0,zorder=0)

# Year of every observation, computed once and reused for the colours and
# for the colour-bar ticks.
obs_years = np.asarray(dae.index.year)
year_ticks = np.unique(obs_years)

sc = plt.scatter(-dae.loc[:,pc1],dae.loc[:,pc2], c=obs_years, cmap='rainbow')
cb = plt.colorbar(sc)
cb.set_ticks(year_ticks)
cb.set_ticklabels(year_ticks)

# Label the first observation on or after 1 January of each year.
for year in range(dae.index.min().year,dae.index.max().year+1,merged_years):
    # pd.Timestamp replaces the pd.datetime alias, which recent pandas no
    # longer provides. No end-of-year lookup is made: it was never used and
    # could index past the end of the data for the final year.
    year_start = dae.index[dae.index.searchsorted(pd.Timestamp(year=year, month=1, day=1))]
    first_point = (-dae.loc[year_start,pc1], dae.loc[year_start,pc2])
    plt.annotate('{0}'.format(year), xy=first_point, xytext=first_point)

None

### Plot the reconstruction error of the linear autoencoder

In [None]:
# Reconstruct the data with the linear autoencoder and plot the error
# (reconstruction minus original) for every column.
y_hat_l=ae.predict(df)
plt.figure(figsize=(20,15))
plt.plot(df.index, pd.DataFrame(y_hat_l-df))
plt.xlim(df.index.min(), df.index.max())
plt.ylim(-0.008, 0.008)
plt.axhline(y=0,c="grey",linewidth=0.5,zorder=0)

# Thin vertical guide at the last observation before 1 January of each year.
for year in range(df.index.min().year, df.index.max().year+1):
    # pd.Timestamp replaces the pd.datetime alias, which recent pandas no
    # longer provides.
    pos = df.index.searchsorted(pd.Timestamp(year=year, month=1, day=1)) - 1
    # Clamp at 0: when no observation precedes 1 January, -1 would wrap
    # around to the final date of the series.
    plt.axvline(x=df.index[max(pos, 0)],
                c="grey", linewidth=0.5, zorder=0)

### Deep Autoencoders

In [None]:
# Deep Autoencoder
def DeepAE(y, dimension, learning_rate = 1e-4, regularization = 5e-4, epochs=10):
    """Build and train a four-layer tanh autoencoder that reconstructs ``y``.

    Layer widths are ``2*dimension -> dimension -> y.shape[1]/2 (truncated)
    -> y.shape[1]``, all with ``tanh`` activations and an L2 penalty on each
    kernel.  The model is compiled with the Adam optimizer and a
    mean-squared-error loss, then fitted on ``(y, y)`` with a batch size of 4
    and shuffling enabled.  Because the output layer is ``tanh``, every
    reconstructed value lies in (-1, 1).

    Parameters
    ----------
    y : 2-D array-like
        Training data; ``y.shape[1]`` sets the input and output width.
    dimension : int
        Width of the bottleneck (second encoding) layer.
    learning_rate : float, optional
        Step size passed to Adam.
    regularization : float, optional
        L2 penalty factor applied to every kernel.
    epochs : int, optional
        Number of training epochs.

    Returns
    -------
    The fitted Keras ``Model`` mapping the input to its reconstruction.
    """
    n_features = y.shape[1]
    # Named `inputs` so the Python builtin `input` is not shadowed.
    inputs = Input(shape=(n_features,))
    # The builtin int() replaces np.int, which recent NumPy no longer provides.
    encoded1 = Dense(int(2*dimension), activation='tanh',
                    kernel_regularizer=regularizers.l2(regularization))(inputs)
    encoded2 = Dense(dimension, activation='tanh',
                    kernel_regularizer=regularizers.l2(regularization))(encoded1)
    decoded1 = Dense(int(n_features/2), activation='tanh',
                    kernel_regularizer=regularizers.l2(regularization))(encoded2)
    decoded2 = Dense(n_features, activation='tanh',
                    kernel_regularizer=regularizers.l2(regularization))(decoded1)
    autoencoder = models.Model(inputs, decoded2)
    # Use the optimizer class `Adam`; the lowercase `adam` alias is not
    # available in newer Keras releases.
    # NOTE(review): newer Keras spells this argument `learning_rate`; `lr` is
    # kept to match the Keras version this notebook was written for - confirm.
    autoencoder.compile(optimizer=optimizers.Adam(lr=learning_rate), loss='mean_squared_error')
    autoencoder.fit(y, y, epochs=epochs, batch_size=4, shuffle=True)

    return autoencoder

In [None]:
# Train the deep autoencoder with a 3-unit bottleneck and no L2 penalty.
# NOTE(review): this rebinds `dae`, which earlier cells use for the projected
# DataFrame; re-running those cells after this one will see the model instead.
dae = DeepAE(y=df, dimension=3, regularization=0, epochs=500)

### Plot the reconstruction error of the deep autoencoder

In [None]:
# Reconstruct the data with the deep autoencoder and plot the error
# (reconstruction minus original) for every column.
y_hat_d=dae.predict(df)
plt.figure(figsize=(20,15))
plt.plot(df.index, pd.DataFrame(y_hat_d-df))
plt.xlim(df.index.min(), df.index.max())
plt.ylim(-0.008, 0.008)
plt.axhline(y=0,c="grey",linewidth=0.5,zorder=0)

# Thin vertical guide at the last observation before 1 January of each year.
for year in range(df.index.min().year, df.index.max().year+1):
    # pd.Timestamp replaces the pd.datetime alias, which recent pandas no
    # longer provides.
    pos = df.index.searchsorted(pd.Timestamp(year=year, month=1, day=1)) - 1
    # Clamp at 0: when no observation precedes 1 January, -1 would wrap
    # around to the final date of the series.
    plt.axvline(x=df.index[max(pos, 0)],
                c="grey", linewidth=0.5, zorder=0)