# Packages

In [4]:
import warnings
import numpy as np
import pandas as pd

import plotly
import plotly.graph_objs as go
import plotly.offline as offline
offline.init_notebook_mode(connected=True)

# add a warning suppressing
warnings.filterwarnings("ignore")

# Data EUR-GBP Exchange Rate last five years:

In [6]:
df = pd.read_excel('../data/EURGBP.xlsx',index_col='date')
df.head()

Unnamed: 0_level_0,EURGBP
date,Unnamed: 1_level_1
2007-10-23,0.69575
2007-10-24,0.6955
2007-10-25,0.69765
2007-10-26,0.701
2007-10-29,0.69895


In [7]:
trace1 = go.Scatter(
                    x = df.index,
                    y = df.values[:,0],
                    mode = "lines", name="EURGBP")
data = [trace1]

layout = dict(title = 'Exchange Rate EUR-GBP',
              xaxis= dict(title= 'T',ticklen= 5,zeroline= False),
              yaxis= dict(title= 'EURGBP',ticklen= 5,zeroline= False))

fig = dict(data = data, layout = layout)
offline.iplot(fig)

# Log-Returns of time series

In [8]:
df['Ret']= df['EURGBP'].pct_change(periods=1)
df['LogRet'] = df['Ret'].apply(lambda x: np.log(1+x))
df.dropna(how='any',inplace=True)
X = df['LogRet'].values.reshape(-1,1)

In [9]:
from pomegranate import hmm , NormalDistribution , State

<b>hmm.HiddenMarkovModel.from_samples</b>
This method will learn both the transition matrix, emission distributions, and start probabilities for each state. This will only return a dense graph without any silent states or explicit transitions to an end state. Currently all components must be defined as the same distribution, but soon this restriction will be removed.

If learning a multinomial HMM over discrete characters, the initial emisison probabilities are initialized randomly. If learning a continuous valued HMM, such as a Gaussian HMM, then kmeans clustering is used first to identify initial clusters.

Regardless of the type of model, the transition matrix and start probabilities are initialized uniformly. Then the specified learning algorithm (Baum-Welch recommended) is used to refine the parameters of the model.

In [10]:
# This method will learn both the transition matrix, emission distributions, 
# and start probabilities for each state. 
model = hmm.HiddenMarkovModel.from_samples(NormalDistribution, 
                                           n_components=2, 
                                           X=X,
                                           algorithm='viterbi')


### Prediction easy: response and/or probabilities possible

<b>model.predict_proba(X)</b><br>The normalized probabilities of each state generating each emission.

In [11]:
Z_proba = model.predict_proba(X)

In [12]:
df_result_proba = pd.DataFrame(Z_proba, columns = ['prob node 1', 'prob node 2'])
df_result_proba= df_result_proba.apply(lambda x : np.round(x,4))
df_result_proba.index = df.index
df_result_proba.head()

Unnamed: 0_level_0,prob node 1,prob node 2
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-10-24,0.0,1.0
2007-10-25,0.0,1.0
2007-10-26,0.0,1.0
2007-10-29,0.0,1.0
2007-10-30,0.0,1.0


<b>model.predict()</b>

In [13]:
Z = model.predict(X)
df_result = pd.DataFrame(Z)
df_result.columns = ['state']
df_result.index = df.index


In [14]:
idx_1 = df_result['state'] == 1
idx_0 = df_result['state'] == 0

if sum(idx_1)> sum(idx_0):
    size_1 = 4
    size_0 = 12
else:
    size_1=12
    size_0 = 4

trace1 = go.Scatter(
                    x = df.loc[idx_1,:].index,
                    y = df.loc[idx_1,['EURGBP']].values[:,0],
                    mode = "markers", name="regime 1", 
                    marker=dict(size=size_1))

trace0 = go.Scatter(
                    x = df.loc[idx_0,:].index,
                    y = df.loc[idx_0,['EURGBP']].values[:,0],
                    mode = "markers", name="regime 0", 
                    marker=dict(size=size_0))
data = [trace1,trace0]

layout = dict(title = 'EURGBP',
              xaxis= dict(title= 'Time',ticklen= 5,zeroline= False),
              yaxis= dict(title= 'EURGBP',ticklen= 5,zeroline= False))

fig = dict(data = data, layout = layout)
offline.iplot(fig)


In [15]:
print(" ".join( state.name for i, state in model.maximum_a_posteriori( X )[1] )[0:15]+'...')

s1 s1 s1 s1 s1 ...


# Transition Matrix

In [16]:
transition_matrix = model.forward_backward(X)[0][0:2,0:2]
v = np.sum(transition_matrix,0).T
transition_matrix = transition_matrix/v.reshape(-1,1)
print('Transition Matrix')
pd.DataFrame(transition_matrix)

Transition Matrix


Unnamed: 0,0,1
0,0.077887,0.922113
1,0.006324,0.993676


# Properties of the Regimes (emission)

In [17]:
reg = [idx_0,idx_1]
for i in np.arange(0,len(reg)):
    std = np.float(df.loc[reg[i],['LogRet']].std())
    mean = np.float(df.loc[reg[i],['LogRet']].mean())
    print('Regime:',i,'Mean:',np.round(100*mean,3),'% Std:',np.round(100*std,3),'%')
    

Regime: 0 Mean: -1.913 % Std: 0.305 %
Regime: 1 Mean: 0.021 % Std: 0.555 %
