In [1]:
import numpy as np
import pandas as pd
import scipy as sp
from scipy.stats import multivariate_normal, weibull_min
import matplotlib.pyplot as plt
from tqdm import tqdm
from mpl_toolkits.mplot3d import Axes3D

In [2]:
# Route all figure text through LaTeX and switch to the PGF backend; the
# preamble loads the T2A font encoding and Russian babel support.
plt.rcParams.update({
    'text.usetex': True,
    'pgf.rcfonts': False,
    'pgf.texsystem': 'pdflatex',
    'pgf.preamble': '\n'.join([
        r'\usepackage[T1, T2A]{fontenc}',
        r'\usepackage[utf8]{inputenc}',
        r'\usepackage[english, russian]{babel}',
    ]),
})
plt.rc('font', family='serif')
plt.switch_backend('pgf')

In [3]:
# Fixed colour order used for successive plot elements.
my_colors = ['green', 'red', 'gray', 'blue', 'purple', 'orange']
plt.rcParams['axes.prop_cycle'] = plt.cycler('color', my_colors)

In [4]:
# Base font size for every figure.
plt.rc('font', size=15)

In [5]:
def stationary_distr(Q):
  """Return the stationary distribution of the transition matrix ``Q``.

  The left eigenvector of ``Q`` whose eigenvalue is 1 (within the
  ``np.isclose`` tolerance) is normalised so that its entries sum to one.
  """
  # Left eigenvectors of Q are the right eigenvectors of its transpose.
  eigenvalues, eigenvectors = np.linalg.eig(Q.T)
  unit_mask = np.isclose(eigenvalues, 1)
  # Boolean-mask indexing keeps the result 2-D; take its first column.
  principal = eigenvectors[:, unit_mask][:, 0]
  # eig may return complex values, so only the real part is kept.
  return (principal / principal.sum()).real

In [6]:
def init_X(pi, U=None):
  """Sample an index from the discrete distribution ``pi`` by inversion.

  Parameters:
    pi: sequence of probabilities, indexable by ``0 .. len(pi) - 1``.
    U:  uniform(0, 1) variate to invert. When omitted, a fresh one is drawn
        from NumPy's global RNG on every call.

  Returns:
    The first index whose cumulative probability exceeds ``U``. If rounding
    keeps the cumulative sum at or below ``U``, the last index is returned.

  Raises:
    ValueError: if ``pi`` is empty.

  Note: the previous default ``U=np.random.uniform(0, 1, 1)`` was evaluated
  once, when the function was defined, so every call without ``U`` reused the
  same number, drawn before any later ``np.random.seed`` call. Drawing inside
  the function consumes one extra value from the global RNG per such call.
  """
  if len(pi) == 0:
    raise ValueError('init_X needs at least one state probability')
  if U is None:
    U = np.random.uniform(0, 1)
  cumm = 0
  for i in range(len(pi)):
    cumm += pi[i]
    if cumm > U:
      return i
  # Floating-point rounding can leave sum(pi) marginally below U.
  return len(pi) - 1

In [7]:
def scale(row):
  """Divide ``row`` by the sum of its entries, so the result sums to one."""
  return row / np.sum(row)

In [8]:
def model_X(dim_X, markov_chain, H, T):
  """Simulate a path of the hidden chain on a grid of ``int(T/H)`` steps.

  Parameters:
    dim_X:        number of states.
    markov_chain: tuple ``(Lambda, pi, f, g)``; only ``Lambda`` and ``pi``
                  are used here.
    H:            grid step.
    T:            time horizon.

  Returns:
    ``(X, X_grid)``: the int8 state path and the integer grid indices.
  """
  Lambda, pi, _, _ = markov_chain

  n_steps = int(T/H)
  uniforms = np.random.uniform(0, 1, n_steps)
  X_grid = np.arange(0, n_steps)
  X = np.empty(n_steps, dtype=np.int8)
  X[0] = init_X(pi)
  # One-step transition matrix used for the simulation.
  P = np.eye(dim_X) + H * Lambda
  for step in tqdm(X_grid[1:]):
    # Sample the next state from the renormalised row of the current state.
    X[step] = init_X(scale(P[X[step - 1]]), uniforms[step])
  return X, X_grid

In [9]:
def model_Y(X, dim_Y, markov_chain, discretization, T):
  """Simulate increments ``Y`` and their running level ``S`` given path ``X``.

  Parameters:
    X:              hidden-state path on the grid with step ``H``.
    dim_Y:          dimension of the observed process.
    markov_chain:   tuple ``(Lambda, pi, f, g)``; ``f[k]`` enters multiplied
                    by ``h`` and ``g[k]`` through ``cholesky(h * g[k])``.
    discretization: tuple ``(H, h)`` of the X-grid and Y-grid steps.
    T:              time horizon.

  Returns:
    ``(Y, S, Y_grid)`` with ``Y`` and ``S`` of shape ``(int(T/h), dim_Y)``.
    ``S[0]`` is all ones and ``S[i] = S[i-1] + Y[i]``. ``Y[0]`` is zero:
    there is no increment before the first grid point (it was previously left
    as uninitialised ``np.empty`` memory).
  """
  H, h = discretization
  Lambda, pi, f, g = markov_chain
  # observations
  Y_grid_length = int(T/h)
  # Number of X-grid steps per Y-grid step; constant across the loop.
  stride = int(h/H)

  W = np.random.multivariate_normal(np.zeros(dim_Y), np.eye(dim_Y), Y_grid_length)

  Y_grid = np.arange(0, Y_grid_length)
  Y = np.empty((Y_grid_length, dim_Y))
  S = np.empty((Y_grid_length, dim_Y))
  Y[0] = 0
  S[0] = np.ones(dim_Y)
  for i in tqdm(Y_grid[1:]):
    state = X[i * stride]
    level = np.diag(S[i-1])
    Y[i] = level @ f[state] * h + level @ W[i] @ np.linalg.cholesky(h * g[state])
    S[i] = S[i - 1] + Y[i]
  return Y, S, Y_grid

In [10]:
def model_Y_easy(X, dim_Y, markov_chain, discretization, T):
  """Scalar variant of the simulator that always uses ``f[0]`` and ``g[0]``.

  ``X`` and ``dim_Y`` are accepted for signature parity but not used.

  Returns:
    ``(Y, S, Y_grid)`` as 1-D arrays of length ``int(T/h)`` with ``S[0] = 1``
    and ``S[i] = S[i-1] + Y[i]``. ``Y[0]`` is zero (it was previously left as
    uninitialised ``np.empty`` memory).

  Raises:
    ValueError: if ``f[0]`` or ``g[0]`` holds more than one element.
  """
  H, h = discretization
  Lambda, pi, f, g = markov_chain
  # observations
  Y_grid_length = int(T/h)

  W =  np.random.normal(0, 1, Y_grid_length)

  # f[0] / g[0] may arrive as size-1 arrays; reduce them to true scalars so
  # the element assignment below does not rely on NumPy's deprecated implicit
  # size-1-array-to-scalar conversion.
  drift = float(np.reshape(f[0], ()))
  vol = float(np.sqrt(h * np.reshape(g[0], ())))

  Y_grid = np.arange(0, Y_grid_length)
  Y = np.empty(Y_grid_length)
  S = np.empty(Y_grid_length)
  Y[0] = 0
  S[0] = 1
  for i in tqdm(Y_grid[1:]):
    Y[i] = S[i-1] * drift * h + S[i-1] * W[i] * vol
    S[i] = S[i - 1] + Y[i]
  return Y, S, Y_grid

In [11]:
def to_sums(Y, start=0):
  """Return the shifted running sum of ``Y``.

  ``result[0] == start`` and ``result[i] == start + Y[0] + ... + Y[i-1]``,
  so the result has as many entries as ``Y`` and the last element of ``Y``
  is never added.

  Parameters:
    Y:     array of increments (flattened before summing).
    start: value of the first entry.

  Returns:
    A float array of length ``Y.size``; empty when ``Y`` is empty (the loop
    version raised ``IndexError`` there).

  The sum is computed with ``np.cumsum`` instead of a Python loop, so no
  progress bar is printed any more.
  """
  flat = np.asarray(Y, dtype=float).ravel()
  if flat.size == 0:
    return np.empty(0)
  # Prepending `start` and dropping the last increment reproduces the
  # sequential accumulation order of the original loop.
  return np.cumsum(np.concatenate(([start], flat[:-1])), dtype=float)

In [12]:
def model_observations(Y, X, weibull_distr_params, discretization, Y_grid, dimensions):
  """Sample observation times on the Y grid and the log-returns between them.

  Starting at grid index 0, a Weibull waiting time is drawn with the
  parameters of the current hidden state, rounded to a whole number of grid
  steps ``h``, and the grid index is advanced by that many steps.

  Parameters:
    Y:                    series indexed on the Y grid; ``log(Y[i] / Y[prev])``
                          is recorded for consecutive observation indices.
    X:                    hidden-state path on the grid with step ``H``.
    weibull_distr_params: per-state rows ``[shape, scale]`` passed to
                          ``weibull_min.rvs``.
    discretization:       tuple ``(H, h)``.
    Y_grid:               1-D grid array; only its size is used.
    dimensions:           tuple ``(dim_X, dim_Y)`` (unpacked, otherwise unused).

  Returns:
    ``(S, t)``: log-returns and rounded waiting times. The final draw, which
    steps past the end of the grid, is dropped.

  Raises:
    ValueError: if an entry of ``Y`` holds more than one element.
  """
  dim_X, dim_Y = dimensions
  H, h = discretization
  # Number of X-grid steps per Y-grid step; constant across the loop.
  stride = int(h/H)
  n_grid = Y_grid.size
  i = 0
  t = np.empty(n_grid)
  S = np.empty(n_grid)
  curr_pos = 0
  # The context manager closes the bar even if the loop raises.
  with tqdm(total=n_grid) as pbar:
    while i < n_grid:
      prev = i
      params = weibull_distr_params[X[i * stride]]
      # rvs(size=1) returns a length-1 array; take the element explicitly
      # instead of relying on the deprecated int(array) conversion.
      weibull = weibull_min.rvs(params[0], size=1, scale=params[1])[0]
      w_int = int(np.around(weibull * h ** (-1), 0))
      t[curr_pos] = w_int * h
      i += w_int
      if i < n_grid:
        # Y[i] may be a length-1 row; collapse it to 0-d before storing.
        S[curr_pos] = np.reshape(np.log(Y[i] / Y[prev]), ())
      curr_pos += 1
      # Clamp so the bar ends at its total instead of overshooting.
      pbar.update(min(i, n_grid) - prev)
  return S[:curr_pos - 1], t[:curr_pos - 1]

In [13]:
def model_trades(S, t, window_orig, T):
  """Aggregate trades into consecutive time windows of length ``window_orig``.

  Trades are scanned in order. A window is closed at trade ``i`` as soon as
  the waiting times accumulated since the last cut (``t[left:i]``) reach the
  current target length. Any overshoot is subtracted from the next target.

  Parameters:
    S:           per-trade increments.
    t:           per-trade waiting times (same length as ``S``).
    window_orig: nominal window length.
    T:           time horizon; outputs have ``int(T / window_orig)`` slots.

  Returns:
    ``(mean_S, mean_t, cumm_T)``: per-window sum of ``S`` and trade count,
    both divided by the square root of the window length, plus the raw trade
    count. The trades left after the last cut fill one extra slot, normalised
    by their own total time. Slots that are never filled hold ``NaN`` (they
    used to hold uninitialised ``np.empty`` memory).

  Raises:
    IndexError: if more windows are produced than there are slots.
  """
  n_windows = int(T/window_orig)
  mean_S = np.full(n_windows, np.nan)
  mean_t = np.full(n_windows, np.nan)
  cumm_T = np.full(n_windows, np.nan)
  index_of_left = 0
  curr_index = 0
  window = window_orig
  for i in tqdm(range(t.size)):
    # Time covered by the trades of the currently open window.
    elapsed = np.sum(t[index_of_left:i])
    if elapsed >= window:
      mean_S[curr_index] = np.sum(S[index_of_left:i]) / np.sqrt(window)  # or should be window?
      mean_t[curr_index] = (i - index_of_left) / np.sqrt(window)  # np.sum(t[index_of_left:i]) / np.sqrt(window_orig)
      cumm_T[curr_index] = i - index_of_left
      # Carry the overshoot into the next window's target length.
      window = window + window_orig - elapsed
      index_of_left = i
      curr_index += 1
  left_time = np.sum(t[index_of_left:])
  mean_S[curr_index] = np.sum(S[index_of_left:]) / np.sqrt(left_time)
  mean_t[curr_index] = (t.size - index_of_left) / np.sqrt(left_time)
  cumm_T[curr_index] = t.size - index_of_left
  return mean_S, mean_t, cumm_T


In [14]:
def filter(U, h, window, markov_chain, T, weibull_distr_params, dim_X):
  """Recursively estimate the hidden-state probabilities on the ``h`` grid.

  Each step propagates the previous probabilities with ``(I + h*Lambda).T``.
  Every ``int(window / h)`` steps the prediction is reweighted by a
  two-dimensional Gaussian likelihood of the matching row of ``U`` and
  renormalised.

  Parameters:
    U:                    array of observations, one row per window; row ``k``
                          is used at step ``k * int(window / h)``.
    h:                    grid step.
    window:               aggregation window length.
    markov_chain:         tuple ``(Lambda, pi, f, g)``; ``f`` and ``g`` are
                          flattened to one value per state.
    T:                    time horizon; the output has ``int(T/h)`` rows.
    weibull_distr_params: per-state rows ``[shape, scale]`` for ``weibull_min``.
    dim_X:                number of hidden states.

  Returns:
    ``X_pred``: float32 array of shape ``(int(T/h), dim_X)``.

  The likelihood weights are formed in the log domain and shifted by their
  maximum, so a far-away observation no longer underflows every weight to
  zero. If the weights still cannot be normalised (non-finite or zero total,
  e.g. a NaN observation), the step index is printed and the predicted
  distribution is kept, instead of dividing by zero and turning the rest of
  the returned path into NaN.

  Note: the name shadows the built-in ``filter``; it is kept for callers.
  """
  Lambda, pi, f, g = markov_chain
  g = g.flatten()
  f = f.flatten()
  X_pred_grid_length = int(T/h)
  X_pred = np.empty((X_pred_grid_length, dim_X), dtype=np.float32)
  X_pred_grid = np.arange(0, X_pred_grid_length)

  # One frozen Gaussian per state, built once instead of on every update.
  emissions = []
  for i in range(dim_X):
    m_l = weibull_min.mean(weibull_distr_params[i][0], scale=weibull_distr_params[i][1])
    d_l = weibull_min.var(weibull_distr_params[i][0], scale=weibull_distr_params[i][1])
    mean = np.array([np.sqrt(window) / m_l, (f[i] - g[i] / 2) * np.sqrt(window)])
    cov = np.diag([d_l / (m_l ** 3), g[i]])  # m_l or a_l?
    emissions.append(multivariate_normal(mean, cov))

  # Number of grid steps between two consecutive observations.
  steps_per_window = int(window / h)
  P_T = (np.eye(dim_X) + h * Lambda).T
  X_pred[0] = pi
  for i in tqdm(X_pred_grid[1:]):
    X_pred[i] = P_T @ X_pred[i-1]
    if i % steps_per_window == 0:
      obs = U[i // steps_per_window]
      log_lik = np.array([dist.logpdf(obs) for dist in emissions])
      # Subtracting the maximum leaves the normalised weights unchanged.
      with np.errstate(invalid='ignore'):
        Z = X_pred[i] * np.exp(log_lik - np.max(log_lik))
      total = np.sum(Z)
      if np.isfinite(total) and total > 0:
        X_pred[i] = Z / total
      else:
        # Update impossible at this step: report it and keep the prediction.
        print(i)
  return X_pred

In [15]:
def draw3dgraphs(iter, mean_S, mean_t, markov_chain, weibull_distr_params, dimensions, elev, azim, window=None):
    """Save a 3-D histogram of ``(mean_t, mean_S)`` with the mixture density.

    Parameters:
        iter:                 title prefix; currently unused (no title is
                              drawn). The name shadows the built-in ``iter``
                              and is kept for callers.
        mean_S, mean_t:       aggregated observations, one value per window.
        markov_chain:         tuple ``(Lambda, pi, f, g)``; ``pi`` weights the
                              mixture components.
        weibull_distr_params: per-state rows ``[shape, scale]``.
        dimensions:           tuple ``(dim_X, dim_Y)``.
        elev, azim:           camera angles passed to ``ax.view_init``.
        window:               aggregation window length. When omitted, the
                              module-level ``window`` is used, which is what
                              the function silently relied on before.

    Raises:
        NameError: if ``window`` is omitted and no global ``window`` exists.

    Writes ``histogram&gaussian.pgf``.
    """
    import mpl_toolkits.mplot3d.art3d as art3d

    if window is None:
        # Backward-compatible fallback to the notebook-level variable.
        if 'window' not in globals():
            raise NameError("draw3dgraphs needs `window`: pass it or define it globally")
        window = globals()['window']

    Lambda, pi, f, g = markov_chain
    dim_X, dim_Y = dimensions
    g = g.flatten()
    f = f.flatten()

    # Per-state Gaussian mean vectors and diagonal covariance matrices.
    N_0 = np.empty((dim_X, 2))
    N_1 = np.empty((dim_X, 2, 2))
    for i in range(dim_X):
        m_l = weibull_min.mean(weibull_distr_params[i][0], scale=weibull_distr_params[i][1])
        d_l = weibull_min.var(weibull_distr_params[i][0], scale=weibull_distr_params[i][1])
        N_0[i] = np.array([np.sqrt(window) / m_l, (f[i] - g[i] / 2) * np.sqrt(window)])
        N_1[i] = np.diag([d_l / (m_l ** 3), g[i]])  # m_l or a_l?

    t_min, t_max = np.min(mean_t), np.max(mean_t)
    S_min, S_max = np.min(mean_S), np.max(mean_S)
    t_range = t_max - t_min
    S_range = S_max - S_min

    bins = 40

    fig = plt.figure(figsize=(12, 7), dpi=240)
    ax = fig.add_subplot(projection='3d')
    hist, xedges, yedges = np.histogram2d(mean_t, mean_S, bins=bins, range=[[t_min, t_max], [S_min, S_max]], density=True)

    # Bar anchors: left bin edges shifted by a small fraction of the range.
    xpos, ypos = np.meshgrid(xedges[:-1] + t_range / (bins ** 2), yedges[:-1] + S_range / (bins ** 2), indexing="ij")
    xpos = xpos.ravel()
    ypos = ypos.ravel()
    zpos = 0

    dx = t_range / bins
    dy = S_range / bins
    dz = hist.ravel()
    ax.view_init(elev=elev, azim=azim)
    ax.auto_scale_xyz([], [], [])
    ax.bar3d(xpos, ypos, zpos, dx, dy, dz, zsort='average', alpha=0.7, color='green', zorder=2,label="Гистограмма")

    # Mixture density evaluated on the grid of (shifted) bin edges.
    X, Y = np.meshgrid(xedges + t_range / (bins ** 2), yedges + S_range / (bins ** 2), indexing="ij")
    points = np.vstack([X.ravel(), Y.ravel()]).T
    zs = np.zeros(X.size)
    for j in range(dim_X):
        zs += multivariate_normal.pdf(points, N_0[j], N_1[j]) * pi[j]
    Z = zs.reshape(X.shape)
    ax.plot_surface(X, Y, Z, color="red", shade=False, edgecolor="red", alpha=0.7)

    # Empty proxy lines carry the legend entries for the bars and the surface.
    bar_legend = art3d.Line3D([], [], [], color='green', label='Гистограмма')
    wireframe_legend = art3d.Line3D([], [], [], color='red', label='Теоритическая гауссиана смеси')
    ax.add_artist(bar_legend)
    ax.add_artist(wireframe_legend)
    ax.legend(handles=[bar_legend, wireframe_legend])

    plt.savefig("histogram&gaussian.pgf", bbox_inches='tight')
    #plt.show()

In [16]:
def _plot_marginal_density(samples, grid, means, variances, weights, bins, outfile):
    """Plot weighted component densities, a histogram and the mixture; save to ``outfile``."""
    plt.figure(figsize=(12, 7), dpi=240)
    mixture = np.zeros(grid.size)
    # Draw order (components, histogram, mixture) fixes the colours taken
    # from the property cycle, so it must not be changed.
    for mean, variance, weight in zip(means, variances, weights):
        component = weight * multivariate_normal.pdf(grid, mean, variance)
        mixture += component
        plt.plot(grid, component, color='yellow', label='Гауссианы компонент смеси, умноженные на вероятности')
    plt.hist(samples, bins, density=True, label='Гистограмма')
    plt.plot(grid, mixture, label='Теоритическая гауссиана смеси')
    plt.savefig(outfile, bbox_inches='tight')
    #plt.show()


def draw2dgraphs(iter, mean_S, mean_t, markov_chain, weibull_distr_params, dimensions, window=None):
    """Save the two marginal histograms with their Gaussian-mixture densities.

    Parameters:
        iter:                 title prefix; currently unused (no title is
                              drawn). The name shadows the built-in ``iter``
                              and is kept for callers.
        mean_S, mean_t:       aggregated observations, one value per window.
        markov_chain:         tuple ``(Lambda, pi, f, g)``; ``pi`` weights the
                              mixture components.
        weibull_distr_params: per-state rows ``[shape, scale]``.
        dimensions:           tuple ``(dim_X, dim_Y)``.
        window:               aggregation window length. When omitted, the
                              module-level ``window`` is used, which is what
                              the function silently relied on before.

    Raises:
        NameError: if ``window`` is omitted and no global ``window`` exists.

    Writes ``gaussian_of_t.pgf`` and ``gaussian_of_S.pgf``. No legend is
    drawn, as before (the legend call was already disabled).
    """
    if window is None:
        # Backward-compatible fallback to the notebook-level variable.
        if 'window' not in globals():
            raise NameError("draw2dgraphs needs `window`: pass it or define it globally")
        window = globals()['window']

    Lambda, pi, f, g = markov_chain
    dim_X, dim_Y = dimensions
    g = g.flatten()
    f = f.flatten()

    # Per-state Gaussian mean vectors and diagonal covariance matrices.
    N_0 = np.empty((dim_X, 2))
    N_1 = np.empty((dim_X, 2, 2))
    for i in range(dim_X):
        m_l = weibull_min.mean(weibull_distr_params[i][0], scale=weibull_distr_params[i][1])
        d_l = weibull_min.var(weibull_distr_params[i][0], scale=weibull_distr_params[i][1])
        N_0[i] = np.array([np.sqrt(window) / m_l, (f[i] - g[i] / 2) * np.sqrt(window)])
        N_1[i] = np.diag([d_l / (m_l ** 3), g[i]])  # m_l or a_l?

    bins = 30

    x = np.linspace(np.min(mean_t), np.max(mean_t), 500)
    y = np.linspace(np.min(mean_S), np.max(mean_S), 500)

    # First coordinate: mean_t.
    _plot_marginal_density(mean_t, x, N_0[:, 0], N_1[:, 0, 0], pi, bins, "gaussian_of_t.pgf")
    # Second coordinate: mean_S.
    _plot_marginal_density(mean_S, y, N_0[:, 1], N_1[:, 1, 1], pi, bins, "gaussian_of_S.pgf")

In [17]:
H = 10 ** (-7) # time step of the hidden-state (X) grid
h = 10 ** (-7) # time step of the observation (Y) grid
T = 1# time horizon. NOTE(review): this comment used to say "in minutes", but the plots label the time axis in years and convert to minutes with 250 * 8 * 60 - confirm the unit

# Seed NumPy's global RNG; seed == 0 means "leave the RNG unseeded".
seed = 69
if seed != 0:
    np.random.seed(seed)


# Number of hidden states and dimension of the observed process.
dim_X = 4
dim_Y = 1

# Matrix handed to the simulators as `Lambda`; they build the one-step
# transition matrix as I + H * Lambda. Every row sums to zero.
Lambda = np.array([[-12.5, 12.5, 0, 0],
                    [0, -1000, 1000, 0],
                    [0, 0, -250, 250],
                    [40, 0, 10, -50]])
# Stationary distribution of that one-step matrix.
pi = stationary_distr(np.eye(dim_X) + H * Lambda)
# Per-state coefficient that model_Y multiplies by h (one row per state).
f = np.array([[0.07], 
              [0.03], 
              [0.02],
              [0.025]])
# Per-state matrix that model_Y uses through cholesky(h * g[state]).
g = np.array([np.diag([0.1]), 
              np.diag([0.5]), 
              np.diag([0.6]),
              np.diag([0.3])])
# One row per state: [shape, scale], passed to scipy's weibull_min as the
# shape argument and the `scale` keyword respectively.
weibull_distr_params = np.array([[1, 0.0000060],  # in article we have alpha = 1/Lambda, beta = k
                                 [1.2, 0.0000050],  # k - wiki, scale
                                 [1.2, 0.0000055],  #  
                                 [1.4, 0.0000070]])
# Bundles passed around to the simulation, filtering and plotting functions.
markov_chain = (Lambda, pi, f, g)
discretization = (H, h)
dimensions = (dim_X, dim_Y)

In [18]:
# Disabled cell: the simulation calls are wrapped in a string literal, so
# running this cell only echoes the text and simulates nothing.
"""X, X_grid = model_X(dim_X, markov_chain, H, T)
Y, Y_sum, Y_grid = model_Y(X, dim_Y, markov_chain, discretization, T)"""

'X, X_grid = model_X(dim_X, markov_chain, H, T)\nY, Y_sum, Y_grid = model_Y(X, dim_Y, markov_chain, discretization, T)'

In [19]:
# Disabled cell: the save step is wrapped in a string literal, so nothing is
# written. NOTE(review): if re-enabled as is, `as f` would rebind the global
# array `f` to the file handle.
"""with open('XandY.npy', 'wb') as f:
    np.savez(f, X=X, X_grid=X_grid, Y=Y, Y_grid=Y_grid, Y_sum=Y_sum)"""

"with open('XandY.npy', 'wb') as f:\n    np.savez(f, X=X, X_grid=X_grid, Y=Y, Y_grid=Y_grid, Y_sum=Y_sum)"

In [20]:
# Load the cached simulation if it exists; otherwise simulate and cache it,
# so that Restart & Run All also works without a pre-existing 'XandY.npy'.
# Note: simulating consumes random numbers, so later random draws differ
# between a cached run and a freshly simulated one.
try:
    with open('XandY.npy', 'rb') as file:
        npzfile = np.load(file)
        X = npzfile['X'].copy()
        X_grid = npzfile['X_grid'].copy()
        Y = npzfile['Y'].copy()
        Y_grid = npzfile['Y_grid'].copy()
        Y_sum = npzfile['Y_sum'].copy()
except FileNotFoundError:
    X, X_grid = model_X(dim_X, markov_chain, H, T)
    Y, Y_sum, Y_grid = model_Y(X, dim_Y, markov_chain, discretization, T)
    # `file`, not `f`: `f` is a model array defined in the parameters cell.
    with open('XandY.npy', 'wb') as file:
        np.savez(file, X=X, X_grid=X_grid, Y=Y, Y_grid=Y_grid, Y_sum=Y_sum)

In [21]:
# Display the stationary distribution computed in the parameters cell.
pi

array([0.72072072, 0.00900901, 0.04504505, 0.22522523])

In [22]:
# Sample the trade stream from the loaded path, aggregate it into windows of
# length `window`, and stack the two aggregates into one row per window.
S, t = model_observations(Y_sum, X, weibull_distr_params, discretization, Y_grid, dimensions)
window = 10 ** (-4)
mean_S, mean_t, cumm_T = model_trades(S, t, window, T)
U = np.column_stack((mean_t, mean_S))

10000038it [00:07, 1315059.54it/s]                                                                                     
100%|██████████████████████████████████████████████████████████████████████| 166104/166104 [00:00<00:00, 267959.08it/s]


In [23]:
# Peek at the hidden-state path (NumPy abbreviates the long array).
X

array([0, 0, 0, ..., 0, 0, 0], dtype=int8)

In [24]:
# Run the filter on the aggregated observations; X_pred has one row of state
# probabilities per grid step.
X_pred = filter(U, h, window, markov_chain, T, weibull_distr_params, dim_X)

100%|████████████████████████████████████████████████████████████████████| 9999999/9999999 [00:30<00:00, 333103.61it/s]


In [25]:
#draw3dgraphs("Плотность распределения смеси ", mean_S, mean_t, markov_chain, weibull_distr_params, dimensions,elev=20,azim=-60)

In [26]:
# Write the two marginal density figures (gaussian_of_t.pgf, gaussian_of_S.pgf).
draw2dgraphs("Плотность распределения смеси ", mean_S, mean_t, markov_chain, weibull_distr_params, dimensions)

In [27]:
# Point estimate of the state: the most probable state at every grid step.
argmax = X_pred.argmax(axis=1)

In [28]:
# Drop the reference to the increments array (presumably to free memory;
# later cells only use Y_sum).
Y = None

In [29]:
# Length of the hidden-state path.
X.shape

(10000000,)

In [30]:
# Disabled cell: the full-range "true state vs. estimate" plot is wrapped in
# a string literal, so nothing is drawn or saved.
"""plt.figure(figsize=(12, 1), dpi=240)
#plt.title("Результат фильтрации")
plt.plot(np.arange(0,1,h), X+1, label='Действительное значение', zorder=2, lw=0.5)
plt.plot(np.arange(0,1,h), argmax+1, label='Оценка', zorder=1, lw=0.5)
plt.legend(loc='upper right')
plt.ylabel('Состояние МСП')
plt.xlabel('Время (год)')
plt.yticks([1, 2, 3, 4])
plt.savefig("filtration.pgf", bbox_inches='tight')
#plt.show()"""

'plt.figure(figsize=(12, 1), dpi=240)\n#plt.title("Результат фильтрации")\nplt.plot(np.arange(0,1,h), X+1, label=\'Действительное значение\', zorder=2, lw=0.5)\nplt.plot(np.arange(0,1,h), argmax+1, label=\'Оценка\', zorder=1, lw=0.5)\nplt.legend(loc=\'upper right\')\nplt.ylabel(\'Состояние МСП\')\nplt.xlabel(\'Время (год)\')\nplt.yticks([1, 2, 3, 4])\nplt.savefig("filtration.pgf", bbox_inches=\'tight\')\n#plt.show()'

In [31]:
# Disabled cell: the zoomed "true state vs. estimate" plot is wrapped in a
# string literal, so nothing is drawn or saved.
"""xstart=0.15
xend=0.2

coor_xstart = int(xstart / h)
coor_xend = int(xend/h)

plt.figure(figsize=(12, 1), dpi=240)
#plt.title("Результат фильтрации по наблюдениям в случайные моменты времени")
plt.plot(np.arange(0,1,h)[coor_xstart:coor_xend], (X+1)[coor_xstart:coor_xend], label='Действительное значение', zorder=2)
plt.plot(np.arange(0,1,h)[coor_xstart:coor_xend], (argmax+1)[coor_xstart:coor_xend], label='Оценка', zorder=1)
plt.legend(loc='upper right')
plt.ylabel('Состояние МСП')
plt.xlabel('Время (год)')
plt.yticks([1, 2, 3, 4])
plt.savefig("filtration_zoom.pgf", bbox_inches='tight')
#plt.show()"""

'xstart=0.15\nxend=0.2\n\ncoor_xstart = int(xstart / h)\ncoor_xend = int(xend/h)\n\nplt.figure(figsize=(12, 1), dpi=240)\n#plt.title("Результат фильтрации по наблюдениям в случайные моменты времени")\nplt.plot(np.arange(0,1,h)[coor_xstart:coor_xend], (X+1)[coor_xstart:coor_xend], label=\'Действительное значение\', zorder=2)\nplt.plot(np.arange(0,1,h)[coor_xstart:coor_xend], (argmax+1)[coor_xstart:coor_xend], label=\'Оценка\', zorder=1)\nplt.legend(loc=\'upper right\')\nplt.ylabel(\'Состояние МСП\')\nplt.xlabel(\'Время (год)\')\nplt.yticks([1, 2, 3, 4])\nplt.savefig("filtration_zoom.pgf", bbox_inches=\'tight\')\n#plt.show()'

In [32]:
plt.figure(figsize=(12, 7), dpi=240)
# Build the time axis from the number of samples rather than from
# np.arange(0, 1, h): a float step can yield a length that differs from
# len(Y_sum), and the literal 1 silently hard-coded the horizon.
time_axis = np.arange(Y_sum.shape[0]) * h
plt.plot(time_axis, Y_sum)
#plt.title('Смоделированная траектория цены акции')
plt.xlabel('Время (год)')
plt.ylabel('Цена базового финансового актива')
plt.savefig("stock_trajectory.pgf", bbox_inches='tight')
#plt.show()

In [33]:
# Step plot of the per-trade values in S against cumulative trade time.
fig, ax = plt.subplots(figsize=(11, 7), dpi=240)
ax.step(to_sums(t), S, where='post')
ax.set_xlabel('Время (год)')
ax.set_ylabel('Приращение цены базового актива')
fig.savefig("trades_trajectory.pgf", bbox_inches='tight')

100%|█████████████████████████████████████████████████████████████████████| 166103/166103 [00:00<00:00, 1950420.99it/s]


In [34]:
# Same step plot for the first 55 trades; the time axis is rescaled by
# 250 * 8 * 60 and labelled in minutes.
fig, ax = plt.subplots(figsize=(11, 7), dpi=240)
ax.step(to_sums(t)[:55] * (250 * 8 * 60), S[:55], where='post')
ax.set_xlabel('Время (минуты)')
ax.set_ylabel('Приращение цены базового актива')
fig.savefig("trades_trajectory_half_hour.pgf", bbox_inches='tight')

100%|█████████████████████████████████████████████████████████████████████| 166103/166103 [00:00<00:00, 2576055.48it/s]


In [35]:
# Cumulative times of the first 55 trades, rescaled by 250 * 8 * 60 (the
# same factor the minutes-labelled plot uses).
to_sums(t)[:55] * (250 * 8 * 60)

100%|█████████████████████████████████████████████████████████████████████| 166103/166103 [00:00<00:00, 1992451.24it/s]


array([ 0.   ,  0.252,  1.44 ,  1.752,  2.868,  3.456,  3.672,  3.756,
        3.804,  4.608,  5.472,  6.42 ,  7.536,  7.596,  7.872,  8.844,
        9.132,  9.588,  9.732, 10.008, 10.44 , 10.98 , 11.232, 11.46 ,
       12.276, 12.468, 13.284, 13.692, 14.04 , 14.34 , 14.856, 18.024,
       18.564, 19.38 , 19.74 , 19.824, 20.712, 21.096, 21.54 , 21.828,
       21.9  , 22.344, 23.52 , 23.676, 24.132, 24.288, 24.348, 24.372,
       24.492, 24.708, 24.756, 26.64 , 27.204, 29.664, 31.308])

In [36]:
# First 60 values of the hidden-state path.
X[:60]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

In [37]:
# Sum over the first waiting time only, i.e. the value of t[0].
sum(t[:1])

2.1e-06

In [38]:
xstart=0
xend=1

coor_xstart = int(xstart / h)
coor_xend = int(xend/h)
# Slice and time axis are identical for every state, so build them once
# instead of allocating np.arange(0, 1, h) three times per iteration.
zoom = slice(coor_xstart, coor_xend)
time_axis = np.arange(coor_xstart, min(coor_xend, X.size)) * h
for i in range(1, dim_X+1):
    fig = plt.figure(figsize=(12, 1), dpi=240)
    # Compare only the plotted slice rather than the whole path.
    plt.plot(time_axis, X[zoom] == i-1, label='Действительное значение', zorder=2, lw=0.5)
    plt.plot(time_axis, argmax[zoom] == i-1, label='Оценка', zorder=1, lw=0.5)
    plt.plot(time_axis, X_pred[zoom, i-1], label='Условная вероятность состояния', alpha=0.5, zorder=0, lw=0.5)
    plt.savefig(str(i)+"state.pgf", bbox_inches='tight')
    # Release the figure once it is on disk; one is created per state.
    plt.close(fig)

In [39]:
xstart=0.35
xend=0.4

coor_xstart = int(xstart / h)
coor_xend = int(xend/h)
# Slice and time axis are identical for every state, so build them once
# instead of allocating np.arange(0, 1, h) three times per iteration.
zoom = slice(coor_xstart, coor_xend)
time_axis = np.arange(coor_xstart, min(coor_xend, X.size)) * h
for i in range(1, dim_X+1):
    fig = plt.figure(figsize=(12, 1), dpi=240)
    # Compare only the plotted slice rather than the whole path.
    plt.plot(time_axis, X[zoom] == i-1, label='Действительное значение', zorder=2, lw=0.5)
    plt.plot(time_axis, argmax[zoom] == i-1, label='Оценка', zorder=1, lw=0.5)
    plt.plot(time_axis, X_pred[zoom, i-1], label='Условная вероятность состояния', alpha=0.5, zorder=0, lw=0.5)
    plt.savefig(str(i)+"state_zoom.pgf", bbox_inches='tight')
    # Release the figure once it is on disk; one is created per state.
    plt.close(fig)

In [40]:
# Number of grid steps where the estimate equals the true state, times the
# step h, i.e. the total time with a correct estimate.
np.sum(argmax == X) * h

0.9695448

In [41]:
def bmatrix(a):
    """Returns a LaTeX bmatrix

    :a: numpy array with at most two dimensions
    :returns: LaTeX bmatrix as a string, one line per array row
    :raises ValueError: if ``a`` has more than two dimensions

    Numbers are formatted with the current NumPy print options. The array is
    rendered without line wrapping or summarisation: with plain ``str(a)`` a
    row wider than the print line width was split over several lines, each
    of which became a separate matrix row.

    Note: a later cell redefines ``bmatrix`` with percent signs appended.
    """
    if len(a.shape) > 2:
        raise ValueError('bmatrix can at most display two dimensions')
    text = np.array2string(a, max_line_width=np.inf, threshold=a.size + 1)
    lines = text.replace('[', '').replace(']', '').splitlines()
    rv = [r'\begin{bmatrix}']
    rv += ['  ' + ' & '.join(l.split()) + r'\\' for l in lines]
    rv +=  [r'\end{bmatrix}']
    return '\n'.join(rv)

In [42]:
# NOTE(review): this import is repeated in a later cell; it belongs with the
# other imports at the top of the notebook.
from sklearn.metrics import confusion_matrix
# Plain decimals with 5 digits for all following array printouts.
np.set_printoptions(suppress=True, precision=5)
# Confusion counts (rows: true state, columns: estimate) scaled by the step h.
print(confusion_matrix(X, argmax, labels=[0,1,2,3])*h)

[[0.61632 0.00101 0.0005  0.00619]
 [0.00266 0.00355 0.0009  0.     ]
 [0.00519 0.00123 0.05896 0.00178]
 [0.00463 0.      0.00636 0.29072]]


In [43]:
# The same h-scaled confusion matrix, formatted as a LaTeX bmatrix.
print(bmatrix(confusion_matrix(X, argmax, labels=[0,1,2,3])*h))

\begin{bmatrix}
  0.61632 & 0.00101 & 0.0005 & 0.00619\\
  0.00266 & 0.00355 & 0.0009 & 0.\\
  0.00519 & 0.00123 & 0.05896 & 0.00178\\
  0.00463 & 0. & 0.00636 & 0.29072\\
\end{bmatrix}


In [44]:
def bmatrix(a):
    """Returns a LaTeX bmatrix whose entries are followed by an escaped percent sign

    :a: numpy array with at most two dimensions
    :returns: LaTeX bmatrix as a string, one line per array row
    :raises ValueError: if ``a`` has more than two dimensions

    Numbers are formatted with the current NumPy print options. The array is
    rendered without line wrapping or summarisation, so a row wider than the
    print line width is no longer split into several matrix rows.

    Note: this definition replaces the earlier ``bmatrix`` of the notebook.
    """
    if len(a.shape) > 2:
        raise ValueError('bmatrix can at most display two dimensions')
    text = np.array2string(a, max_line_width=np.inf, threshold=a.size + 1)
    lines = text.replace('[', '').replace(']', '').splitlines()
    rv = [r'\begin{bmatrix}']
    # Raw strings: '\%' in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    rv += ['  ' + r'\% & '.join(l.split()) + r'\%\\' for l in lines]
    rv +=  [r'\end{bmatrix}']
    return '\n'.join(rv)

In [45]:
# NOTE(review): duplicate of the import in an earlier cell.
from sklearn.metrics import confusion_matrix
# Row-normalised confusion matrix (each true-state row sums to 100), printed
# as a LaTeX bmatrix.
print(bmatrix(confusion_matrix(X, argmax, labels=[0,1,2,3], normalize='true') * 100))

\begin{bmatrix}
  98.76627\% & 0.16158\% & 0.08013\% & 0.99202\%\\
  37.46589\% & 49.90154\% & 12.63257\% & 0.\%\\
  7.73275\% & 1.83038\% & 87.78653\% & 2.65034\%\\
  1.53478\% & 0.\% & 2.10848\% & 96.35674\%\\
\end{bmatrix}


In [46]:
# The same row-normalised percentages, printed as a plain NumPy array.
a = confusion_matrix(X, argmax, labels=[0,1,2,3], normalize='true') * 100
print(str(a))

[[98.76627  0.16158  0.08013  0.99202]
 [37.46589 49.90154 12.63257  0.     ]
 [ 7.73275  1.83038 87.78653  2.65034]
 [ 1.53478  0.       2.10848 96.35674]]
