In [None]:
import pandas as pd
import numpy as np
import tensorflow_probability
import plotly.io as pio
import sys
from src.em.mixture import DynamicMixture
from plotly.express import line

pio.renderers.default = "notebook"
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, "/home/oplora/Documents/VMK/Dissertation/Magfield")

# Позволяет использовать измененные модули без перезагрузки ядра
%load_ext autoreload
%autoreload 2

# Mixtures of normal laws and coefficients of stochastic differential equations

Assume that each time series is a mixture of __3__ normal laws (Gaussian distributions).
The plots below show the reconstructed stochastic coefficients of the process
$$ \delta X(t) = a(t) \delta t + b(t) \delta W ,$$ 
where $X(t)$ stands for each projection of the $B$, $dB$, $V$ and $dV$ components.

The reconstruction was performed with the EM algorithm combined with the Kolmogorov-Smirnov test
(a.k.a. "EM without dying variances"), applied over a sliding window. The window
parameters were: step __1 minute__, length __3 days__.


**Assumption**: all `nan` values are dropped from the data.

## Mixtures of normal laws

In [None]:
# Raw measurements table; later cells read its "ydhm_id" column and the
# per-component columns by name.
data = pd.read_csv(
    filepath_or_buffer="/home/oplora/Documents/VMK/Dissertation/Magfield/Data/2020_ydhm_id.csv"
)

In [None]:
def increm(arr):
    """Return the increments (first differences) of ``arr`` as a list.

    Element ``i`` of the result is ``arr[i] - arr[i - 1]``. The first element
    has no predecessor, so its increment is ``nan``; the result therefore has
    the same length as ``arr`` (and is ``[nan]`` for an empty input).

    The previous implementation added neighbouring samples instead of
    subtracting them, which does not yield increments.

    Parameters
    ----------
    arr : indexable sequence of numbers supporting ``len``.

    Returns
    -------
    list
        ``[nan, arr[1] - arr[0], ..., arr[-1] - arr[-2]]``.
    """
    from math import nan

    return [nan] + [arr[i] - arr[i - 1] for i in range(1, len(arr))]

In [None]:
# Hourly sub-sample (every 60th non-NaN value) of one component and its
# running total; the first point of the running total is pinned to zero.
cname = "Vz"
dbx = data.loc[~np.isnan(data[cname]), cname].values[::60]
dbxe = np.cumsum(dbx)
dbxe[0] = 0
line({"cum_sum": dbxe}, title=f"Cumulative sum for {cname} per hour.")

In [None]:
# Every 60th non-NaN sample of the chosen component, plotted as is.
cname = "Vx"
dbx = data.loc[~np.isnan(data[cname]), cname].values[::60]
line({"orig": dbx}, title=f"{cname} per hour")

In [None]:
# Non-NaN "Vz" samples next to the output of `increm`; the leading nan that
# `increm` returns is replaced by zero so the trace starts at the first point.
dbx = data.loc[~np.isnan(data["Vz"]), "Vz"].values
dbxe = increm(dbx)
dbxe[0] = 0
line({"int": dbxe, "orig": dbx})

In [None]:
# Load a previously saved mixture for the chosen component from disk.
# NOTE(review): the ".pickle" suffix suggests this unpickles the file; only
# load files you trust. The "4320_5" part of the name presumably encodes the
# window settings used for the fit — TODO confirm.
comp_name = "Vx"
mixt: DynamicMixture = DynamicMixture.load(
    f"/home/oplora/Documents/VMK/Dissertation/Magfield/Data/MyMixtures/{comp_name}_4320_5.pickle"
)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

volat = mixt.process_volatility()

# One figure, two y-axes: the two series are drawn against separate scales.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Left axis: the "a" process coefficient, plotted against its sample index.
y1 = mixt.process_coefs["a"]
x1 = list(range(len(y1)))
name1 = "a(t)"
fig.add_trace(
    go.Scatter(x=x1, y=y1, name=name1),
    secondary_y=False,
)

# Right axis: the "diffusional" entry of the volatility decomposition.
y2 = volat["diffusional"]
x2 = list(range(len(y2)))
name2 = "diffusional component"
fig.add_trace(
    go.Scatter(x=x2, y=y2, name=name2),
    secondary_y=True,
)

fig.update_layout(title_text="Volatility " + f"<b>{comp_name}</b>")

# The x values are plain positions (range(len(y))), so label them as such
# instead of keeping the template placeholder "xaxis title".
fig.update_xaxes(title_text="sample index")

fig.update_yaxes(title_text="values of " + name1, secondary_y=False)
fig.update_yaxes(title_text="values of " + name2, secondary_y=True)

fig.show()

In [None]:
# One figure, two y-axes: the two series are drawn against separate scales.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Left axis: sum of the "diffusional" and "dynamic" volatility entries.
# NOTE(review): assumes both entries are arrays so that `+` adds element-wise
# (plain lists would be concatenated instead) — confirm.
y1 = volat["diffusional"] + volat["dynamic"]
x1 = list(range(len(y1)))
name1 = "instant volatility"
fig.add_trace(
    go.Scatter(x=x1, y=y1, name=name1),
    secondary_y=False,
)

# Right axis: the "b" process coefficient.
y2 = mixt.process_coefs["b"]
x2 = list(range(len(y2)))
name2 = "b(t) square"
fig.add_trace(
    go.Scatter(x=x2, y=y2, name=name2),
    secondary_y=True,
)

fig.update_layout(
    title_text="Instant volatility and b^2(t) for " + f"<b>{comp_name}</b>"
)

# The x values are plain positions (range(len(y))), so label them as such
# instead of keeping the template placeholder "xaxis title".
fig.update_xaxes(title_text="sample index")

fig.update_yaxes(title_text="values of " + name1, secondary_y=False)
fig.update_yaxes(title_text="values of " + name2, secondary_y=True)

fig.show()

In [None]:
# Component names: every column of `data` except the time identifier.
comp_names = data.columns.values
comp_names = comp_names[comp_names != "ydhm_id"]

TIME = data["ydhm_id"].values
COLOR = dict(x="#1f77b4", y="#ff7f0e", z="#2ca02c")

# Container for component values: one independent copy per column, so later
# edits to a series cannot write through to `data`. The comprehension also
# avoids leaking the unused loop index and temporaries into the notebook.
SERIES = {comp: data[comp].values.copy() for comp in comp_names}

SERIES.keys()

In [None]:
# Window width in minutes: 3 days (4320 min) minus 20 min = 4300 min.
window_size = 60 * 24 * 3 - 20
# Second element of `window_shape`; presumably the window step in minutes —
# TODO confirm. It must agree with the stride of 10 that the following cells
# use for `coefs_date` and for the "tens of minutes" correlation windows; the
# previous value of 120 did not.
window_step = 10

current_series = SERIES["dBx"]
# Drop NaNs once and reuse the result: np.mean over a series that still holds
# NaN returns NaN, which would be passed to the initialization as `avr`.
observed = current_series[~np.isnan(current_series)]

mixt = DynamicMixture(
    num_comps=3,
    distrib=tensorflow_probability.distributions.Normal,
    time_span=TIME,
    window_shape=(window_size, window_step),
)

mixt.rewrite_as_normal_human_this_initialization(
    random_seed=42, avr=np.mean(observed)
)
mixt.predict_light(data=observed)

In [None]:
# Reconstructed process coefficients: `b` is stored under "b_square" and its
# square root under "b".
a, b = mixt.reconstruct_process_coef()
coefs = {"a": a, "b_square": b, "b": np.sqrt(b)}

# Time ids of the non-NaN samples, trimmed by half a window on both sides and
# thinned to every 10th entry.
coefs_date = data.loc[~np.isnan(current_series), "ydhm_id"].iloc[
    window_size // 2 : -window_size // 2 : 10
]
df_coefs = pd.DataFrame(coefs)

## Comparison with Egor

In [None]:
import json

# Reference coefficients: each JSON file holds a mapping, of which only the
# values (in file order) are kept.
with open(
    "/home/oplora/Documents/VMK/Dissertation/Magfield/EGOR_Bx_A.json"
) as json_file:
    egor_a = list(json.load(json_file).values())

with open(
    "/home/oplora/Documents/VMK/Dissertation/Magfield/EGOR_Bx_B.json"
) as json_file:
    egor_b = list(json.load(json_file).values())

In [None]:
# Reference `a` values against ours (all but the last element).
compare = {"egor": egor_a, "ruslan": a[:-1]}
line(compare).show()

In [None]:
# Display the parameters of the current mixture (output is produced by
# DynamicMixture.show_parameters itself).
mixt.show_parameters()

In [None]:
# Reference `b` values against the square root of ours (all but the last element).
compare = {"egor": egor_b, "ruslan": np.sqrt(b[:-1])}
line(compare).show()

In [None]:
# All three coefficient columns against every 10th time id, truncated to the
# number of reconstructed coefficients.
line(
    df_coefs,
    x=data["ydhm_id"].iloc[::10].iloc[: len(a)],
    y=["a", "b", "b_square"],
    title="Process coefficients for mixture dBx: 3 components, 4300 window width and 10 min step",
).show()

In [None]:
from numpy.lib.stride_tricks import sliding_window_view

shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title="Correlation between a(t) and b^2(t) with window size 1 day (1440 min) and step 1 hour (60 min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 7  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title="Correlation between a(t) and b^2(t) with window size 1 week (10080 min) and step 1 hour (60 min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 7 * 4  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title=f"Correlation between a(t) and b^2(t) with window size 1 month ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 3  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title=f"Correlation between a(t) and b^2(t) with window size 3 days ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min).",
)

In [None]:
shift_corr = 6 * 12  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 3  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title=f"Correlation between a(t) and b^2(t) with window size 3 days ({window_corr_size*10} min) and step 12 hour ({shift_corr*10} min).",
)

In [None]:
def smooth(data, wind_size=20):
    """Moving average of ``data`` over full windows of ``wind_size`` samples.

    Only complete windows are used, so the result holds
    ``len(data) - wind_size + 1`` values and is returned as a plain list.
    """
    from numpy import mean
    from numpy.lib.stride_tricks import sliding_window_view

    return [mean(chunk) for chunk in sliding_window_view(data, wind_size)]

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 3  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

# Date axis trimmed by half of (correlation window + 24 smoothing counts).
# NOTE(review): the "+ 7" is an unexplained offset, presumably chosen to match
# the length of the smoothed series — confirm.
x_date = coefs_date[
    (window_corr_size + 24 * shift_corr) // 2 : -(window_corr_size + 24 * shift_corr) // 2 + 7 : shift_corr
]
line(
    x=x_date,
    y=smooth(dynamic_corr["correlation"], 24),
    title=f"Correlation between a(t) and b^2(t) with window size 3 days ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min). Smoothed by 1 day (24 counts)",
)

In [None]:
window_size = 60 * 24 * 3  # 4320 minutes = 3 days

current_series = SERIES["dBy"]
# Drop NaNs once and reuse the result: np.mean over a series that still holds
# NaN returns NaN, which would be passed to the initialization as `avr`.
observed = current_series[~np.isnan(current_series)]

mixt = DynamicMixture(
    num_comps=3,
    distrib=tensorflow_probability.distributions.Normal,
    time_span=TIME,
    window_shape=(window_size, 10),
)

mixt.rewrite_as_normal_human_this_initialization(
    random_seed=42, avr=np.mean(observed)
)
mixt.predict_light(data=observed)

In [None]:
# Reconstructed process coefficients: `b` is stored under "b_square" and its
# square root under "b".
a, b = mixt.reconstruct_process_coef()
coefs = {"a": a, "b_square": b, "b": np.sqrt(b)}

# Time ids of the non-NaN samples, trimmed by half a window on both sides and
# thinned to every 10th entry.
coefs_date = data.loc[~np.isnan(current_series), "ydhm_id"].iloc[
    window_size // 2 : -window_size // 2 : 10
]
df_coefs = pd.DataFrame(coefs)

In [None]:
# Pairwise Pearson correlation between the columns of the coefficient frame.
df_coefs.corr(method="pearson")

In [None]:
# All three coefficient columns against every 10th time id, truncated to the
# number of reconstructed coefficients. The window width in the title is taken
# from `window_size` (the old hard-coded "4300" did not match the 4320-minute
# window used for this fit).
line(
    data_frame=df_coefs,
    x=data["ydhm_id"][::10][: len(a)],
    y=["a", "b", "b_square"],
    title=f"Process coefficients for mixture <b>dBy</b>: 3 components, {window_size} window width and 10 min step",
).show()

In [None]:
from numpy.lib.stride_tricks import sliding_window_view

shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title="Correlation between a(t) and b^2(t) with window size 1 day (1440 min) and step 1 hour (60 min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 7  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title="Correlation between a(t) and b^2(t) with window size 1 week (10080 min) and step 1 hour (60 min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 7 * 4  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title=f"Correlation between a(t) and b^2(t) with window size 1 month ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 3  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title=f"Correlation between a(t) and b^2(t) with window size 3 days ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 3  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

# Date axis trimmed by half of (correlation window + 24 smoothing counts).
# NOTE(review): the "+ 7" is an unexplained offset, presumably chosen to match
# the length of the smoothed series — confirm.
x_date = coefs_date[
    (window_corr_size + 24 * shift_corr) // 2 : -(window_corr_size + 24 * shift_corr) // 2 + 7 : shift_corr
]
line(
    x=x_date,
    y=smooth(dynamic_corr["correlation"], 24),
    title=f"Correlation between a(t) and b^2(t) with window size 3 days ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min). Smoothed by 1 day (24 counts)",
)

In [None]:
window_size = 60 * 24 * 3  # 4320 minutes = 3 days

current_series = SERIES["dBz"]
# Drop NaNs once and reuse the result: np.mean over a series that still holds
# NaN returns NaN, which would be passed to the initialization as `avr`.
observed = current_series[~np.isnan(current_series)]

mixt = DynamicMixture(
    num_comps=3,
    distrib=tensorflow_probability.distributions.Normal,
    time_span=TIME,
    window_shape=(window_size, 10),
)

mixt.rewrite_as_normal_human_this_initialization(
    random_seed=42, avr=np.mean(observed)
)
mixt.predict_light(data=observed)

In [None]:
# Reconstructed process coefficients: `b` is stored under "b_square" and its
# square root under "b".
a, b = mixt.reconstruct_process_coef()
coefs = {"a": a, "b_square": b, "b": np.sqrt(b)}

# Time ids of the non-NaN samples, trimmed by half a window on both sides and
# thinned to every 10th entry.
coefs_date = data.loc[~np.isnan(current_series), "ydhm_id"].iloc[
    window_size // 2 : -window_size // 2 : 10
]
df_coefs = pd.DataFrame(coefs)

In [None]:
# Pairwise Pearson correlation between the columns of the coefficient frame.
df_coefs.corr(method="pearson")

In [None]:
# All three coefficient columns against every 10th time id, truncated to the
# number of reconstructed coefficients. This section fits the dBz series, so
# the title names dBz (it was copy-pasted as dBy), and the window width is
# taken from `window_size` instead of the stale hard-coded "4300".
line(
    data_frame=df_coefs,
    x=data["ydhm_id"][::10][: len(a)],
    y=["a", "b", "b_square"],
    title=f"Process coefficients for mixture <b>dBz</b>: 3 components, {window_size} window width and 10 min step",
).show()

In [None]:
from numpy.lib.stride_tricks import sliding_window_view

shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title="Correlation between a(t) and b^2(t) with window size 1 day (1440 min) and step 1 hour (60 min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 7  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title="Correlation between a(t) and b^2(t) with window size 1 week (10080 min) and step 1 hour (60 min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 7 * 4  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title=f"Correlation between a(t) and b^2(t) with window size 1 month ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 3  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

line(
    x=coefs_date[window_corr_size // 2 : -window_corr_size // 2 : shift_corr],
    y=dynamic_corr["correlation"],
    title=f"Correlation between a(t) and b^2(t) with window size 3 days ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min).",
)

In [None]:
shift_corr = 6  # in tens of minutes (check step in mixture)
window_corr_size = 6 * 24 * 3  # in tens of minutes (check step in mixture)

# Rolling windows over a(t) and b^2(t), keeping every `shift_corr`-th window.
a_t = sliding_window_view(coefs["a"], window_shape=window_corr_size)[::shift_corr]
b_t = sliding_window_view(coefs["b_square"], window_shape=window_corr_size)[
    ::shift_corr
]

# Pearson correlation per window pair. The comprehension keeps its loop
# variables local, so the notebook-level `a` and `b` are not overwritten.
dynamic_corr = {
    "correlation": [
        np.corrcoef(a_win, b_win)[0, 1] for a_win, b_win in zip(a_t, b_t)
    ]
}

# Date axis trimmed by half of (correlation window + 24 smoothing counts).
# NOTE(review): the "+ 7" is an unexplained offset, presumably chosen to match
# the length of the smoothed series — confirm.
x_date = coefs_date[
    (window_corr_size + 24 * shift_corr) // 2 : -(window_corr_size + 24 * shift_corr) // 2 + 7 : shift_corr
]
line(
    x=x_date,
    y=smooth(dynamic_corr["correlation"], 24),
    title=f"Correlation between a(t) and b^2(t) with window size 3 days ({window_corr_size*10} min) and step 1 hour ({shift_corr*10} min). Smoothed by 1 day (24 counts)",
)