<a href="https://colab.research.google.com/github/sajtarius/practice/blob/main/adjustment_causality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install KDEpy

Collecting KDEpy
  Downloading KDEpy-1.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (552 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m552.7/552.7 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: KDEpy
Successfully installed KDEpy-1.1.5


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import pickle
from plotly.subplots import make_subplots
from matplotlib.animation import FuncAnimation
from tqdm import tqdm
from scipy.fft import fft, ifft, fftfreq, fftshift
from scipy import signal
from scipy import stats
from scipy.integrate import trapezoid, cumtrapz, solve_ivp
from KDEpy import FFTKDE
#%matplotlib notebook

# Functions

In [None]:
# @title information rate
###[information rate square] for 1D [pdf]###
def info_rate(pdf, axis, time):
    """Squared information rate between consecutive 1D PDF snapshots.

    For every pair of consecutive snapshots ``pdf[k]`` and ``pdf[k+1]`` this
    evaluates ``4 * sum((sqrt(pdf[k+1]) - sqrt(pdf[k]))**2 * dx / dt**2)``.

    Parameters
    ----------
    pdf : sequence of 1D arrays
        One density snapshot per time sample.
    axis : sequence
        ``axis[k][0]`` holds the bin edges of snapshot ``k``; the bin width
        ``dx`` is taken from its first two entries.
    time : sequence of numbers
        Time samples; the step ``dt`` is taken from the first two entries and
        reused for every pair.

    Returns
    -------
    rate2 : numpy.ndarray
        ``len(time) - 1`` squared-rate values.
    newt : numpy.ndarray
        ``time`` with its first entry removed.
    """
    dt = time[1] - time[0]  # time step used for every pair of snapshots
    n_pairs = len(time) - 1

    def _pair_rate(k):
        # Bin width of snapshot k, read from its first two edges.
        dx = axis[k][0][1] - axis[k][0][0]
        sq_diff = (np.sqrt(pdf[k + 1]) - np.sqrt(pdf[k]))**2
        return 4*np.sum(sq_diff * (dx/(dt**2)))

    rate2 = np.array([_pair_rate(k) for k in range(n_pairs)])
    newt = np.delete(time, 0)  # each rate is associated with the later snapshot's time

    return rate2, newt

###[information rate square] for 2D [pdf]###
def info_rate2D(pdf, axis, time):
    """Squared information rate between consecutive 2D PDF snapshots.

    For every pair of consecutive snapshots ``pdf[k]`` and ``pdf[k+1]`` this
    evaluates
    ``4 * sum((sqrt(pdf[k+1]) - sqrt(pdf[k]))**2 * dx2 * dx1 / dt**2)``.

    Parameters
    ----------
    pdf : sequence of 2D arrays
        One joint-density snapshot per time sample.
    axis : sequence
        ``axis[k][0]`` and ``axis[k][1]`` hold the bin edges of the two
        dimensions of snapshot ``k``; each bin width is taken from the first
        two entries of the corresponding edge array.
    time : sequence of numbers
        Time samples; the step ``dt`` is taken from the first two entries and
        reused for every pair.

    Returns
    -------
    rate2 : numpy.ndarray
        ``len(time) - 1`` squared-rate values.
    newt : numpy.ndarray
        ``time`` with its first entry removed.
    """
    dt = time[1] - time[0]  # time step used for every pair of snapshots
    collected = []

    k = 0
    last = len(time) - 1
    while k < last:
        edges_k = axis[k]
        dx2 = edges_k[1][1] - edges_k[1][0]  # bin width along the second dimension
        dx1 = edges_k[0][1] - edges_k[0][0]  # bin width along the first dimension
        sq_diff = (np.sqrt(pdf[k + 1]) - np.sqrt(pdf[k]))**2
        collected.append(4*np.sum(sq_diff * dx2 * dx1 / dt**2))
        k += 1

    rate2 = np.array(collected)
    newt = np.delete(time, 0)  # each rate is associated with the later snapshot's time

    return rate2, newt

In [None]:
# @title Information rate causality
###[information rate causality] by evaluating at [window of signals]; [one signal] will be fixed and the evolution is calculated for [another signal].
def info_rate_causal(signal1, signal2, time, window, sim):
    """Windowed squared information rates for two ensembles of signals.

    The time axis is cut into windows of ``window`` samples that advance by
    half a window. Inside each window a sub-window of half-window length is
    slid one sample at a time over one signal while the other signal is held
    at the first half of the window; the resulting joint histograms are fed to
    ``info_rate2D``. The marginal histograms of the sliding sub-window are fed
    to ``info_rate``.

    Parameters
    ----------
    signal1, signal2 : numpy.ndarray
        2D arrays indexed ``[trial, time]``. They must have ``sim`` rows,
        because every sub-window is flattened with ``reshape(sld*sim)``.
    time : sequence of numbers
        Time samples matching the second axis of the signals.
    window : int
        Window length in samples. The half-window ``int(window/2)`` must be at
        least 2 so that each window yields at least two PDF snapshots.
    sim : int
        Number of trials (rows of the signals).

    Returns
    -------
    data : dict
        Keys ``'rate_1to2'``, ``'rate_2to1'``, ``'rate_1'``, ``'rate_2'``; each
        value is a list with one rate array per window. ``'rate_2to1'`` comes
        from the joint PDF of (sliding signal1, fixed signal2), ``'rate_1to2'``
        from (fixed signal1, sliding signal2), ``'rate_1'``/``'rate_2'`` from
        the marginals.
    datatime : list
        One time array per window, as returned by ``info_rate2D``.
    """
    win = window
    sld = int(win/2)  # half-window: stride between windows and sub-window length
    # Bins per axis of the 2D histograms: sqrt of ceil(2 * n**(1/3)) with
    # n = sim*sld pooled samples; the 1D histograms use size**2 bins.
    # NOTE(review): 2*n**(1/3) resembles the Rice rule - confirm intent.
    size = int(np.ceil(2*(sim*sld)**(1/3)))
    size = int(np.sqrt(size))

    # The histogram ranges span the full signals and never change inside the
    # loops, so compute them once instead of scanning both arrays for every
    # single histogram.
    range1 = [np.min(signal1), np.max(signal1)]
    range2 = [np.min(signal2), np.max(signal2)]

    datatime = []
    rate1to2 = []
    rate2to1 = []
    rate1 = []
    rate2 = []

    for t in tqdm(range(0, len(time)-win+1, sld)):
        signal1new = signal1[:,t:t+win]
        signal2new = signal2[:,t:t+win]
        timenew = time[t:t+win]

        # Fixed reference samples: the first half of the window, pooled over trials.
        ref1 = signal1new[:,0:sld].reshape(sld*sim)
        ref2 = signal2new[:,0:sld].reshape(sld*sim)

        pdf1sld = [] #the change of [pdf] for [signal 1] given [signal 2].
        axs1sld = [] #the axes of the [pdf] for the [pdf1sld]

        pdf2sld = [] #the change of [pdf] for [signal 2] given [signal 1].
        axs2sld = [] #the axes of the [pdf] for the [pdf2sld]

        pdf1 = []
        axs1 = []

        pdf2 = []
        axs2 = []

        newtime = []

        for i in range(sld):
            # Sliding sub-window starting at offset i, pooled over trials.
            cur1 = signal1new[:,i:i+sld].reshape(sld*sim)
            cur2 = signal2new[:,i:i+sld].reshape(sld*sim)

            H1sld, edges1sld = np.histogramdd(np.array([cur1, ref2]).T, density=True, bins=size, range=[range1, range2])
            H2sld, edges2sld = np.histogramdd(np.array([ref1, cur2]).T, density=True, bins=size, range=[range1, range2])
            H1, edges1 = np.histogramdd(cur1, density=True, bins=size**2, range=[range1])
            H2, edges2 = np.histogramdd(cur2, density=True, bins=size**2, range=[range2])

            pdf1sld.append(H1sld)
            axs1sld.append(edges1sld)

            pdf2sld.append(H2sld)
            axs2sld.append(edges2sld)

            pdf1.append(H1)
            axs1.append(edges1)

            pdf2.append(H2)
            axs2.append(edges2)

            newtime.append(timenew[i])

        # All four calls receive the same `newtime`, so only one returned
        # time axis needs to be kept.
        temp_rate1sld, temp_time1sld = info_rate2D(pdf1sld, axs1sld, newtime)
        temp_rate2sld, _ = info_rate2D(pdf2sld, axs2sld, newtime)
        temp_rate1, _ = info_rate(pdf1, axs1, newtime)
        temp_rate2, _ = info_rate(pdf2, axs2, newtime)

        rate2to1.append(temp_rate1sld)
        rate1to2.append(temp_rate2sld)
        rate1.append(temp_rate1)
        rate2.append(temp_rate2)

        datatime.append(temp_time1sld)

    data = {'rate_1to2': rate1to2, 'rate_2to1': rate2to1, 'rate_1': rate1, 'rate_2': rate2}

    return data, datatime

# Simulation

## Description
The equations used here are the following autoregressive equations:

$x_1(t) = 0.55 x_1(t-1) + \epsilon_1(t)$,

$x_2(t) = H(\tau - 5) x_1(t-2) + \epsilon_2(t)$.

Here $t$ is the time index (time step) and $\tau$ is the physical time. Hence, these equations have a causal influence of $x_1$ on $x_2$ when $\tau \geq 5$, and no causal influence is expected for $\tau < 5$.

As a simple example, the equations are simulated for 10,000 trials (you may adjust this with `sim`).

For the calculation, the PDF is estimated via `np.histogramdd()` with a fixed number of bins (10 per axis). The (*) PDF is estimated as $p(x_1(t+1), x_2(t))$ (for $\Gamma^*_1$) and vice versa. Refer to the `np.histogramdd()` calls that produce `H1s` and `H2s` in the [PDF estimation] section below.

In [None]:
###simulation of equations###
time = 10 #time of the series
fs = 200 #sampling frequency
tdata = np.arange(0, time, 1/fs) #array of time data
tdatapoint = len(tdata) #length of the time data
sim = 10000 #simulation number
c = np.heaviside(tdata - 5, 1) #heaviside step function to allow the coupling (c = 1 from tdata >= 5)
#c1 = np.heaviside(list(10 - tdata), 1)
#plc = c[np.where(c!=0)].shape[0] #location where the [c] is NOT ZERO.
#c[np.where(c!=0)] = np.linspace(np.min(c), np.max(c), plc)**(1) #replace the heaviside function to a certain function instead of just one step value

SEED = 0 #fixed seed so that "Restart & Run All" reproduces the same ensemble
rng = np.random.default_rng(SEED)

nmean = np.array([0.0, 0.0])
ncov = np.array([[1.0, 0.0],
                 [0.0, 1.0]])
#draw has shape (tdatapoint, sim, 2); the transpose unpacks into two (sim, tdatapoint) noise arrays
x1noise, x2noise = rng.multivariate_normal(nmean, ncov, size=(tdatapoint, sim)).T

x1 = np.zeros((sim, tdatapoint))
x2 = np.zeros((sim, tdatapoint))

for t in tqdm(range(0, tdatapoint, 1)):
    #x1[:, t] = 0.55*x1[:, t-1]*np.exp((1 - np.abs(x1[:, t-1]))) + x1noise[:,t]

    #lagged terms are zero before the series starts; spelled out explicitly so that
    #t-1 / t-2 never become negative indices that wrap around to the end of the array
    x1_lag1 = x1[:, t-1] if t >= 1 else 0.0
    x1_lag2 = x1[:, t-2] if t >= 2 else 0.0

    x1[:, t] = 0.55*x1_lag1 + x1noise[:,t]
    x2[:, t] = c[t]*x1_lag2 + x2noise[:,t]

100%|██████████| 2000/2000 [00:00<00:00, 2714.83it/s]


In [None]:
# First trial of both signals on the primary y-axis, with the coupling step
# function overlaid on the secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

step_trace = go.Scatter(x=tdata, y=c, name='step function')
fig.add_trace(step_trace, secondary_y=True)

for label, series in (('x1', x1), ('x2', x2)):
    signal_trace = go.Scatter(x=tdata, y=series[0,:], name=label, mode='markers+lines')
    fig.add_trace(signal_trace, secondary_y=False)

fig.show()

In [None]:
# @title PDF estimation
temp_x1 = x1
temp_x2 = x2
temp_time = tdata

win = 2 #sampling window for one trajectory.
sld = 1 #sliding of the window for one trajectory.
size = 10 #number of bins per axis for estimating the PDF.

#The histogram ranges span the whole data set and do not depend on the loop index.
#Computing them once avoids ten full scans of the (sim x time) arrays per iteration,
#which dominated the run time of this cell.
x1_range = [np.min(temp_x1), np.max(temp_x1)]
x2_range = [np.min(temp_x2), np.max(temp_x2)]
joint_range = [x1_range, x2_range]

pdf1s = []
axs1s = []

pdf2s = []
axs2s = []

pdf1 = []
axs1 = []

pdf2 = []
axs2 = []

newt = []

for i in tqdm(range(0, len(temp_time)-win, sld)):
    temp_x1f = temp_x1[:, i+sld:i+sld+win] #temp_x1 forward
    temp_x1b = temp_x1[:, i:i+win] #temp_x1 backward

    temp_x2f = temp_x2[:, i+sld:i+sld+win] #temp_x2 forward
    temp_x2b = temp_x2[:, i:i+win] #temp_x2 backward

    #pool the window over all trials once per slice
    x1f_flat = temp_x1f.reshape(win*sim)
    x1b_flat = temp_x1b.reshape(win*sim)
    x2f_flat = temp_x2f.reshape(win*sim)
    x2b_flat = temp_x2b.reshape(win*sim)

    #joint PDFs with one signal shifted forward by [sld] relative to the other
    H1s, edges1s = np.histogramdd(np.array([x1f_flat, x2b_flat]).T, density=True, bins=size, range=joint_range)
    H2s, edges2s = np.histogramdd(np.array([x1b_flat, x2f_flat]).T, density=True, bins=size, range=joint_range)

    #joint PDF of both signals at the same (backward) window; not used further in this cell
    H12b, edges12b = np.histogramdd(np.array([x1b_flat, x2b_flat]).T, density=True, bins=size, range=joint_range)

    #marginal PDFs at the backward window
    H1, edges1 = np.histogramdd(x1b_flat, density=True, bins=size, range=[x1_range])
    H2, edges2 = np.histogramdd(x2b_flat, density=True, bins=size, range=[x2_range])

    pdf1s.append(H1s)
    axs1s.append(edges1s)

    pdf2s.append(H2s)
    axs2s.append(edges2s)

    pdf1.append(H1)
    axs1.append(edges1)

    pdf2.append(H2)
    axs2.append(edges2)

    newt.append(temp_time[i])

pdf1s = np.array(pdf1s)
axs1s = np.array(axs1s)

pdf2s = np.array(pdf2s)
axs2s = np.array(axs2s)

pdf1 = np.array(pdf1)
axs1 = np.array(axs1)

pdf2 = np.array(pdf2)
axs2 = np.array(axs2)

newt = np.array(newt)

100%|██████████| 1998/1998 [06:33<00:00,  5.07it/s]


In [None]:
# Squared information rate of the shifted joint PDFs (the "*" quantities):
# pdf1s pairs x1 forward with x2 backward, pdf2s pairs x1 backward with x2 forward.
rate1s, time1s = info_rate2D(pdf1s, axs1s, newt)
rate2s, time2s = info_rate2D(pdf2s, axs2s, newt)

# Squared information rate of the marginal PDFs of x1 and x2.
rate1, time1 = info_rate(pdf1, axs1, newt)
rate2, time2 = info_rate(pdf2, axs2, newt)

## Plotting of the result
The y-axis on the right-hand side is for $\Gamma^{2*} - \Gamma^2$, while the left-hand side is for $\Gamma^{2*}$ and $\Gamma^2$. The plots show that negative values of the causal information rate still appear.

In [None]:
# Rates for signal 1: joint ("*") and marginal rate on the left axis,
# their difference on the right axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(
    x=time1s, y=rate1s, name='rate1*',
), secondary_y=False, )
fig.add_trace(go.Scatter(
    x=time1, y=rate1, name='rate1',
), secondary_y=False, )
fig.add_trace(go.Scatter(
    x=time1, y=rate1s-rate1, name='rate1* - rate1',
), secondary_y=True)
# Label both y-axes so the reader can tell which trace belongs to which scale.
fig.update_layout(title_text='Information rate (squared) for x1')
fig.update_xaxes(title_text='time')
fig.update_yaxes(title_text='rate1*, rate1', secondary_y=False)
fig.update_yaxes(title_text='rate1* - rate1', secondary_y=True)
fig.show()

In [None]:
# Rates for signal 2: joint ("*") and marginal rate on the left axis,
# their difference on the right axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(
    x=time2s, y=rate2s, name='rate2*',
), secondary_y=False, )
fig.add_trace(go.Scatter(
    x=time2, y=rate2, name='rate2',
), secondary_y=False, )
fig.add_trace(go.Scatter(
    x=time2, y=rate2s-rate2, name='rate2* - rate2',
), secondary_y=True)
# Label both y-axes so the reader can tell which trace belongs to which scale.
fig.update_layout(title_text='Information rate (squared) for x2')
fig.update_xaxes(title_text='time')
fig.update_yaxes(title_text='rate2*, rate2', secondary_y=False)
fig.update_yaxes(title_text='rate2* - rate2', secondary_y=True)
fig.show()

NameError: ignored

In [None]:
# @title information rate causality calculation
# Inputs for the windowed calculation: the full simulated ensembles and their time axis.
temp_x1 = x1
temp_x2 = x2
temp_time = tdata

# Window length in samples; info_rate_causal advances and slides by half of it.
win = 100

# Returns a dict of per-window rate arrays and the matching list of per-window time arrays.
temp_dataratecausal, temp_datatimecausal = info_rate_causal(temp_x1, temp_x2, temp_time, win, sim)

100%|██████████| 39/39 [00:47<00:00,  1.23s/it]


In [None]:
# One pair of traces per window. Both directions keep a fixed colour and share a
# single legend entry each, so the windows read as two continuous series instead
# of dozens of differently coloured, identically named traces.
fig = go.Figure()
for i in range(len(temp_datatimecausal)):
    first_window = (i == 0)  # only the first window contributes a legend entry
    fig.add_trace(go.Scatter(
        x=temp_datatimecausal[i], y=temp_dataratecausal['rate_1to2'][i], name='1 to 2',
        legendgroup='1 to 2', showlegend=first_window,
        line=dict(color='royalblue'), marker=dict(color='royalblue'),
    ))
    fig.add_trace(go.Scatter(
        x=temp_datatimecausal[i], y=temp_dataratecausal['rate_2to1'][i], name='2 to 1',
        legendgroup='2 to 1', showlegend=first_window,
        line=dict(color='firebrick'), marker=dict(color='firebrick'),
    ))
fig.update_layout(xaxis_title='time', yaxis_title='information rate (squared)')
fig.show()

In [None]:
# One-off inspection: build the same histograms as the PDF-estimation loop,
# but for a single start index so they can be examined and plotted directly.
temp_x1, temp_x2, temp_time = x1, x2, tdata

sld, win, size = 2, 1, 10

i = 0

forward = slice(i+sld, i+sld+win)   # window shifted ahead by [sld]
backward = slice(i, i+win)          # window at the start index

temp_x1f = temp_x1[:, forward] #temp_x1 forward
temp_x1b = temp_x1[:, backward] #temp_x1 backward

temp_x2f = temp_x2[:, forward] #temp_x2 forward
temp_x2b = temp_x2[:, backward] #temp_x2 backward

n_pooled = win*sim  # samples per histogram after pooling the window over all trials
x1_span = [np.min(temp_x1), np.max(temp_x1)]
x2_span = [np.min(temp_x2), np.max(temp_x2)]

# Joint histograms: (x1 forward, x2 backward) and (x1 backward, x2 forward).
H1s, edges1s = np.histogramdd(
    np.column_stack((temp_x1f.reshape(n_pooled), temp_x2b.reshape(n_pooled))),
    density=True, bins=size, range=[x1_span, x2_span])
H2s, edges2s = np.histogramdd(
    np.column_stack((temp_x1b.reshape(n_pooled), temp_x2f.reshape(n_pooled))),
    density=True, bins=size, range=[x1_span, x2_span])

# Joint histogram of both signals at the backward window.
H12b, edges12b = np.histogramdd(
    np.column_stack((temp_x1b.reshape(n_pooled), temp_x2b.reshape(n_pooled))),
    density=True, bins=size, range=[x1_span, x2_span])

# Marginal histograms at the backward window.
H1, edges1 = np.histogramdd(temp_x1b.reshape(n_pooled), density=True, bins=size, range=[x1_span])
H2, edges2 = np.histogramdd(temp_x2b.reshape(n_pooled), density=True, bins=size, range=[x2_span])

In [None]:
# Bin edges returned by np.histogramdd for the 1D histogram H1:
# a one-element list holding the array of size+1 edges.
edges1

[array([-5.88042708, -4.68188225, -3.48333741, -2.28479258, -1.08624774,
         0.11229709,  1.31084193,  2.50938677,  3.7079316 ,  4.90647644,
         6.10502127])]

In [None]:
# np.histogramdd returns H1s[i, j] with i indexing the first sample dimension
# (x1, edges1s[0]) and j the second (x2, edges1s[1]). Plotly's Heatmap reads
# z[row, col] as (y, x), so the histogram is transposed to put x1 on the x-axis.
fig = go.Figure(data=go.Heatmap(
    x=edges1s[0],
    y=edges1s[1],
    z=H1s.T,
))
fig.update_layout(xaxis_title='x1 (forward)', yaxis_title='x2 (backward)')
fig.show()

In [None]:
# H1 has one density value per bin while edges1[0] has one more entry (the bin
# edges). Plot each value at its bin centre instead of pairing it with the
# left edge and leaving the last edge without a value.
bin_centers1 = 0.5*(edges1[0][:-1] + edges1[0][1:])

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=bin_centers1, y=H1,
))
fig.update_layout(xaxis_title='x1', yaxis_title='probability density')
fig.show()