In [1]:
import sys
sys.path.append('../')
from typing import List
from copy import deepcopy
import numpy as np
import pandas as pd
from math import sqrt
from sklearn.preprocessing import scale, normalize, MinMaxScaler
from scipy.spatial.distance import pdist, squareform
from scipy.io import arff

from inputlds import *

import matplotlib.pyplot as plt

## Data Generation

In [2]:
def data_generation(g,f_dash,proc_noise_std,obs_noise_std,inputs,T):
    """
    Simulate one trajectory of a linear dynamical system and return its observations.

    The system is built with dynamical_system(A, B, C, D, **kwargs), described by
        phi_(t+1) = A*phi_t + B*X_t + w_(t+1)
            Y_t   = C*phi_t + D*X_t + v_t
    A--> g:[n,n]
    B--> all-zero [n,d] matrix (this function always passes zeros)
    C--> f_dash:[m,n]
    D--> all-zero [m,d] matrix (this function always passes zeros)
    n is the dimension of hidden states (len(g));
    m is the dimension of observations (len(f_dash));
    d is the dimension of inputs (len(inputs)).

    Parameters:
        g: state transition matrix, [n,n].
        f_dash: observation matrix, [m,n].
        proc_noise_std: forwarded as process_noise_std (gaussian process noise).
        obs_noise_std: forwarded as observation_noise_std (gaussian observation noise).
        inputs: 0 or None for "no inputs" (an all-zero array of shape (m, T) is
            substituted); otherwise a sequence whose length is used as d.
        T: number of time steps to simulate.

    Returns:
        numpy array of shape (T, m) built from the solved system's outputs.
    """
    n=len(g)
    m=len(f_dash)
    # Only None or a scalar zero means "no inputs". Comparing a real input array
    # with `== 0` inside `if` raises "truth value of an array is ambiguous".
    if inputs is None or (np.isscalar(inputs) and inputs == 0):
        inputs = np.zeros((m,T))
    dim = len(inputs) # dimension of inputs
    ds1 = dynamical_system(g,np.zeros((n,dim)),f_dash,np.zeros((m,dim)),
            process_noise='gaussian',
            observation_noise='gaussian', 
            process_noise_std=proc_noise_std, 
            observation_noise_std=obs_noise_std)

    # initial state of all ones; ds1.d is presumably the hidden-state dimension
    # -- TODO confirm against inputlds
    h0=np.ones(ds1.d)
    ds1.solve(h0=h0, inputs=inputs, T=T)
    return np.asarray(ds1.outputs).reshape(T,m) #.tolist()


### N=2, M=2

In [3]:
# Experiment with n=2 hidden states and m=2 observations.
# Noise grid bounds and the length of every simulated series.
start, stop, step = 0.02, 0.1, 0.02
T = 20

# Grids of process- and observation-noise standard deviations to sweep over
pro_rang = np.arange(start, stop, step)
obs_rang = np.arange(start, stop, step)
print(pro_rang, obs_rang)
proL, obsL = len(pro_rang), len(obs_rang)

# Transition (g) and observation (f_dash) matrices of the first system
g_1 = np.matrix([[0.9, 0.2], [0.1, 0.1]]) * 0.8
f_dash_1 = np.matrix([[1.0, 1.0], [0.2, 0.2]]) * 0.8

# Transition (g) and observation (f_dash) matrices of the second system
g_2 = np.matrix([[0.8, 0.2], [0.2, 0.1]]) * 0.8
f_dash_2 = np.matrix([[0.8, 1.0], [0.1, 0.2]]) * 0.8

cluster_1, cluster_2 = [], []

# 0 is the "no inputs" marker understood by data_generation
inputs = 0
for i, proc_noise_std in enumerate(pro_rang):
    for j, obs_noise_std in enumerate(obs_rang):
        # Ten series per noise setting, alternating between the two systems
        for k in range(10):
            data_1 = data_generation(g_1, f_dash_1, proc_noise_std, obs_noise_std, inputs, T)
            cluster_1.append(data_1)

            data_2 = data_generation(g_2, f_dash_2, proc_noise_std, obs_noise_std, inputs, T)
            cluster_2.append(data_2)

[0.02 0.04 0.06 0.08] [0.02 0.04 0.06 0.08]


In [4]:
# Stack both clusters into one array (cluster_1 first) with matching 0/1 labels.
Y = np.concatenate((np.array(cluster_1),np.array(cluster_2)),axis=0)
Y_label = np.concatenate((np.zeros(len(cluster_1)),np.ones(len(cluster_2))),axis=0)
print(Y.shape, Y_label.shape)
# Flatten each series into one row. The row count is taken from Y rather than a
# hard-coded 320, so the cell keeps working if the grid or repetitions change.
data = Y.reshape(len(Y),-1)
# Only the flattened series are written here; Y_label is not saved by this cell.
with open('2_2_test.npy', 'wb') as f:
    np.save(f, data)

(320, 20, 2) (320,)


### N=3,M=2

In [132]:
# Experiment with n=3 hidden states and m=2 observations.
# Noise grid bounds and the length of every simulated series.
start, stop, step = 0.02, 0.1, 0.02
T = 20

# Grids of process- and observation-noise standard deviations to sweep over
pro_rang = np.arange(start, stop, step)
obs_rang = np.arange(start, stop, step)
print(pro_rang, obs_rang)
proL, obsL = len(pro_rang), len(obs_rang)

# First system: 3x3 transition matrix and 2x3 observation matrix
g_1 = np.matrix([[1.0, 0.8, 0.8], [0.6, 0.1, 0.2], [0.3, 0.2, 0.2]]) * 0.6
f_dash_1 = np.matrix([[0.7, 0.4, 0.3], [0.2, 0.6, 0.2]]) * 0.6

# Second system: 3x3 transition matrix and 2x3 observation matrix
g_2 = np.matrix([[1.0, 1.0, 0.6], [0.7, 0.2, 0.2], [0.2, 0.1, 0.1]]) * 0.6
f_dash_2 = np.matrix([[0.5, 0.4, 0.1], [0.2, 0.5, 0.1]]) * 0.6

cluster_1, cluster_2 = [], []

# 0 is the "no inputs" marker understood by data_generation
inputs = 0
for i, proc_noise_std in enumerate(pro_rang):
    for j, obs_noise_std in enumerate(obs_rang):
        # Ten series per noise setting, alternating between the two systems
        for k in range(10):
            data_1 = data_generation(g_1, f_dash_1, proc_noise_std, obs_noise_std, inputs, T)
            cluster_1.append(data_1)

            data_2 = data_generation(g_2, f_dash_2, proc_noise_std, obs_noise_std, inputs, T)
            cluster_2.append(data_2)

[0.02 0.04 0.06 0.08] [0.02 0.04 0.06 0.08]


In [134]:
# Stack both clusters into one array (cluster_1 first) with matching 0/1 labels.
Y = np.concatenate((np.array(cluster_1),np.array(cluster_2)),axis=0)
Y_label = np.concatenate((np.zeros(len(cluster_1)),np.ones(len(cluster_2))),axis=0)
print(Y.shape, Y_label.shape)
# Flatten each series into one row. The row count is taken from Y rather than a
# hard-coded 320, so the cell keeps working if the grid or repetitions change.
data = Y.reshape(len(Y),-1)
# Only the flattened series are written here; Y_label is not saved by this cell.
with open('3_2_test.npy', 'wb') as f:
    np.save(f, data)

(320, 20, 2) (320,)


### N=4, M=2

In [177]:
# Experiment with n=4 hidden states and m=2 observations.
# Noise grid bounds and the length of every simulated series.
start, stop, step = 0.02, 0.1, 0.02
T = 20

# Grids of process- and observation-noise standard deviations to sweep over
pro_rang = np.arange(start, stop, step)
obs_rang = np.arange(start, stop, step)
print(pro_rang, obs_rang)
proL, obsL = len(pro_rang), len(obs_rang)

# First system: 4x4 transition matrix and 2x4 observation matrix
g_1 = 0.4 * np.matrix([[0.9, 0.8, 0.5, 0.2],
                       [0.9, 0.1, 0.3, 0.4],
                       [0.8, 0.2, 0.1, 0.1],
                       [0.1, 0.1, 0.1, 0.7]])
f_dash_1 = 0.4 * np.matrix([[0.2, 0.5, 0.1, 0.1],
                            [0.8, 0.6, 0.1, 0.1]])

# Second system: 4x4 transition matrix and 2x4 observation matrix
g_2 = 0.4 * np.matrix([[1.0, 0.8, 0.5, 0.3],
                       [0.6, 0.2, 0.3, 0.4],
                       [0.8, 0.2, 0.3, 0.1],
                       [0.2, 0.2, 0.3, 0.7]])
f_dash_2 = 0.4 * np.matrix([[0.2, 0.4, 0.1, 0.1],
                            [0.6, 0.2, 0.2, 0.2]])

cluster_1, cluster_2 = [], []

# 0 is the "no inputs" marker understood by data_generation
inputs = 0
for i, proc_noise_std in enumerate(pro_rang):
    for j, obs_noise_std in enumerate(obs_rang):
        # Ten series per noise setting, alternating between the two systems
        for k in range(10):
            data_1 = data_generation(g_1, f_dash_1, proc_noise_std, obs_noise_std, inputs, T)
            cluster_1.append(data_1)

            data_2 = data_generation(g_2, f_dash_2, proc_noise_std, obs_noise_std, inputs, T)
            cluster_2.append(data_2)

[0.02 0.04 0.06 0.08] [0.02 0.04 0.06 0.08]


In [179]:
# Stack both clusters into one array (cluster_1 first) with matching 0/1 labels.
Y = np.concatenate((np.array(cluster_1),np.array(cluster_2)),axis=0)
Y_label = np.concatenate((np.zeros(len(cluster_1)),np.ones(len(cluster_2))),axis=0)
print(Y.shape, Y_label.shape)
# Flatten each series into one row. The row count is taken from Y rather than a
# hard-coded 320, so the cell keeps working if the grid or repetitions change.
data = Y.reshape(len(Y),-1)
# Only the flattened series are written here; Y_label is not saved by this cell.
with open('4_2_test.npy', 'wb') as f:
    np.save(f, data)

(320, 20, 2) (320,)


## ECG

In [3]:
# Relative path to the ECG5000 training file in ARFF format.
trainpath = "ECG5000/ECG5000_TRAIN.arff"

In [4]:
def a2p(path):
    """Read the ARFF file at `path` and return its records as a pandas DataFrame.

    arff.loadarff returns a (data, meta) pair; only the data part is used here.
    """
    records = arff.loadarff(path)[0]
    return pd.DataFrame(records)

In [None]:
# Load the training ARFF file into a DataFrame, print its shape,
# and display the first rows as the cell output.
train = a2p(trainpath)
print(train.shape)
train.head()

In [12]:
# Number of rows for each distinct value of the `target` column
train["target"].value_counts()

target
b'1'    292
b'2'    177
b'4'     19
b'3'     10
b'5'      2
Name: count, dtype: int64