In [22]:
# from sklearn import cluster, metrics
from scipy import stats
from esfMRI import plot_sates, stats_tests
from statsmodels.stats import nonparametric
import numpy as np
import pickle
import json
import os

In [2]:
# 可调节参数
window_length = 60 # 窗口尺寸，单位s
sliding_step = 3 # 滑动步长，单位s
target_states = [3, 4, 5, 6] # 目标状态数

In [None]:
with open(f"dFC/{window_length}_{sliding_step}dFCs.pkl", "rb") as f:
    dFCs = pickle.load(f)

In [None]:
# 绘制状态变化
pkl_dir = "states_pkl/total"
os.makedirs(pkl_dir, exist_ok=True)
for k in target_states:
    with open(f"{pkl_dir}/cluster/km_{window_length}s_{k}states.pkl", "rb") as f:
        km = pickle.load(f)
    states = {}
    for subid in dFCs:
        states[subid] = {}
        save_dir = f"states/total/{k}states/{window_length}/{subid}"
        os.makedirs(save_dir, exist_ok=True)
        states[subid]["ses-preop"] = {}
        for run, items in dFCs[subid]["ses-preop"].items():
            if run == "total":
                continue
            states[subid]["ses-preop"][run] = km.predict(items.reshape((items.shape[0], 13456)))
            plot_sates(states[subid]["ses-preop"][run], k, f"{save_dir}/preop_{run}.png")
        states[subid]["ses-postop"] = {}
        for run, items in dFCs[subid]["ses-postop"].items():
            if run == "total":
                continue
            states[subid]["ses-postop"][run] = km.predict(items.reshape((items.shape[0], 13456)))
            plot_sates(states[subid]["ses-postop"][run], k, f"{save_dir}/postop{run}.png")
    with open(f"states_pkl/total/{window_length}_{k}.pkl", "wb") as f:
        pickle.dump(states, f)

### 基于状态的方法的动态指标(state-based):

**dFC强度（dFC strength）**：FC在给定状态中的强度。  
**停留时间（Dwell time）**：受试在每个状态中停留的平均时间。  
**占用率（Occupancy rate）**：扫描期间每个状态发生的时间百分比。  
**转换概率矩阵（Transition matrix）**：从一种状态转换到另一种状态的概率。  
**平均可变性指数（Average variability index）**：它表示功能源的整体动态水平。可变性指数定义为二项分布的标准差，并估计一个区域与给定源的关联中的可变性水平。  
**功能（域间）状态连接（Functional (inter-domain) state connectivity）**：当一种技术（例如空间动态层次）分别估计每个源的动态状态时，它可以捕捉不同源（例如功能域）状态之间的并发性。  

In [3]:
with open(f"states_pkl/total/cluster/km_{window_length}s_{sliding_step}states.pkl", "rb") as f:
    km = pickle.load(f)
with open(f"states_pkl/total/{window_length}_{sliding_step}.pkl", "rb") as f:
    states = pickle.load(f)
with open("datasets.json", "rb") as f:
    datasets = json.load(f)

In [4]:
# dFC强度
km.cluster_centers_.sum(axis=1)/(116*116)

array([0.29040296, 0.15065973, 0.50272478])

In [5]:
# 状态转换概率
states_trans_preop = np.array([[0]*3 for i in range(3)])
states_trans_postop = np.array([[0]*3 for i in range(3)])
for sub in states:
    for run, state in states[sub]["ses-preop"].items():
        lastState= state[0]
        for st in state:
            if st != lastState:
                states_trans_preop[lastState][st] += 1
                lastState = st
    for run, state in states[sub]["ses-postop"].items():
        lastState= state[0]
        for st in state:
            if st != lastState:
                states_trans_postop[lastState][st] += 1
                lastState = st
transition_matrix_preop = (states_trans_preop.T/states_trans_preop.sum(axis=1)).T
transition_matrix_postop = (states_trans_postop.T/states_trans_postop.sum(axis=1)).T
print("preop: ")
print(transition_matrix_preop)
print("postop: ")
print(transition_matrix_postop)

preop: 
[[0.         0.51401869 0.48598131]
 [1.         0.         0.        ]
 [1.         0.         0.        ]]
postop: 
[[0.         0.73146293 0.26853707]
 [0.99731183 0.         0.00268817]
 [1.         0.         0.        ]]


In [12]:
# 停留时间
dwell_time_preop = []
dwell_time_postop = []
for sub in states:
    # 停留窗口数
    dt_preop = np.zeros(3)
    # 出现次数
    appear_times = np.zeros(3)
    for run, state in states[sub]["ses-preop"].items():
        lastState= -1
        for st in state:
            dt_preop[st] += 1
            if st != lastState:
                appear_times[st] += 1
                lastState = st
    dt_preop = np.divide(dt_preop, appear_times, out=np.zeros_like(dt_preop), where=appear_times!=0)
    dt_preop = dt_preop*datasets[sub]["ses-preop"]["run-01"]["TR"]
    dwell_time_preop.append(dt_preop)
    # 停留窗口数 
    dt_postop = np.zeros(3)
    appear_times = np.zeros(3)
    for run, state in states[sub]["ses-postop"].items():
        lastState= state[0]
        for st in state:
            dt_postop[st] += 1
            if st != lastState:
                appear_times[st] += 1
                lastState = st
    dt_postop = np.divide(dt_postop, appear_times, out=np.zeros_like(dt_postop), where=appear_times!=0)
    dt_postop *= datasets[sub]["ses-postop"]["run-01"]["TR"]
    dwell_time_postop.append(dt_postop)
    print(sub, f"preop: {dt_preop[0]:6.2f} {dt_preop[1]:6.2f} {dt_preop[2]:6.2f}, ", f"postop: {dt_postop[0]:6.2f} {dt_postop[1]:6.2f} {dt_postop[2]:6.2f}")
dwell_time_preop = np.array(dwell_time_preop)
dwell_time_postop = np.array(dwell_time_postop)

sub-292 preop:  33.33  14.67  48.00,  postop:  37.78 170.50   0.00
sub-294 preop:  24.57  12.80  32.00,  postop:  32.14  46.80   0.00
sub-302 preop:  11.75  12.00  32.67,  postop: 276.00  12.00   0.00
sub-303 preop:  20.79  20.84  16.95,  postop:  48.97  44.89  26.25
sub-307 preop:   9.49   0.00  42.29,  postop:  67.30   5.00 111.50
sub-314 preop:  18.24  13.56   9.66,  postop: 117.47  24.18   6.00
sub-316 preop:  17.27  11.92  56.50,  postop:  46.73  71.50   0.00
sub-320 preop:  31.26  55.75  37.29,  postop:  66.09  33.14  15.27
sub-330 preop:  30.67  40.11  18.08,  postop:  60.49  40.50  68.31
sub-331 preop:  15.59  81.36  13.28,  postop:  44.50  52.67 126.20
sub-334 preop:  29.94  35.59  23.73,  postop:  62.06  54.24  32.50
sub-335 preop:  20.72  27.12  46.33,  postop: 111.54 112.64  53.00
sub-352 preop:  18.36  53.34   7.91,  postop:  68.50 111.47  16.71
sub-372 preop:  20.02  15.82  25.42,  postop:  77.13  55.65  33.00
sub-376 preop:  17.18  29.38  22.60,  postop:  76.65  77.89  4

In [18]:
# 占用率
occupancy_rate_preop = []
occupancy_rate_postop = []
for sub in states:
    states_preop = [0]*3
    states_postop = [0]*3
    for run, state in states[sub]["ses-preop"].items():
        for i in range(3):
            states_preop[i] += sum(state==i)
    for run, state in states[sub]["ses-postop"].items():
        for i in range(3):
            states_postop[i] += sum(state==i)
    or_preop, or_postop = [i/sum(states_preop) for i in states_preop], [i/sum(states_postop) for i in states_postop]
    occupancy_rate_preop.append(or_preop)
    occupancy_rate_postop.append(or_postop)
    print(sub, f"preop:  {or_preop[0]:0>0.2f} {or_preop[1]:0>0.2f} {or_preop[2]:0>.2f}, ", f"postop:  {or_postop[0]:0>0.2f} {or_postop[1]:0>0.2f} {or_postop[2]:0>0.2f}")
states_preop = [0]*3
states_postop = [0]*3
for sub in states:
    for run, state in states[sub]["ses-preop"].items():
        for i in range(3):
            states_preop[i] += sum(state==i)
    for run, state in states[sub]["ses-postop"].items():
        for i in range(3):
            states_postop[i] += sum(state==i)
or_preop, or_postop = [i/sum(states_preop) for i in states_preop], [i/sum(states_postop) for i in states_postop]
print(f"total   preop:  {or_preop[0]:0>0.2f} {or_preop[1]:0>0.2f} {or_preop[2]:0>.2f}, ", f"postop:  {or_postop[0]:0>0.2f} {or_postop[1]:0>0.2f} {or_postop[2]:0>0.2f}")
occupancy_rate_preop = np.array(occupancy_rate_preop)
occupancy_rate_postop = np.array(occupancy_rate_postop)

sub-292 preop:  0.42 0.18 0.40,  postop:  0.18 0.82 0.00
sub-294 preop:  0.57 0.21 0.21,  postop:  0.39 0.61 0.00
sub-302 preop:  0.39 0.20 0.41,  postop:  0.96 0.04 0.00
sub-303 preop:  0.48 0.44 0.08,  postop:  0.53 0.44 0.04
sub-307 preop:  0.14 0.00 0.86,  postop:  0.43 0.00 0.56
sub-314 preop:  0.59 0.16 0.25,  postop:  0.84 0.15 0.00
sub-316 preop:  0.56 0.31 0.13,  postop:  0.36 0.64 0.00
sub-320 preop:  0.44 0.39 0.17,  postop:  0.72 0.22 0.05
sub-330 preop:  0.50 0.37 0.13,  postop:  0.57 0.23 0.20
sub-331 preop:  0.45 0.24 0.31,  postop:  0.36 0.27 0.37
sub-334 preop:  0.56 0.33 0.11,  postop:  0.55 0.38 0.07
sub-335 preop:  0.29 0.06 0.65,  postop:  0.54 0.43 0.03
sub-352 preop:  0.34 0.62 0.04,  postop:  0.48 0.49 0.03
sub-372 preop:  0.54 0.06 0.39,  postop:  0.63 0.29 0.08
sub-376 preop:  0.33 0.23 0.44,  postop:  0.55 0.31 0.14
sub-384 preop:  0.62 0.06 0.32,  postop:  0.87 0.13 0.00
sub-399 preop:  0.88 0.12 0.00,  postop:  0.64 0.15 0.22
sub-400 preop:  0.70 0.30 0.00,

In [15]:
dwell_time_preop[:,0].shape

(20,)

In [17]:
stats_tests(dwell_time_preop[...,0], dwell_time_postop[...,0])
stats_tests(dwell_time_preop[...,1], dwell_time_postop[...,1])
stats_tests(dwell_time_preop[...,2], dwell_time_postop[...,2])

正态性检验preop:  0.15024840831756592
正态性检验postop:  4.0633105527376756e-05
方差齐性检验:  0.04234234335496295
非参数检验: 
Wilcoxon秩和检验:  -5.193617095684409 2.062471174491123e-07
曼-惠特尼U检验:  8.0 2.2177643300922434e-07
ks检验:  0.85 1.4334782434603487e-07
kruskal检验 26.97365853658539 2.0624711744910948e-07
正态性检验preop:  0.00788016989827156
正态性检验postop:  0.06413329392671585
方差齐性检验:  0.06800124000786924
非参数检验: 
Wilcoxon秩和检验:  -3.3406859964428364 0.0008357168217272526
曼-惠特尼U检验:  76.5 0.0008729420415889432
ks检验:  0.55 0.00396729438660086
kruskal检验 11.169613181860855 0.0008314796135781634
正态性检验preop:  0.6557788252830505
正态性检验postop:  0.003290747059509158
方差齐性检验:  0.053732691504060424
非参数检验: 
Wilcoxon秩和检验:  0.3516511575202986 0.7250998943601429
曼-惠特尼U检验:  213.0 0.7342585234457052
ks检验:  0.25 0.571336004933722
kruskal检验 0.12464069591529543 0.7240547855734762


array([-1.        , -1.        ,  0.72509989,  0.73425852,  0.571336  ,
        0.72405479])

In [19]:
stats_tests(occupancy_rate_preop[...,0], occupancy_rate_postop[...,0])
stats_tests(occupancy_rate_preop[...,1], occupancy_rate_postop[...,1])
stats_tests(occupancy_rate_preop[...,2], occupancy_rate_postop[...,2])

正态性检验preop:  0.9645224809646606
正态性检验postop:  0.9523597955703735
方差齐性检验:  0.9873295683270217
t检验:  -1.876698851597989 0.06825401720641659
ANOVA检验:  3.5219985795891975 0.06825401720641724
非参数检验: 
Wilcoxon秩和检验:  -1.623005342401378 0.10458826282895559
曼-惠特尼U检验:  140.0 0.10751135969600566
ks检验:  0.3 0.33559098126008213
kruskal检验 2.6341463414634205 0.10458826282895543
正态性检验preop:  0.3308142423629761
正态性检验postop:  0.4995274841785431
方差齐性检验:  0.29588616874297274
t检验:  -1.9121025864148296 0.06342413948792731
ANOVA检验:  3.6561363009742776 0.06342413948792751
非参数检验: 
Wilcoxon秩和检验:  -1.650055431441401 0.0989315992014087
曼-惠特尼U检验:  139.0 0.101648372611528
ks检验:  0.25 0.571336004933722
kruskal检验 2.7239605818864345 0.09885245621809093
正态性检验preop:  0.047797199338674545
正态性检验postop:  6.195529567776248e-05
方差齐性检验:  0.012729276050756256
非参数检验: 
Wilcoxon秩和检验:  3.137810328642664 0.0017021498338632287
曼-惠特尼U检验:  316.0 0.0017087945875165669
ks检验:  0.5 0.012298612583953778
kruskal检验 9.924054462934945 0.001631

array([-1.        , -1.        ,  0.00170215,  0.00170879,  0.01229861,
        0.00163133])

In [28]:
nonparametric.rank_compare_2indep(dwell_time_preop, dwell_time_postop)

<class 'statsmodels.stats.nonparametric.RankCompareResult'>
statistic = array([-29.73825207,  -4.27382639,   0.3373077 ])
pvalue = array([8.28934822e-26, 1.24851080e-04, 7.38308254e-01])
s1 = array([ 1.41052632, 19.71776316, 16.55526316])
s2 = array([ 0.67368421, 22.03355263, 57.71315789])
var1 = array([0.00352632, 0.04929441, 0.04138816])
var2 = array([0.00168421, 0.05508388, 0.14428289])
var = array([0.01042105, 0.20875658, 0.37134211])
var_prob = array([0.00026053, 0.00521891, 0.00928355])
nobs1 = 20
nobs2 = 20
nobs = 40
mean1 = array([10.9  , 14.325, 21.15 ])
mean2 = array([30.1  , 26.675, 19.85 ])
prob1 = array([0.02   , 0.19125, 0.5325 ])
prob2 = array([0.98   , 0.80875, 0.4675 ])
somersd1 = array([-0.96  , -0.6175,  0.065 ])
somersd2 = array([ 0.96  ,  0.6175, -0.065 ])
df = array([33.77816071, 37.88345132, 29.07170667])
use_t = True
tuple = (array([-29.73825207,  -4.27382639,   0.3373077 ]), array([8.28934822e-26, 1.24851080e-04, 7.38308254e-01]))

In [29]:
nonparametric.rank_compare_2indep(occupancy_rate_preop, occupancy_rate_postop)

<class 'statsmodels.stats.nonparametric.RankCompareResult'>
statistic = array([-1.69669911, -1.73088828,  3.91880593])
pvalue = array([0.09793917, 0.09161405, 0.00037607])
s1 = array([30.31578947, 29.20789474, 26.37894737])
s2 = array([32.21052632, 32.89210526, 17.43157895])
var1 = array([0.07578947, 0.07301974, 0.06594737])
var2 = array([0.08052632, 0.08223026, 0.04357895])
var = array([0.31263158, 0.3105    , 0.21905263])
var_prob = array([0.00781579, 0.0077625 , 0.00547632])
nobs1 = 20
nobs2 = 20
nobs = 40
mean1 = array([17.5 , 17.45, 26.3 ])
mean2 = array([23.5 , 23.55, 14.7 ])
prob1 = array([0.35  , 0.3475, 0.79  ])
prob2 = array([0.65  , 0.6525, 0.21  ])
somersd1 = array([-0.3  , -0.305,  0.58 ])
somersd2 = array([ 0.3  ,  0.305, -0.58 ])
df = array([37.96513761, 37.86672049, 36.47850432])
use_t = True
tuple = (array([-1.69669911, -1.73088828,  3.91880593]), array([0.09793917, 0.09161405, 0.00037607]))