In [1]:
from esfMRI import sliceWindows, joint_cluster_save_states, plot_sates, align, clustering_evaluate, windows_evaluate, step_evaluate, AIC, BIC, plot_evaluated
from sklearn import cluster, metrics
from nilearn import connectome
import numpy as np
import pickle
import math
import os

### 猜测聚类簇数

1. 肘点法：绘制inertia随k值变化的曲线，转折幅度最大的点作为簇数。

### 评估聚类质量

对于不存在已知分类的评价，只能采用内部评价指标  
基础参数有
1. 紧密度（Compactness）
2. 分割度（Separation）
3. 误差平方和（SSE: Sum of squares of errors）

评价指标
1. Silhouette Coefficient（轮廓系数） —— 越大越好
2. Calinski-Harabasz Index（CH） —— 越大越好
3. Davies-Bouldin Index（DB） —— 越小越好

In [2]:
# Adjustable parameters
window_length_Second = list(range(40, 81, 5))  # window sizes in seconds: 40, 45, ..., 80
sliding_step = 1  # sliding step, in TR
target_states = [2, 3, 4]  # target numbers of states

* 评估滑动窗口参数影响
* evaluate impact of sliding windows parameters

In [3]:
# Load the time series; the cells below iterate `data` by subject id.
# NOTE(review): pickle.load can execute arbitrary code, so only open files
# from a trusted source.
with open("time_series2.pkl", "rb") as f:
    data = pickle.load(f)

In [4]:
# Evaluate the impact of window size on clustering quality for each subject
# (original note: clustering is done after concatenation).
for subid in data:
    # The output folder depends only on the subject, so it is built and
    # created once per subject instead of once per (subject, k) pair.
    save_dir = f"./cluster_evaluate/window_length/joint/{subid}"
    os.makedirs(save_dir, exist_ok=True)
    for k in target_states:
        # NOTE(review): the literal 1 is presumably the sliding step; confirm
        # against windows_evaluate and consider passing `sliding_step`.
        windows_evaluate(data, subid, range(30, 180, 10), 1, k, f"{save_dir}/{k}_states.png")

In [5]:
# Evaluate the impact of window size on clustering quality, pooling the
# windows of every subject, session and run before clustering.
save_dir = "cluster_evaluate/window_length/total"
os.makedirs(save_dir, exist_ok=True)
for k in target_states:
    # One entry per window length, in the order of range(30, 180, 10).
    inertias = []
    scs = []
    chs = []
    dbs = []
    for time in range(30, 180, 10):
        windows = []
        for subid in data:
            for session in ("ses-preop", "ses-postop"):
                for run, items in data[subid][session].items():
                    # Window length converted to a frame count via the run's TR, rounded up.
                    n_frames = math.ceil(time / items["TR"])
                    # Each run contributes its own windows exactly once. The
                    # original postop branch used `tmp += ...`, which re-added
                    # the previous run's windows and duplicated samples.
                    windows += sliceWindows(items["time_series"], n_frames, sliding_step)
        # (Per-session sample weights were considered by the author; k_means
        # accepts them through its sample_weight argument.)
        fcs = connectome.ConnectivityMeasure(kind="correlation").fit_transform(windows)
        del windows
        # Flatten each connectivity matrix into one feature vector. The width
        # is derived from the array itself instead of the hard-coded 13456.
        fcs = fcs.reshape((fcs.shape[0], int(np.prod(fcs.shape[1:]))))
        if k < fcs.shape[0]:
            center, states, inertia = cluster.k_means(fcs, k)
            inertias.append(inertia)  # elbow method
            scs.append(metrics.silhouette_score(fcs, states))  # silhouette coefficient
            chs.append(metrics.calinski_harabasz_score(fcs, states))  # CH, variance ratio
            dbs.append(metrics.davies_bouldin_score(fcs, states))  # DB
        else:
            # Not enough samples for k clusters: repeat the scores of the
            # previous window length. This raises IndexError if it happens on
            # the very first window length, because the lists are still empty.
            inertias.append(inertias[-1])
            scs.append(scs[-1])
            chs.append(chs[-1])
            dbs.append(dbs[-1])
        del fcs
    # Plot the metrics for this k.
    plot_evaluated(range(30, 180, 10), inertias=inertias, scs=scs, chs=chs, dbs=dbs, aic=None, bic=None, save_path=f"{save_dir}/{k}_states.png")

In [4]:
# Evaluate the impact of the sliding step on clustering quality
# (original note: clustering is done after concatenating all data).
save_dir = "cluster_evaluate/step"
# Create the output folder up front, as the other evaluation cells do; this
# cell previously relied on it already existing (or on step_evaluate creating it).
os.makedirs(save_dir, exist_ok=True)
for time in window_length_Second:
    for k in target_states:
        step_evaluate(data, None, time, range(1, 10), k, save_path=f"{save_dir}/{time}_{k}.png")

* 评估聚类效果选择最佳状态数
* evaluate clustering quality and select the best number of target states

In [3]:
# Load the sliding-window data generated in slidingWindow.ipynb.
# window_length selects which pickle under dFC/ is read (presumably the
# window size in seconds -- TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code, so only open files
# from a trusted source.
window_length = 60
with open(f"dFC/{window_length}_slidingWindows.pkl", "rb") as f:
    slidingWindows = pickle.load(f)

In [4]:
# Evaluate the impact of the number of target states on clustering quality,
# clustering after concatenating the windows of all subjects.
windows_preop = []
windows_postop = []
save_path = "cluster_evaluate/total"
os.makedirs(save_path, exist_ok=True)
for subid in slidingWindows:
    windows_preop += slidingWindows[subid]["ses-preop"]["total"]
    windows_postop += slidingWindows[subid]["ses-postop"]["total"]
# Output names are keyed by window_length, the value used to load
# slidingWindows. The original used `time`, a loop variable leaked from another
# cell: undefined on a fresh kernel, and otherwise unrelated to the loaded data.
clustering_evaluate(windows_preop, range(2, 11), f"{save_path}/{window_length}_preop.png")
clustering_evaluate(windows_postop, range(2, 11), f"{save_path}/{window_length}_postop.png")
clustering_evaluate(windows_preop+windows_postop, range(2, 11), f"{save_path}/{window_length}_total.png")

* 绘制输出状态变化
* plot states changing

In [3]:
# Load the pickled dFC data generated in slidingWindow.ipynb.
# window_length selects which pickle under dFC/ is read (presumably the
# window size in seconds -- TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code, so only open files
# from a trusted source.
window_length = 60
with open(f"dFC/{window_length}_dFCs.pkl", "rb") as f:
    dFCs = pickle.load(f)

In [4]:
# Pool the dFC matrices of every subject and session into one 2-D sample
# matrix (one row per first-axis entry, presumably one per window) that is
# then used for clustering.
save_path = "states/total"
os.makedirs(save_path, exist_ok=True)
flat_blocks = []  # flattened dFC arrays, in subject -> preop -> postop order
for subid in dFCs:
    for session in ("ses-preop", "ses-postop"):
        session_dfcs = dFCs[subid][session]
        # Use the "total" entry when the session has one, otherwise every run.
        if "total" in session_dfcs:
            blocks = [session_dfcs["total"]]
        else:
            blocks = [session_dfcs[run] for run in session_dfcs]
        for block in blocks:
            # Collapse all axes after the first into one feature axis. The
            # width is computed from the shape instead of the hard-coded 13456.
            n_features = int(np.prod(block.shape[1:]))
            flat_blocks.append(block.reshape((block.shape[0], n_features)))

if not flat_blocks:
    raise ValueError("dFCs contains no matrices to cluster")

# Stack once at the end; growing the array with np.vstack inside the loops
# re-copied all previously collected rows on every step.
dfcs = np.vstack(flat_blocks)

# Free memory
del flat_blocks
del dFCs
# The original stacked dfcs on top of itself, which duplicated every sample
# and doubled memory use without adding information for clustering.
fcs = dfcs

# Fit one KMeans model per target number of states and pickle each fitted model.
# The output folder does not depend on k, so it is created once before the loop.
save_dir = f"{save_path}/cluster"
os.makedirs(save_dir, exist_ok=True)
for k in target_states:
    # NOTE(review): no random_state is passed, so the fitted centroids may
    # differ between runs; pin a seed if reproducible states are required.
    km = cluster.KMeans(k)
    km.fit(fcs)
    with open(f"{save_dir}/km_{window_length}s_{k}states.pkl", "wb") as f:
        pickle.dump(km, f)
