In [1]:
from esfMRI import sliceWindows, clustering_evaluate, windows_evaluate, step_evaluate, plot_evaluated
from sklearn import cluster, metrics
from nilearn import connectome
import matplotlib.pyplot as plt
import numpy as np
import pickle
import math
import os

### 猜测聚类簇数

1. 肘点法：绘制inertia随k值变化的曲线，转折幅度最大的点作为簇数。

### 评估聚类质量

对于不存在已知分类的评价，只能采用内部评价指标  
基础参数有
1. 紧密度（Compactness）
2. 分割度（Separation）
3. 误差平方和（SSE: Sum of squares of errors）

评价指标
1. Silhouette Coefficient（轮廓系数） —— 越大越好
2. Calinski-Harabasz Index（CH） —— 越大越好
3. Davies-Bouldin Index（DB） —— 越小越好

In [2]:
# Adjustable parameters
# Window sizes to try, in seconds: 40, 45, ..., 80
window_length_Second = list(range(40, 85, 5))
# Sliding step, in TRs
sliding_step = 1
# Target numbers of states to evaluate
target_states = [2]

* 评估滑动窗口参数影响
* Evaluate the impact of the sliding-window parameters

In [3]:
# Load the pickled time series into `data`.
# Only unpickle files you produced yourself: pickle.load can execute arbitrary code.
with open("time_series2.pkl", mode="rb") as f:
    data = pickle.load(f)

In [4]:
# Evaluate the impact of the window size on clustering quality, one subject
# at a time. windows_evaluate is called once per (subject, k) with the window
# lengths range(30, 180, 10); its last argument is the output path handed to it.
for subid in data:
    # The output directory depends only on the subject, so create it once per
    # subject instead of once per (subject, k) pair.
    save_dir = f"./cluster_evaluate/window_length/joint/{subid}"
    os.makedirs(save_dir, exist_ok=True)
    for k in target_states:
        # NOTE(review): the literal 1 is presumably the sliding step (same slot
        # as the step list in step_evaluate) -- confirm, then consider passing
        # `sliding_step` so this cell follows the config cell.
        windows_evaluate(data, subid, range(30, 180, 10), 1, k, f"{save_dir}/{k}_states.png")

In [7]:
# Evaluate the impact of the window size on clustering quality, pooling the
# sliding windows of every subject, session and run before clustering.
window_seconds = range(30, 180, 10)  # window lengths to sweep, in seconds
for k in target_states:
    inertias = []
    scs = []
    chs = []
    dbs = []
    for time in window_seconds:
        windows = []
        for subid in data:
            # Same order as before: all preop runs, then all postop runs.
            for session in ("ses-preop", "ses-postop"):
                for run, items in data[subid][session].items():
                    # Convert the window length from seconds to frames with this run's TR.
                    n_frames = math.ceil(time/items["TR"])
                    # Fix: the postop branch used `tmp += sliceWindows(...)`, which
                    # extended the list left over from the last preop run and so
                    # added those preop windows to the pool again on every postop
                    # run. Each run now contributes its own windows exactly once.
                    windows += sliceWindows(items["time_series"], n_frames, sliding_step)
        fcs = connectome.ConnectivityMeasure(kind="correlation").fit_transform(windows)
        del windows
        # Flatten each connectivity matrix into one feature row. The width is
        # derived from the array (it was hard-coded as 13456).
        fcs = fcs.reshape((fcs.shape[0], int(np.prod(fcs.shape[1:]))))
        if k < fcs.shape[0]:
            center, states, inertia = cluster.k_means(fcs, k)
            inertias.append(inertia) # elbow method
            scs.append(metrics.silhouette_score(fcs, states)) # Silhouette Coefficient
            chs.append(metrics.calinski_harabasz_score(fcs, states)) # CH, variance ratio
            dbs.append(metrics.davies_bouldin_score(fcs, states)) # DB
        else:
            # Not enough windows for k clusters: repeat the previous value so the
            # curves keep one point per window length. If there is no previous
            # value yet, use NaN instead of raising IndexError on an empty list.
            inertias.append(inertias[-1] if inertias else float("nan"))
            scs.append(scs[-1] if scs else float("nan"))
            chs.append(chs[-1] if chs else float("nan"))
            dbs.append(dbs[-1] if dbs else float("nan"))
        del fcs
    # Save the raw curves and plot them.
    # NOTE: results.pkl does not include k in its name, so with several target
    # states only the last k's curves remain on disk.
    save_dir = "cluster_evaluate/window_length"
    os.makedirs(save_dir, exist_ok=True)
    with open(f"{save_dir}/results.pkl", "wb") as f:
        pickle.dump([inertias, scs, chs, dbs], f)
    plot_evaluated(window_seconds, inertias=inertias, scs=scs, chs=chs, dbs=dbs, aic=None, bic=None, save_path=f"{save_dir}/{k}_states.png")

In [27]:
# Reload the curves saved by the pooled window-length sweep and redraw the figure.
save_dir = "cluster_evaluate/window_length"
with open(f"{save_dir}/results.pkl", "rb") as f:
    inertias, scs, chs, dbs = pickle.load(f)
plot_evaluated(
    range(30, 180, 10),
    inertias=inertias,
    scs=scs,
    chs=chs,
    dbs=dbs,
    aic=None,
    bic=None,
    save_path=f"{save_dir}/2_states.png",
    x_label="Window length (s)",
)

In [11]:
# Adjustable parameters
# Window size, in seconds
window_length_Second = [60]
# Sliding step, in TRs
sliding_step = 1
# Target numbers of states to evaluate
target_states = [2]

In [7]:
# Evaluate the impact of the sliding step on clustering quality, with all data
# pooled (subject argument is None).
save_dir = "cluster_evaluate/step"
os.makedirs(save_dir, exist_ok=True)
# NOTE(review): the candidate steps start at 0 -- confirm how step_evaluate
# treats a step of 0.
candidate_steps = list(range(21))
for time in window_length_Second:
    for k in target_states:
        results = step_evaluate(data, None, time, candidate_steps, k, save_path=f"{save_dir}/{time}_{k}.png")
        # results.pkl is rewritten for every (time, k) pair, so only the last
        # pair's results remain on disk.
        with open(f"{save_dir}/results.pkl", "wb") as f:
            pickle.dump(results, f)

In [7]:
def plot_evaluated(x_axis, inertias=None, scs=None, chs=None, dbs=None, aic=None, bic=None, save_path=None, formate="jpg", x_label="Step", scs_ylim=(0.25, 0.3), dbs_ylim=(1.4, 1.45)):
    """Plot clustering-quality curves against a swept parameter.

    This definition shadows the ``plot_evaluated`` imported from ``esfMRI`` at
    the top of the notebook once its cell has been executed.

    Args:
        x_axis: x values shared by every curve.
        inertias, scs, chs, dbs, aic, bic: y values for the Inertias,
            Silhouette Coefficient, Calinski Harabasz Index, Davies Bouldin
            Index, AIC and BIC panels. A panel whose values are None is left
            empty.
        save_path: where to save the figure. When None the figure is not saved
            and stays open.
        formate: forwarded to ``savefig(format=...)``. The default "jpg" takes
            precedence over the extension in ``save_path``, so a ".png" path
            with the default holds JPEG data. Pass ``formate=None`` to let
            matplotlib infer the format from the extension.
        x_label: x-axis label used on every panel.
        scs_ylim: (low, high) y-limits of the silhouette panel, or None to
            autoscale. Defaults to the previously hard-coded (0.25, 0.3).
        dbs_ylim: (low, high) y-limits of the Davies-Bouldin panel, or None to
            autoscale. Defaults to the previously hard-coded (1.4, 1.45).

    Side effects:
        Sets matplotlib's global rcParams font family and size. After saving,
        clears the current axes/figure and closes all open figures.
    """
    plt.rcParams['font.family']=['Times New Roman']
    fontsize = 18
    plt.rcParams['font.size']=fontsize

    # A third row is only needed when AIC or BIC curves are supplied.
    if aic is None and bic is None:
        figi, axi = plt.subplots(2, 2, figsize=(15, 10))
    else:
        # NOTE(review): 5x3 inches is small for six panels at font size 18 --
        # confirm this size is intended.
        figi, axi = plt.subplots(3, 2, figsize=(5, 3))
    figi.patch.set_color("white")
    # Adjust the spacing between subplots.
    figi.subplots_adjust(wspace=0.3, hspace=0.3)

    # (grid position, y label, values, optional y-limits) for each panel.
    panels = (
        ((0, 0), "Inertias", inertias, None),
        ((0, 1), "Silhouette Coefficient", scs, scs_ylim),
        ((1, 0), "Calinski Harabasz Index", chs, None),
        ((1, 1), "Davies Bouldin Index", dbs, dbs_ylim),
        ((2, 0), "AIC", aic, None),
        ((2, 1), "BIC", bic, None),
    )
    for position, y_label, values, y_limits in panels:
        # Skip before indexing: row 2 does not exist in the 2x2 layout.
        if values is None:
            continue
        axis = axi[position]
        axis.set_ylabel(y_label)
        axis.set_xlabel(x_label)
        if y_limits is not None:
            axis.set_ylim(*y_limits)
        axis.plot(x_axis, values)

    if save_path is not None:
        figi.savefig(save_path, format=formate, dpi=600)
        plt.cla()
        plt.clf()
        plt.close("all")

In [8]:
# Reload the saved step-sweep results and redraw the figure for the first ten entries.
save_dir = "cluster_evaluate/step"
with open(f"{save_dir}/results.pkl", "rb") as f:
    results = pickle.load(f)
inertias, scs, chs, dbs = results
# NOTE(review): the sweep cell passes steps list(range(0, 21)), while the first
# ten entries are plotted against x = 1..10 here -- confirm that entry i of each
# list really corresponds to step i + 1.
first_ten = slice(None, 10)
plot_evaluated(
    range(1, 11),
    inertias=inertias[first_ten],
    scs=scs[first_ten],
    chs=chs[first_ten],
    dbs=dbs[first_ten],
    aic=None,
    bic=None,
    save_path=f"{save_dir}/60_2.png",
    x_label="Step (TR)",
)

* 评估聚类效果选择最佳状态数
* Evaluate clustering quality and select the best number of target states

In [4]:
# Load the sliding-window data generated in slidingWindow.ipynb.
# The window length is part of the file name.
window_length = 60
with open(f"dFC/{window_length}_slidingWindows.pkl", mode="rb") as f:
    slidingWindows = pickle.load(f)

In [7]:
# Evaluate the impact of the number of target states on clustering quality,
# using the pooled preop + postop windows of every subject.
save_path = "cluster_evaluate/states"
os.makedirs(save_path, exist_ok=True)
windows_preop = []
windows_postop = []
for subid in slidingWindows:
    windows_preop.extend(slidingWindows[subid]["ses-preop"]["total"])
    windows_postop.extend(slidingWindows[subid]["ses-postop"]["total"])
# Per-session evaluations (currently disabled):
# clustering_evaluate(windows_preop, range(2, 11), f"{save_path}/{window_length}_preop.png")
# clustering_evaluate(windows_postop, range(2, 11), f"{save_path}/{window_length}_postop.png")
results = clustering_evaluate(
    windows_preop+windows_postop,
    range(2, 11),
    f"{save_path}/{window_length}_total.png",
)
with open(f"{save_path}/results.pkl", "wb") as f:
    pickle.dump(results, f)

In [37]:
# Reload the saved state-number sweep and redraw the figure.
save_path = "cluster_evaluate/states"
window_length = 60
with open(f"{save_path}/results.pkl", "rb") as f:
    results = pickle.load(f)
inertias, scs, chs, dbs = results
plot_evaluated(
    range(2, 11),
    inertias=inertias,
    scs=scs,
    chs=chs,
    dbs=dbs,
    aic=None,
    bic=None,
    save_path=f"{save_path}/{window_length}_total.png",
    x_label="States",
)

* 绘制输出状态变化
* plot states changing

In [3]:
# Load the dFC data generated in slidingWindow.ipynb.
# The window length is part of the file name.
window_length = 60
with open(f"dFC/{window_length}_dFCs.pkl", mode="rb") as f:
    dFCs = pickle.load(f)

In [4]:
# Pool the dFC matrices of every subject and session, cluster them, and save
# the fitted KMeans objects.
save_path = "states/total"
os.makedirs(save_path, exist_ok=True)
dfc_blocks = []
for subid in dFCs:
    # Same order as before: preop first, then postop, for each subject.
    for session in ("ses-preop", "ses-postop"):
        session_dfcs = dFCs[subid][session]
        # A "total" entry is used alone when present; otherwise every run is used.
        run_keys = ["total"] if "total" in session_dfcs else list(session_dfcs)
        for run in run_keys:
            matrices = session_dfcs[run]
            # Flatten each matrix into one feature row. The width is derived from
            # the array (it was hard-coded as 13456).
            n_features = int(np.prod(matrices.shape[1:]))
            dfc_blocks.append(matrices.reshape((matrices.shape[0], n_features)))
if not dfc_blocks:
    raise ValueError("no dFC matrices found to cluster")
# Stack once at the end; calling np.vstack inside the loop re-copied the
# accumulated array on every iteration.
dfcs = np.vstack(dfc_blocks)
del dfc_blocks

# Free memory
del dFCs
# Fix: this was np.vstack((dfcs, dfcs)), which fed every sample to KMeans twice
# and doubled memory use. The data is now clustered once.
fcs = dfcs

# Save the clustering objects. The output directory does not depend on k, so
# it is created once.
save_dir = f"{save_path}/cluster"
os.makedirs(save_dir, exist_ok=True)
for k in target_states:
    km = cluster.KMeans(k)
    km.fit(fcs)
    with open(f"{save_dir}/km_{window_length}s_{k}states.pkl", "wb") as f:
        pickle.dump(km, f)
