In [None]:
import pingouin as pg      # 安装一下“企鹅”包
import numpy as np
import pandas as pd

In [None]:
# Source of these notes: http://t.csdn.cn/Nta4G
# Background on the intraclass correlation coefficient (ICC), kept as a bare string
# literal rather than as comments. English summary of the text below:
#   - ICC lies between 0 and 1: < 0.5 poor agreement, 0.5-0.75 moderate,
#     0.75-0.9 good, > 0.9 excellent.
#   - [1] Three models: one-way random effects; two-way random effects (the
#     reliability result can be generalized); two-way mixed effects (the result
#     cannot be generalized).
#   - [2] Two types, chosen by how the measurement will be used in practice:
#     "the mean of k raters/measurements" when the average of several raters is the
#     basis of assessment; "single rater/measurement" when one rater's measurement
#     is the basis, even if the reliability experiment involved two or more raters.
# NOTE(review): the text labels the mixed-effects model "2-way fixed effects"; the
# Chinese term next to it (双向混合效应) means two-way *mixed* effects - consider
# correcting the wording in the string.
'''
ICC的值介于0~1之间：
   小于0.5表示一致性较差；
   0.5~0.75一致性中等；
   0.75~0.9一致性较好；
   大于0.9一致性极好；

【1】模型有三种：
  单向随机效应：1-way random effects
  双向随机效应：2-way random effects:可靠性结果可推广
  双向混合效应：2-way fixed effects:可靠性结果不可推广

【2】类型有两种：这种选择取决于在实际应用中将如何执行测量方案:
如果我们计划使用3个评分者的 ★平均值★ 作为评估依据，则可靠性研究的实验设计应包括3个评分者，并选择类型:
    the mean of k raters/measurements。
如果我们打算使用单一评分者的测量作为实际测量的依据，那么即使可靠性实验涉及到2个或2个以上的评分者，也应该选择类型：
    single rater/ measurement。
   
'''

# 测试pingouin包的ICC使用

In [None]:
# Open the test data: fetch the dataset named 'icc' through pingouin's
# read_dataset helper, then display it as the cell output.
data = pg.read_dataset('icc')
data

In [None]:
# Usage example on the sample data: wines are the rated targets, judges are the
# raters, and the Scores column holds the ratings.
icc = pg.intraclass_corr(
    data=data,
    targets='Wine',
    raters='Judge',
    ratings='Scores',
)
icc

# Legend for the ICC types in the result (author's notes):
#   ICC1  - one-way random
#   ICC1k - one-way random, multiple raters
#   ICC2  - two-way random
#   ICC2k - two-way random, multiple raters
#   ICC3  - two-way mixed
#   ICC3k - two-way mixed, multiple raters

# 打开自己的数据

In [None]:
# Read the data: one CSV per reader, with the first column of each file used as
# the row index.
open_pathA = r'→→TRJ_readerA_dup0.75.csv'
open_pathB = r'→→TRJ_readerB_dup0.75.csv'

# Both files are parsed with identical options, reader A first.
df_A, df_B = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (open_pathA, open_pathB)
)

df_A

# 拿一个特征练手

In [None]:
# Practice on a single feature: the first column of reader A's table.
features_demo = df_A.columns[0]
print(f'示例特征为{features_demo}')

# Build one long-format frame per reader with columns Pat / Reader / Values.
# A reader's position in df_list becomes its label (Reader0, Reader1).
df_list = [df_A, df_B]
df_featuresDemo_list = []  # per-reader frames for the demo feature
for reader_idx, reader_df in enumerate(df_list):
    feature_col = reader_df[features_demo]
    df_featuresDemo_list.append(
        pd.DataFrame(data={
            'Pat': feature_col.index,
            'Reader': ['Reader' + str(reader_idx)] * len(feature_col.index),
            'Values': feature_col.values,
        })
    )

# Stack the readers on top of each other and renumber the rows from 0,
# discarding the original indexes.
df_featuresDemo = pd.concat(df_featuresDemo_list, ignore_index=True)
df_featuresDemo

In [None]:
# ICC table for the demo feature, indexed by the 'Type' column so that a single
# ICC variant can be looked up by its label.
icc = (
    pg.intraclass_corr(
        data=df_featuresDemo,
        targets='Pat',
        raters='Reader',
        ratings='Values',
    )
    .set_index('Type')
)
icc

In [None]:
# Choose one ICC variant and pull its coefficient and p-value out of the table.
TypeNeed = 'ICC1'
ICC_value, pval_value = icc.loc[TypeNeed, 'ICC'], icc.loc[TypeNeed, 'pval']

# One-row summary frame for the demo feature.
df_feature_ICCstatus = pd.DataFrame(
    data={'Feature': [features_demo], 'ICC': [ICC_value], 'pval': [pval_value]}
)
df_feature_ICCstatus

# 处理所有特征

In [None]:
# ICC types to collect. While the loop runs, each value is a list of
# {'Feature', 'ICC', 'pval'} records (one per feature); after the loop each list
# is replaced by a DataFrame with those three columns.
ret_dict = {'ICC1':[],'ICC2':[],'ICC3':[],'ICC1k':[],'ICC2k':[],'ICC3k':[]}

# Features to process: every column of reader A's table.
# features = df_A.columns.tolist()[:2]  # quick check on the first two features only
features = df_A.columns.tolist()

# One table per reader; a reader's position in this list becomes its rater label
# (Reader0, Reader1). Built once instead of on every iteration.
df_list = [df_A,df_B]

# The feature loop and the reader loop use distinct index names: the original
# reused `i` for both, so the reader loop overwrote the feature index.
for feature_idx, fetre in enumerate(features):
    print(f'进度{round(feature_idx/len(features)*100,2)}%,处理特征为{fetre}')

    # Long-format table for this feature (columns Pat / Reader / Values), with the
    # readers stacked and the rows renumbered from 0.
    df_fetreDemo = pd.concat(
        [
            pd.DataFrame(data={
                'Pat': reader_df[fetre].index,
                'Reader': ['Reader'+str(reader_idx)]*len(reader_df[fetre].index),
                'Values': reader_df[fetre].values})
            for reader_idx, reader_df in enumerate(df_list)
        ],
        ignore_index=True)

    # Compute the ICC table and index it by type label for direct lookup.
    icc_ret = pg.intraclass_corr(data=df_fetreDemo, targets='Pat', raters='Reader', ratings='Values')
    icc_ret = icc_ret.set_index('Type')

    # Record this feature's coefficient and p-value under every requested type.
    for type_need, records in ret_dict.items():
        records.append({
            'Feature': fetre,
            'ICC': icc_ret.loc[type_need,'ICC'],
            'pval': icc_ret.loc[type_need,'pval']})

# Turn each record list into a DataFrame in one step. Passing `columns` keeps the
# Feature / ICC / pval layout even when no feature was processed, where the
# original pd.concat of an empty list raised "No objects to concatenate".
ret_dict = {
    type_need: pd.DataFrame(records, columns=['Feature','ICC','pval'])
    for type_need, records in ret_dict.items()
}

In [None]:
# Display the per-feature result table stored under the 'ICC1' key.
ret_dict['ICC1']

# 保存

In [None]:

# ret_dict['ICC2k'].to_csv("icc2K.csv")

In [None]:
# Save every ICC type's result table to its own sheet of a single workbook;
# the sheet name is the ICC type label.
save_name = "→→→ICC结果.xlsx"
with pd.ExcelWriter(save_name) as workbook:
    for sheet_label, icc_table in ret_dict.items():
        icc_table.to_excel(workbook, sheet_name=sheet_label)