In [123]:
import numpy as np
import pandas as pd
import matplotlib,re,math
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator
import warnings
warnings.filterwarnings("ignore")



%matplotlib inline
plt.rcParams['font.sans-serif'] = ['SimHei'] 
plt.rcParams['axes.unicode_minus'] = False 

In [124]:
data1=pd.read_excel('表1-患者列表及临床信息.xlsx')
data2=pd.read_excel('表2-患者影像信息血肿及水肿的体积及位置.xlsx')
data3=pd.read_excel('表3-患者影像信息血肿及水肿的形状及灰度分布.xlsx')
data4=pd.read_excel('表4-答案文件.xlsx')

data1.rename(columns={data1.columns[0]: "ID"}, inplace=True)
data2.rename(columns={data2.columns[0]: "ID"}, inplace=True)

data_f_1=pd.read_excel('附表1-检索表格-流水号vs时间.xlsx')
data_f_time=pd.read_excel('时间点.xlsx')

In [125]:
# 取1a问题对应需要用的数据
data1_columns = ['ID','数据集划分', '入院首次影像检查流水号', '发病到首次影像检查时间间隔']
data1_1a = data1[data1_columns]

data2.rename(columns={data2.columns[0]: "ID"}, inplace=True)
data2_columns = ['ID'] + [col for col in data2.columns if col.startswith('HM_volume') or col.startswith('随访')] # '首次检查流水号'
data2_1a = data2[data2_columns]

In [126]:
# 合并和保存数据
_1a=pd.merge(data1_1a,data2_1a,how='outer',on='ID')
# _1a.to_csv('1a数据.csv', index=False, encoding='utf-8')
_1a.to_excel('1a数据.xlsx', index=False)

In [127]:
# 转换为时间戳格式
columns_to_convert = data_f_time.columns[1:]
data_f_time[columns_to_convert] = data_f_time[columns_to_convert].apply(pd.to_datetime)
# Datetime对象转换为秒级的时间戳形式
data_f_time[columns_to_convert] = data_f_time[columns_to_convert].apply(lambda x: x.astype(int) // 10**9)
# 将负数的时间戳转变为空值
data_f_time[data_f_time.columns[1:]] = data_f_time[data_f_time.columns[1:]].apply(lambda x: x.mask(x < 0))

In [128]:
# 找到两个 DataFrame 共有的列名
common_columns = data_f_time.columns.intersection(_1a.columns)
# 使用 data_f_time 中的列替换 _1a 中的相同列名的列
_1a[common_columns] = data_f_time[common_columns]

In [129]:
data_f_time.columns

Index(['ID', '入院首次影像检查流水号', '随访1流水号', '随访2流水号', '随访3流水号', '随访4流水号', '随访5流水号',
       '随访6流水号', '随访7流水号', '随访8流水号'],
      dtype='object')

In [130]:
_1a.columns

Index(['ID', '数据集划分', '入院首次影像检查流水号', '发病到首次影像检查时间间隔', 'HM_volume', '随访1流水号',
       'HM_volume.1', '随访2流水号', 'HM_volume.2', '随访3流水号', 'HM_volume.3',
       '随访4流水号', 'HM_volume.4', '随访5流水号', 'HM_volume.5', '随访6流水号',
       'HM_volume.6', '随访7流水号', 'HM_volume.7', '随访8流水号', 'HM_volume.8'],
      dtype='object')

In [131]:
# 取时间做散点图
result_list = []
flow_cols = [col for col in _1a.columns if col.endswith('流水号')]
hm_cols = [col for col in _1a.columns if col.startswith('HM_')]
print(flow_cols, hm_cols)

# 修改寻访的变为以0开始的时间戳
cumulative_value_lists = []
p = 0 # 更新第几列
for i in range(8):
    cumulative_value_list = []
    q = 0
    for j,k,m in zip(_1a[flow_cols[i]], _1a[flow_cols[i+1]], _1a['发病到首次影像检查时间间隔']):
        if j == float(np.nan) or k == float(np.nan): # 无记录继承前面的记录
            cumulative_value = cumulative_value_lists[p-1][q]
        else:
            if i == 0:
                cumulative_value = 0
                cumulative_value += (m * 3600 + (k - j))
                cumulative_value_list.append(cumulative_value)
            else:
                cumulative_value = cumulative_value_lists[p-1][q] # 继承上一次检测的时间戳
                cumulative_value += (k - j)
                cumulative_value_list.append(cumulative_value)
        q += 1 # 更新第几行
    cumulative_value_lists.append(cumulative_value_list)
    p += 1

# 最后才更新数据，前面只存储了每个时间戳以0开始的值
for i in range(8):
    for j,k,m in zip(_1a[flow_cols[i]], _1a[flow_cols[i+1]], _1a['发病到首次影像检查时间间隔']):
        _1a[flow_cols[i+1]] = np.array(cumulative_value_lists[i])

# 修改首次入院检查时的时间戳
_1a['入院首次影像检查流水号'] = _1a['发病到首次影像检查时间间隔'] * 3600
# 此时_1a内流水号全为时间戳形式了

# 画总体的散点图
'''
# [(时间戳，体积大小)]
for i,j in zip(flow_cols, hm_cols):
    for l,m in zip(_1a[i],_1a[j]):
        if l <= 48 * 3600: # 只取48小时内的数据
            result_list.append((l,m))

# 去除空值再画图
result_list = [item for item in result_list if not any(math.isnan(value) for value in item)]

x_values = [item[0] for item in result_list]
y_values = [item[1] for item in result_list]

# 绘制散点图
plt.scatter(x_values, y_values)

# 添加标题和标签
plt.title('1a')
plt.xlabel('timestamp')
plt.ylabel('HM_volume')

# 显示图形
plt.show()
'''

selected_cols = []
for i,j in zip(flow_cols, hm_cols):
    selected_cols.append(i)
    selected_cols.append(j)
selected_df = _1a[selected_cols]

['入院首次影像检查流水号', '随访1流水号', '随访2流水号', '随访3流水号', '随访4流水号', '随访5流水号', '随访6流水号', '随访7流水号', '随访8流水号'] ['HM_volume', 'HM_volume.1', 'HM_volume.2', 'HM_volume.3', 'HM_volume.4', 'HM_volume.5', 'HM_volume.6', 'HM_volume.7', 'HM_volume.8']


In [132]:
selected_df

Unnamed: 0,入院首次影像检查流水号,HM_volume,随访1流水号,HM_volume.1,随访2流水号,HM_volume.2,随访3流水号,HM_volume.3,随访4流水号,HM_volume.4,随访5流水号,HM_volume.5,随访6流水号,HM_volume.6,随访7流水号,HM_volume.7,随访8流水号,HM_volume.8
0,9000.0,69714,29766.0,74902.0,475590.0,70952.0,935055.0,62831.0,1531938.0,44029.0,,,,,,,,
1,10800.0,47500,53724.0,52271.0,249199.0,47748.0,1612859.0,13055.0,4029091.0,20.0,,,,,,,,
2,7200.0,86396,34281.0,106042.0,142545.0,103263.0,,,,,,,,,,,,
3,3600.0,45498,61107.0,39877.0,301843.0,16622.0,819004.0,8441.0,,,,,,,,,,
4,18000.0,14832,95283.0,24472.0,352595.0,25477.0,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,19800.0,32559,89575.0,35138.0,452217.0,34241.0,1055942.0,14377.0,,,,,,,,,,
156,1800.0,18150,4920.0,37650.0,25620.0,140688.0,72240.0,123926.0,,,,,,,,,,
157,10080.0,27969,66180.0,27071.0,256680.0,24119.0,1111140.0,3647.0,,,,,,,,,,
158,10800.0,53154,95520.0,126642.0,,,,,,,,,,,,,,


In [133]:
selected_df.to_excel('1a数据_已替换时间戳_已矫正.xlsx', index=False)

In [None]:
from scipy import optimize

#线性
def liner_func(x,a,b,c,d):
    return a*x+b

#二次
def erchi_func(x,a,b,c,d):
    return a*x**2+b*x+c

#三次
def sanchi_func(x,a,b,c,d):
    return a*x**3+b*x**2+c*x+d

# 三角函数
def trig_func(x,a,b,c,d):
    return a*np.sin(x)+b*np.cos(x)+c
# 指数曲线
def target_func(x, a, b, c, d):
    return a * np.exp(-x / b) + c
# 对数函数
def hyp_func(x, a,b,c,d):
    return a*np.log(x)+b

def __sst(y_no_fitting):
    """
    计算SST(total sum of squares) 总平方和
    :param y_no_predicted: List[int] or array[int] 待拟合的y
    :return: 总平方和SST
    """
    y_mean = sum(y_no_fitting) / len(y_no_fitting)
    s_list =[(y - y_mean)**2 for y in y_no_fitting]
    sst = sum(s_list)
    return sst


def __ssr(y_fitting, y_no_fitting):
    """
    计算SSR(regression sum of squares) 回归平方和
    :param y_fitting: List[int] or array[int]  拟合好的y值
    :param y_no_fitting: List[int] or array[int] 待拟合y值
    :return: 回归平方和SSR
    """
    y_mean = sum(y_no_fitting) / len(y_no_fitting)
    s_list =[(y - y_mean)**2 for y in y_fitting]
    ssr = sum(s_list)
    return ssr


def __sse(y_fitting, y_no_fitting):
    """
    计算SSE(error sum of squares) 残差平方和
    :param y_fitting: List[int] or array[int] 拟合好的y值
    :param y_no_fitting: List[int] or array[int] 待拟合y值
    :return: 残差平方和SSE
    """
    s_list = [(y_fitting[i] - y_no_fitting[i])**2 for i in range(len(y_fitting))]
    sse = sum(s_list)
    return sse


def goodness_of_fit(y_fitting, y_no_fitting):
    """
    计算拟合优度R^2
    :param y_fitting: List[int] or array[int] 拟合好的y值
    :param y_no_fitting: List[int] or array[int] 待拟合y值
    :return: 拟合优度R^2
    """
    SSR = __ssr(y_fitting, y_no_fitting)
    SST = __sst(y_no_fitting)
    rr = SSR /SST
    return rr
def selected_func(you,model,model_select):
    maxyou=max(you)
    for s in range(len(you)):
        if you[s]==maxyou:
            return model[s],model_select[s]
        
#========================粒子群=======================================
import numpy as np



def particle_swarm_optimization(mm, objective_func, num_particles, max_iterations,parm):
    # 初始化参数
    dimensions = 1
    inertia = 0.5  # 惯性权重
    cognitive_weight = 1.0  # 学习因子
    social_weight = 1.0  # 学习因子
    min_bound = 0  # 变量的最小边界
    max_bound = 2  # 变量的最大边界

    # 初始化粒子的位置和速度
    particles = np.random.uniform(min_bound, max_bound, (num_particles, dimensions))
    velocities = np.zeros((num_particles, dimensions))
    if mm==1:
        # 初始化粒子的局部最佳位置和全局最佳位置
        personal_best_positions = particles.copy()
        global_best_position = particles[np.argmin(objective_func(particles,parm[0],parm[1],parm[2],parm[3]))]

        # 迭代更新粒子的速度和位置
        for _ in range(max_iterations):
            for i in range(num_particles):
                # 更新粒子的速度
                velocities[i] = (inertia * velocities[i] +
                                cognitive_weight * np.random.rand() * (personal_best_positions[i] - particles[i]) +
                                social_weight * np.random.rand() * (global_best_position - particles[i]))

                # 限制速度范围
                velocities[i] = np.clip(velocities[i], min_bound, max_bound)

                # 更新粒子的位置
                particles[i] += velocities[i]

                # 限制位置范围
                particles[i] = np.clip(particles[i], min_bound, max_bound)

                # 更新局部最佳位置和全局最佳位置
                if objective_func(particles[i],parm[0],parm[1],parm[2],parm[3]) < objective_func(personal_best_positions[i],parm[0],parm[1],parm[2],parm[3]):
                    personal_best_positions[i] = particles[i]

                if objective_func(particles[i],parm[0],parm[1],parm[2],parm[3]) < objective_func(global_best_position,parm[0],parm[1],parm[2],parm[3]):
                    global_best_position = particles[i]
        return global_best_position, objective_func(global_best_position,parm[0],parm[1],parm[2],parm[3])
    if mm==-1:
        # 初始化粒子的局部最佳位置和全局最佳位置
        personal_best_positions = particles.copy()
        global_best_position = particles[np.argmin(-objective_func(particles,parm[0],parm[1],parm[2],parm[3]))]

        # 迭代更新粒子的速度和位置
        for _ in range(max_iterations):
            for i in range(num_particles):
                # 更新粒子的速度
                velocities[i] = (inertia * velocities[i] +
                                cognitive_weight * np.random.rand() * (personal_best_positions[i] - particles[i]) +
                                social_weight * np.random.rand() * (global_best_position - particles[i]))

                # 限制速度范围
                velocities[i] = np.clip(velocities[i], min_bound, max_bound)

                # 更新粒子的位置
                particles[i] += velocities[i]

                # 限制位置范围
                particles[i] = np.clip(particles[i], min_bound, max_bound)

                # 更新局部最佳位置和全局最佳位置
                if -objective_func(particles[i],parm[0],parm[1],parm[2],parm[3]) < -objective_func(personal_best_positions[i],parm[0],parm[1],parm[2],parm[3]):
                    personal_best_positions[i] = particles[i]

                if -objective_func(particles[i],parm[0],parm[1],parm[2],parm[3]) < -objective_func(global_best_position,parm[0],parm[1],parm[2],parm[3]):
                    global_best_position = particles[i]
    # 输出结果
        return global_best_position, -objective_func(global_best_position,parm[0],parm[1],parm[2],parm[3])


        
        

Set={}
youSet={}
for index, row in selected_df.iterrows():
    if index >= 100:
        break
    # 去除NaN值
    data = row.values[~np.isnan(row.values)]
    # 将数据分成x和y坐标对
    x = data[::2] / (3600 * 24)
    y = data[1::2]/1000
    
    you=[]
    model=[]
    model_select=[]
    you_index={}
    try:
        a1 = np.polyfit(x, y, 1)#线性
        you1 = goodness_of_fit([liner_func(x[p],a1[0],a1[1]) for p in range(len(x))],y)
        you.append(you1)
        model.append({'回归类型':'线性回归','回归系数':a1})
        you_index['线性回归']=you1
        model_select.append((liner_func,a1+[0,0]))
    except:
        pass
    
    try:
        a2 = np.polyfit(x, y, 2)#二次
        you2 = goodness_of_fit([erchi_func(x[p],a2[0],a2[1],a2[2]) for p in range(len(x))],y)
        you.append(you2)
        model.append({'回归类型':'二次函数回归','回归系数':a2})
        you_index['二次函数回归']=you2
        model_select.append((erchi_func,a2+[0]))
    except:
        pass
    
    try:
        a3 = np.polyfit(x, y, 3)#三次
        you3 = goodness_of_fit([sanchi_func(x[p],a3[0],a3[1],a3[2],a3[3]) for p in range(len(x))],y)
        you.append(you3)
        model.append({'回归类型':'三次函数回归','回归系数':a3})
        you_index['三次函数回归']=you3
        model_select.append((sanchi_func,a3))
    except:
        pass
    
    #拟合三角函数模型
    try:
        a4,_=optimize.curve_fit(trig_func,x,y)
        you4 = goodness_of_fit([trig_func(x[p],a4[0],a4[1],a4[2]) for p in range(len(x))],y)
        you.append(you4)
        model.append({'回归类型':'三角函数回归','回归系数':a4})
        you_index['三角函数回归']=you4
        model_select.append((trig_func,a4+[0]))
        
    except:
        pass
    
    #拟合指数函数模型
    try:
        a5,_=optimize.curve_fit(target_func,x,y)
        you5 = goodness_of_fit([target_func(x[p], a5[0], a5[1], a5[2]) for p in range(len(x))],y)
        you.append(you5)
        model.append({'回归类型':'指数函数回归','回归系数':a5})
        you_index['指数函数回归']=you5
        model_select.append((target_func,a5+[0]))
    except:
        pass
    
    #拟合对数函数模型
    try:
        a6,_=optimize.curve_fit(hyp_func,x,y)
        you6 = goodness_of_fit([hyp_func(x[p], a6[0],a6[1]) for p in range(len(x))],y)
        you.append(you6)
        model.append({'回归类型':'对数函数回归','回归系数':a6})
        you_index['对数函数回归']=you6
        model_select.append((hyp_func,a6+[0,0]))
    except:
        pass
    
    # model=[f'线性回归，系数为{a1}',f'二次函数回归，系数为{a2}',f'三次函数回归，系数为{a3}',f'三角函数回归，系数为{a4}',f'指数函数回归，系数为{a5}',f'对数函数回归，系数为{a6}']
    res,model_=selected_func(you,model,model_select)
    
    Set[index]=res
    youSet[index]=you_index
    best_position, best_value = particle_swarm_optimization(1,model_[0], 50, 100,model_[1])
    best_value=round(float(best_value),4)
    print("最小值:",best_value)
    best_position, best_value = particle_swarm_optimization(-1,model_[0], 50, 100,model_[1])
    best_value=round(float(-best_value),4)
    print("最大值:", best_value)
    
dicts1 = [{key: value for key, value in Set[row].items()} for row in Set]
df1 = pd.DataFrame(dicts1)
df1.to_excel('血肿1a_100人拟合函数选择结果.xlsx')
dicts2 = [{key: value for key, value in youSet[row].items()} for row in youSet]
df2 = pd.DataFrame(dicts2)
df2.to_excel('血肿1a_100人拟合优度.xlsx')

最小值: 72.0475
最大值: 72.8884
最小值: 48.9889
最大值: 49.6399
最小值: 63.0587
最大值: 126.5183
最小值: 28.7249
最大值: 43.2154
最小值: 12.8142
最大值: 31.1798
最小值: 141.6898
最大值: 210.1421
最小值: 25.1009
最大值: 30.6711
最小值: 26.1919
最大值: 45.0621
最小值: 41.4982
最大值: 57.3845
最小值: 11.6497
最大值: 34.5445
最小值: 4.4614
最大值: 5.2428
最小值: 63.1578
最大值: 64.2348
最小值: 13.6857
最大值: 14.0256
最小值: 29.7303
