实现R语言参数生存模型。

# 指数回归模型

In [2]:
# Install flexsurv only when it is missing, so re-running this cell does not
# trigger a fresh download and install every time.
if (!requireNamespace("flexsurv", quietly = TRUE)) {
  install.packages("flexsurv")
}







In [1]:
# Preview the first rows of the lung data set. The explicit namespace lets
# this cell run even before library(survival) has been attached.
head(survival::lung)



In [8]:
# Attach the survival-analysis packages used in this notebook.
library(survival)
library(flexsurv)
library(survminer)

# Make the `lung` data set available in the workspace.
data(lung)

# Exponential survival regression with age and sex as covariates.
fit_exp <- survreg(
  formula = Surv(time, status) ~ age + sex,
  dist = "exp",
  data = lung
)



In [11]:
# Akaike information criterion of the fitted exponential model.
AIC(fit_exp)



In [14]:
# Bayesian information criterion of the fitted exponential model.
BIC(fit_exp)



In [11]:
# Intercept-only exponential model (no covariates).
fit_exp <- survreg(
  formula = Surv(time, status) ~ 1,
  dist = "exp",
  data = lung
)

In [None]:
# Rate (hazard) of the exponential distribution: the reciprocal of the
# exponentiated coefficient. Only the intercept is present here.
lambda <- 1 / exp(coef(fit_exp))

# Survival function S(t) = exp(-lambda * t).
surv_exp <- function(t) exp(-lambda * t)

# Evaluate S(t) on a fine time grid.
time_seq <- seq(from = 0, to = 1000, by = 0.1)
surv_prob_exp <- surv_exp(time_seq)

# Kaplan-Meier estimate, drawn as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung)
plot(
  km_fit,
  main = "KM Curve and Exp Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(lung$time)), ylim = c(0, 1),
  col = "blue", lwd = 2
)

# Overlay the fitted exponential survival curve on the same axes.
lines(x = time_seq, y = surv_prob_exp, col = "red", lty = 2, lwd = 2)



In [38]:
# Attach the survival-analysis packages.
library(survival)
library(flexsurv)
library(survminer)

# Make the `lung` data set available.
data(lung)

# Same intercept-only exponential model, this time fitted with flexsurvreg.
fit_exp <- flexsurvreg(
  formula = Surv(time, status) ~ 1,
  dist = "exp",
  data = lung
)



In [39]:
# Show the `res` component of the flexsurvreg fit (its parameter estimates,
# per the flexsurv documentation).
fit_exp$res



# Weibull回归模型

In [2]:
# Attach the survival-analysis packages.
library(survival)
library(flexsurv)
library(survminer)

# Make the `lung` data set available.
data(lung)

# Intercept-only Weibull survival regression.
fit_weibull <- survreg(
  formula = Surv(time, status) ~ 1,
  dist = "weibull",
  data = lung
)



In [7]:
# Weibull scale for pweibull(): the exponentiated intercept. The exp() is
# essential. With covariates, multiply coefficients by covariate values first.
scale <- exp(coef(fit_weibull)[1])

# Weibull shape for pweibull(): the reciprocal of survreg's scale.
shape <- 1 / fit_weibull$scale

# Survival probabilities S(t) = 1 - F(t) on a fine time grid.
time_seq <- seq(from = 0, to = 1000, by = 0.1)
surv_prob_weibull <- 1 - pweibull(time_seq, shape = shape, scale = scale)

# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung)
plot(
  km_fit,
  main = "KM Curve and Weibull Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(lung$time)), ylim = c(0, 1),
  col = "blue", lwd = 2
)

# Overlay the fitted Weibull survival curve.
lines(x = time_seq, y = surv_prob_weibull, col = "red", lty = 2, lwd = 2)



# Gompertz回归

In [44]:
# Attach the survival-analysis packages.
library(survival)
library(flexsurv)
library(survminer)

# Make the `lung` data set available.
data(lung)

# Intercept-only Gompertz model (fitted with flexsurvreg).
fit_gompertz <- flexsurvreg(
  formula = Surv(time, status) ~ 1,
  dist = "gompertz",
  data = lung
)



In [43]:
# Show the `res` table of the Gompertz fit; the cells below read the shape
# from row 1 and the rate from row 2.
fit_gompertz$res



In [29]:
# Row 2, column 1 of the table: the value used below as the Gompertz rate.
fit_gompertz$res[2, 1]



In [34]:
# Gompertz shape: first row of the estimate table.
# Alternative: shape <- coef(fit_gompertz)[1]
# With covariates, multiply coefficients by covariate values first.
shape <- fit_gompertz$res[1, 1]

# Gompertz rate: second row of the estimate table.
rate <- fit_gompertz$res[2, 1]

# Survival probabilities S(t) = 1 - F(t) on a fine time grid.
time_seq <- seq(from = 0, to = 1000, by = 0.1)
surv_prob_gompertz <- 1 - pgompertz(time_seq, shape = shape, rate = rate)

# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung)
plot(
  km_fit,
  main = "KM Curve and Gompertz Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(lung$time)), ylim = c(0, 1),
  col = "blue", lwd = 2
)

# Overlay the fitted Gompertz survival curve.
lines(x = time_seq, y = surv_prob_gompertz, col = "red", lty = 2, lwd = 2)



# 对数正态模型

In [84]:
# Attach the required package.
library(survival)

# Make the `lung` data set available.
data(lung)

# Intercept-only log-normal survival regression.
fit_lognormal <- survreg(
  formula = Surv(time, status) ~ 1,
  dist = "lognormal",
  data = lung
)



In [96]:
# Mean of log-time: the intercept. With covariates, multiply coefficients by
# covariate values first.
mean <- coef(fit_lognormal)[1]

# Standard deviation of log-time: survreg's scale.
sd <- fit_lognormal$scale

# Time grid and its logarithm.
time_seq <- seq(from = 0, to = 1000, by = 0.1)
log_time <- log(time_seq)

# Despite its name, this holds the CDF F(t); the survival curve is 1 - F(t).
surv_prob_lognormal <- pnorm(log_time, mean = mean, sd = sd)

# Draw the log-normal survival curve.
plot(
  time_seq, 1 - surv_prob_lognormal,
  type = "l", col = "red", lwd = 2,
  main = "Log-normal Survival Function",
  xlab = "Time", ylab = "Survival Probability"
)



In [90]:
# Attach the required package and make the `lung` data set available.
library(survival)
data(lung)

# Keep rows with an observed time and status only. na.omit() on the whole data
# frame would also drop rows whose only missing values are in covariates this
# intercept-only model never uses, and overwriting `lung` would silently change
# the data seen by every later cell. A separate copy avoids both problems.
lung_cc <- lung[complete.cases(lung[, c("time", "status")]), ]

# Intercept-only log-normal survival regression.
lognormal_fit <- survreg(
  Surv(time, status) ~ 1,
  data = lung_cc,
  dist = "lognormal"
)

# Print the model summary.
summary(lognormal_fit)

# Mean (intercept) and standard deviation (survreg scale) of log-time.
# These names shadow base::mean / base::sd as variables only; function calls
# still resolve to the base functions.
mean <- coef(lognormal_fit)[1]
sd <- lognormal_fit$scale

# Time grid and its logarithm.
time_seq <- seq(0, max(lung_cc$time), length.out = 100)
log_time <- log(time_seq)

# Despite its name, this holds the CDF F(t); the survival curve is 1 - F(t).
lognormal_surv_probs <- pnorm(log_time, mean = mean, sd = sd)

# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung_cc)
plot(
  km_fit,
  col = "blue", lwd = 2,
  main = "KM Curve and Log-normal Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(lung_cc$time)), ylim = c(0, 1)
)

# Overlay the fitted log-normal survival curve.
lines(time_seq, 1 - lognormal_surv_probs, col = "red", lwd = 2, lty = 2)







# 对数逻辑模型

In [91]:
# Attach the required package.
library(survival)

# Make the `lung` data set available.
data(lung)

# Intercept-only log-logistic survival regression.
fit_loglogit <- survreg(
  formula = Surv(time, status) ~ 1,
  dist = "loglogistic",
  data = lung
)



In [98]:
# Location of log-time: the intercept. With covariates, multiply coefficients
# by covariate values first.
mu <- coef(fit_loglogit)[1]

# Scale of log-time: survreg's scale.
gamma <- fit_loglogit$scale

# Time grid and its logarithm.
time_seq <- seq(from = 0, to = max(lung$time), length.out = 100)
log_time <- log(time_seq)

# Logistic CDF of log-time. Despite its name, this is F(t); the survival curve
# is 1 - F(t).
surv_prob_loglogit <- 1 / (1 + exp(-(log_time - mu) / gamma))

# Draw the log-logistic survival curve.
plot(
  time_seq, 1 - surv_prob_loglogit,
  type = "l", col = "red", lwd = 2,
  main = "Log-logistic Survival Function",
  xlab = "Time", ylab = "Survival Probability"
)



In [99]:
# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung)
plot(
  km_fit,
  main = "KM Curve and Log-logistic Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(lung$time)), ylim = c(0, 1),
  col = "blue", lwd = 2
)

# Overlay the fitted log-logistic survival curve (1 - CDF).
lines(x = time_seq, y = 1 - surv_prob_loglogit, col = "red", lty = 2, lwd = 2)



# 伽马回归

In [None]:
# Intercept-only gamma model (fitted with flexsurvreg).
fit_gamma <- flexsurvreg(
  formula = Surv(time, status) ~ 1,
  dist = "gamma",
  data = lung
)

In [84]:
# Show the `res` table of the gamma fit.
fit_gamma$res



In [92]:
# Row 1, column 1 of the gamma fit's `res` table.
fit_gamma$res[1, 1]



In [None]:
# flexsurvreg(dist = "gamma") is parameterised by shape and rate, the same
# parameterisation pgamma() accepts, so the estimates are passed through
# unchanged. (The earlier 1 / shape^2 transformation belongs to the
# generalized-gamma Q parameter, not to this model.)
# With covariates, build the covariate-specific parameter first.
alpha <- fit_gamma$res["shape", "est"]
beta <- fit_gamma$res["rate", "est"]

# Survival probabilities S(t) = 1 - F(t) on a fine time grid.
time_seq <- seq(0, 1000, 0.1)
surv_prob_gamma <- 1 - pgamma(time_seq, shape = alpha, rate = beta)

# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung)

plot(
  km_fit,
  col = "blue",
  lwd = 2,
  main = "KM Curve and Gamma Regression",
  xlab = "Time",
  ylab = "Survival Probability",
  xlim = c(0, max(lung$time)),
  ylim = c(0, 1)
)

# Overlay the fitted gamma survival curve.
lines(time_seq, surv_prob_gamma, col = "red", lwd = 2, lty = 2)



# 广义伽马回归

In [None]:
# Attach the required packages. flexsurvreg() lives in flexsurv, so it must be
# attached as well for this cell to run on its own.
library(survival)
library(flexsurv)

# Make the `lung` data set available.
data(lung)

# Intercept-only generalized gamma model.
fit_gengamma <- flexsurvreg(
  Surv(time, status) ~ 1,
  data = lung,
  dist = "gengamma"
)



In [54]:
# Show the `res` table of the generalized gamma fit; the cells below read
# sigma from row 2 and Q from row 3.
fit_gengamma$res



In [56]:
# Generalized gamma parameters. With covariates, multiply coefficients by
# covariate values first.
mu <- coef(fit_gengamma)[1]
sigma <- fit_gengamma$res[2, 1]
Q <- fit_gengamma$res[3, 1]

# Survival probabilities S(t) = 1 - F(t) on a fine time grid.
time_seq <- seq(from = 0, to = 1000, by = 0.1)
surv_prob_gengamma <- 1 - pgengamma(time_seq, mu = mu, sigma = sigma, Q = Q)

# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung)

plot(
  km_fit,
  main = "KM Curve and Generalized Gamma Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(lung$time)), ylim = c(0, 1),
  col = "blue", lwd = 2
)

# Overlay the fitted generalized gamma survival curve.
lines(x = time_seq, y = surv_prob_gengamma, col = "red", lty = 2, lwd = 2)



In [59]:
# Reproduce the generalized gamma prediction using only the ordinary gamma
# (or normal) distribution, e.g. for porting the formula to a spreadsheet.

# Generalized gamma parameters. With covariates, multiply coefficients by
# covariate values first.
mu <- coef(fit_gengamma)[1]
sigma <- fit_gengamma$res[2, 1]
Q <- fit_gengamma$res[3, 1]

# Time grid and standardised log-time.
time_seq <- seq(0, 1000, 0.1)
w <- (log(time_seq) - mu) / sigma

if (Q == 0) {
  # Q = 0 is the log-normal limit of the generalized gamma.
  surv_prob_gengamma_gamma <- 1 - pnorm(w)
} else {
  # Gamma distribution parameters.
  alpha <- 1 / Q^2
  beta <- 1

  # Map time_seq to the gamma-distributed quantity.
  time_seq_gamma <- 1 / Q^2 * exp(Q * w)
  gamma_cdf <- pgamma(time_seq_gamma, shape = alpha, rate = beta)

  # For Q > 0 the mapping is increasing in time, so S(t) = 1 - gamma CDF.
  # For Q < 0 it is decreasing, so the gamma CDF itself is S(t).
  surv_prob_gengamma_gamma <- if (Q > 0) 1 - gamma_cdf else gamma_cdf
}

# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung)

plot(
  km_fit,
  col = "blue",
  lwd = 2,
  main = "KM Curve and Generalized Gamma Regression",
  xlab = "Time",
  ylab = "Survival Probability",
  xlim = c(0, max(lung$time)),
  ylim = c(0, 1)
)

# Overlay the generalized gamma survival curve.
lines(time_seq, surv_prob_gengamma_gamma, col = "red", lwd = 2, lty = 2)



# 大一统模型

我将构建一个函数，一步输出所有生存模型的参数结果、AIC、BIC 以及拟合曲线图像，拯救世人！

## 指数回归部分

In [3]:
# 加载必要的包
library(survival)
library(flexsurv)
library(survminer)

In [4]:
# Test data: the cells in this section fit their models on `data`.
data  <- lung # test data

In [5]:
# Intercept-only exponential model.
fit_exp <- survreg(
  formula = Surv(time, status) ~ 1,
  dist = "exp",
  data = data
)

# Parameter vector: (Lambda, Gamma, Delta, AIC, BIC). The exponential model has
# a single parameter, so Gamma and Delta stay NA.
params_exp <- c(
  1 / exp(coef(fit_exp)),
  NA,
  NA,
  AIC(fit_exp),
  BIC(fit_exp)
)

In [6]:
# Visual check of the extracted parameter.
# Rate of the exponential distribution.
lambda <- params_exp[1]

# Survival function S(t) = exp(-lambda * t).
surv_exp <- function(t) exp(-lambda * t)

# Evaluate S(t) on a fine time grid.
time_seq <- seq(from = 0, to = 1000, by = 0.1)
surv_prob_exp <- surv_exp(time_seq)

# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = data)
plot(
  km_fit,
  main = "KM Curve and Exp Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(data$time)), ylim = c(0, 1),
  col = "blue", lwd = 2
)

# Overlay the fitted exponential survival curve.
lines(x = time_seq, y = surv_prob_exp, col = "red", lty = 2, lwd = 2)



## Weibull回归部分

In [7]:
# Intercept-only Weibull model.
fit_weibull <- survreg(
  formula = Surv(time, status) ~ 1,
  dist = "weibull",
  data = data
)

# Parameter vector: (Lambda, Gamma, Delta, AIC, BIC).
# Lambda is used as the pweibull() shape and Gamma as the pweibull() scale.
params_weibull <- c(
  1 / fit_weibull$scale,
  exp(coef(fit_weibull)[1]),
  NA,
  AIC(fit_weibull),
  BIC(fit_weibull)
)

In [8]:
# Survival probabilities S(t) = 1 - F(t) on a fine time grid.
time_seq <- seq(0, 1000, 0.1)
surv_prob_weibull <- 1 - pweibull(
  time_seq,
  shape = params_weibull[1],
  scale = params_weibull[2]
)

# Kaplan-Meier estimate on the same `data` the Weibull model was fitted on,
# so the two curves always describe the same sample.
km_fit <- survfit(Surv(time, status) ~ 1, data = data)
plot(
  km_fit,
  col = "blue",
  lwd = 2,
  main = "KM Curve and Weibull Regression",
  xlab = "Time",
  ylab = "Survival Probability",
  xlim = c(0, max(data$time)),
  ylim = c(0, 1)
)

# Overlay the fitted Weibull survival curve.
lines(time_seq, surv_prob_weibull, col = "red", lwd = 2, lty = 2)



## Gompertz部分

In [27]:
set.seed(12345)

# Intercept-only Gompertz model.
fit_gompertz <- flexsurvreg(
  formula = Surv(time, status) ~ 1,
  dist = "gompertz",
  data = data
)

# Parameter vector: (Lambda, Gamma, Delta, AIC, BIC).
# Lambda is row 2 of the estimate table (used as the rate) and Gamma is row 1
# (used as the shape).
params_gompertz <- c(
  fit_gompertz$res[2, 1],
  fit_gompertz$res[1, 1],
  NA,
  AIC(fit_gompertz),
  BIC(fit_gompertz)
)

In [28]:
# Show the `res` table of the Gompertz fit.
fit_gompertz$res



In [29]:
# Survival probabilities S(t) = 1 - F(t) on a fine time grid.
time_seq <- seq(0, 1000, 0.1)
surv_prob_gompertz <- 1 - pgompertz(
  time_seq,
  shape = params_gompertz[2],
  rate = params_gompertz[1]
)

# Kaplan-Meier estimate on the same `data` the Gompertz model was fitted on,
# so the two curves always describe the same sample.
km_fit <- survfit(Surv(time, status) ~ 1, data = data)
plot(
  km_fit,
  col = "blue",
  lwd = 2,
  main = "KM Curve and Gompertz Regression",
  xlab = "Time",
  ylab = "Survival Probability",
  xlim = c(0, max(data$time)),
  ylim = c(0, 1)
)

# Overlay the fitted Gompertz survival curve.
lines(time_seq, surv_prob_gompertz, col = "red", lwd = 2, lty = 2)



## 对数正态回归部分

In [74]:
# Intercept-only log-normal model.
fit_lognormal <- survreg(
  formula = Surv(time, status) ~ 1,
  dist = "lognormal",
  data = data
)

# Parameter vector: (Lambda, Gamma, Delta, AIC, BIC).
# Lambda is the mean of log-time and Gamma its standard deviation.
params_lognormal <- c(
  coef(fit_lognormal)[1],
  fit_lognormal$scale,
  NA,
  AIC(fit_lognormal),
  BIC(fit_lognormal)
)

In [78]:
# Time grid and its logarithm.
time_seq <- seq(0, 1000, 0.1)
log_time <- log(time_seq)

# Survival probabilities S(t) = 1 - F(t) under the fitted log-normal model.
surv_prob_lognormal <- 1 - pnorm(
  log_time,
  mean = params_lognormal[1],
  sd = params_lognormal[2]
)

# Kaplan-Meier estimate on the same `data` the log-normal model was fitted on,
# so the two curves always describe the same sample.
km_fit <- survfit(Surv(time, status) ~ 1, data = data)
plot(
  km_fit,
  col = "blue", lwd = 2,
  main = "KM Curve and Log-normal Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(data$time)), ylim = c(0, 1)
)

# Overlay the fitted log-normal survival curve.
lines(time_seq, surv_prob_lognormal, col = "red", lwd = 2, lty = 2)



## 对数逻辑部分

In [79]:
# Intercept-only log-logistic model.
fit_loglogit <- survreg(
  formula = Surv(time, status) ~ 1,
  dist = "loglogistic",
  data = data
)

# Parameter vector: (Lambda, Gamma, Delta, AIC, BIC).
# Lambda is the location of log-time and Gamma its scale.
params_loglogit <- c(
  coef(fit_loglogit)[1],
  fit_loglogit$scale,
  NA,
  AIC(fit_loglogit),
  BIC(fit_loglogit)
)

In [81]:
# Location (intercept) and scale of log-time. With covariates, multiply
# coefficients by covariate values first.
mu <- coef(fit_loglogit)[1]
gamma <- fit_loglogit$scale

# Time grid over the observed range of `data`, and its logarithm.
time_seq <- seq(0, max(data$time), length.out = 100)
log_time <- log(time_seq)

# Logistic CDF of log-time. Despite its name, this is F(t); the survival curve
# is 1 - F(t).
surv_prob_loglogit <- 1 / (1 + exp(-(log_time - mu) / gamma))

# Kaplan-Meier estimate on the same `data` the log-logistic model was fitted
# on, so the two curves always describe the same sample.
km_fit <- survfit(Surv(time, status) ~ 1, data = data)
plot(
  km_fit,
  col = "blue", lwd = 2,
  main = "KM Curve and Log-logistic Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(data$time)), ylim = c(0, 1)
)

# Overlay the fitted log-logistic survival curve.
lines(time_seq, 1 - surv_prob_loglogit, col = "red", lwd = 2, lty = 2)



## 广义伽马回归

In [1]:
# 加载必要的包
library(survival)
library(flexsurv)
library(survminer)



In [5]:
# Intercept-only generalized gamma model.
fit_gengamma <- flexsurvreg(
  Surv(time, status) ~ 1,
  data = lung,
  dist = "gengamma"
)

# Parameter vector: (Mu, Sigma, Q, AIC, BIC).
# coef() returns a plain vector, so it takes a single index; the previous
# two-dimensional index [1, 1] fails with "incorrect number of dimensions".
params_gengamma <- c(
  coef(fit_gengamma)[1],  # Mu
  fit_gengamma$res[2, 1],  # Sigma
  fit_gengamma$res[3, 1],  # Q
  AIC(fit_gengamma),  # AIC
  BIC(fit_gengamma)  # BIC
)



In [6]:
# Show the `res` table of the generalized gamma fit.
fit_gengamma$res



In [7]:
# Survival probabilities S(t) = 1 - F(t) on a fine time grid.
time_seq <- seq(from = 0, to = 1000, by = 0.1)
surv_prob_gengamma <- 1 - pgengamma(
  time_seq,
  mu = params_gengamma[1],
  sigma = params_gengamma[2],
  Q = params_gengamma[3]
)

# Kaplan-Meier estimate as the non-parametric reference curve.
km_fit <- survfit(Surv(time, status) ~ 1, data = lung)

plot(
  km_fit,
  main = "KM Curve and Generalized Gamma Regression",
  xlab = "Time", ylab = "Survival Probability",
  xlim = c(0, max(lung$time)), ylim = c(0, 1),
  col = "blue", lwd = 2
)

# Overlay the fitted generalized gamma survival curve.
lines(x = time_seq, y = surv_prob_gengamma, col = "red", lty = 2, lwd = 2)



## 构造统一模型

In [None]:
PSM_god <- function(data) {
  # Fit six intercept-only parametric survival models and compare them.
  #
  # Args:
  #   data: data frame with columns `time` and `status`, usable as
  #         Surv(time, status).
  #
  # Side effects:
  #   * prints a table of parameters, AIC and BIC, plus the model with the
  #     smallest AIC;
  #   * prints Excel formulas for each survival function, generic and with the
  #     fitted parameters filled in;
  #   * draws the Kaplan-Meier curve with all fitted curves (base graphics);
  #   * writes the predicted survival probabilities to
  #     "survival_probabilities.csv" in the working directory.
  #
  # Meaning of the LAMBDA / GAMMA / DELTA slots per model:
  #   Exp:               rate          / -            / -
  #   Weibull:           shape         / scale        / -
  #   Gompertz:          rate          / shape        / -
  #   Log-normal:        mean of log t / sd of log t  / -
  #   Log-logistic:      location      / scale (log t) / -
  #   Generalized Gamma: mu            / sigma        / Q

  stopifnot(
    is.data.frame(data),
    all(c("time", "status") %in% names(data))
  )

  # Disable scientific notation for the result table. on.exit() guarantees the
  # caller's setting comes back even if a model fit fails half-way.
  old_scipen <- options(scipen = 999)
  on.exit(options(old_scipen), add = TRUE)

  # Attach the required packages.
  library(survival)
  library(flexsurv)
  library(survminer)
  library(ggplot2)
  library(tidyr)
  library(dplyr)

  # Exponential model.
  fit_exp <- survreg(Surv(time, status) ~ 1, data = data, dist = "exp")
  params_exp <- c(
    1 / exp(coef(fit_exp)),  # Lambda
    NA,  # Gamma
    NA,  # Delta
    AIC(fit_exp),
    BIC(fit_exp)
  )

  # Weibull model.
  fit_weibull <- survreg(Surv(time, status) ~ 1, data = data, dist = "weibull")
  params_weibull <- c(
    1 / fit_weibull$scale,  # Lambda
    exp(coef(fit_weibull)[1]),  # Gamma
    NA,  # Delta
    AIC(fit_weibull),
    BIC(fit_weibull)
  )

  # Gompertz model.
  fit_gompertz <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gompertz"
  )
  params_gompertz <- c(
    fit_gompertz$res[2, 1],  # Lambda
    fit_gompertz$res[1, 1],  # Gamma
    NA,  # Delta
    AIC(fit_gompertz),
    BIC(fit_gompertz)
  )

  # Log-normal model.
  fit_lognormal <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "lognormal"
  )
  params_lognormal <- c(
    coef(fit_lognormal)[1],  # Lambda
    fit_lognormal$scale,  # Gamma
    NA,  # Delta
    AIC(fit_lognormal),
    BIC(fit_lognormal)
  )

  # Log-logistic model.
  fit_loglogit <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "loglogistic"
  )
  params_loglogit <- c(
    coef(fit_loglogit)[1],  # Lambda
    fit_loglogit$scale,  # Gamma
    NA,  # Delta
    AIC(fit_loglogit),
    BIC(fit_loglogit)
  )

  # Generalized gamma model.
  fit_gengamma <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gengamma"
  )
  params_gengamma <- c(
    coef(fit_gengamma)[1],  # Mu
    fit_gengamma$res[2, 1],  # Sigma
    fit_gengamma$res[3, 1],  # Q
    AIC(fit_gengamma),
    BIC(fit_gengamma)
  )

  # Collect everything in one numeric matrix (one row per model). The numeric
  # copy is kept so the best model is chosen from numbers, not from the
  # formatted strings shown in the table.
  param_list <- list(
    "Exp" = params_exp,
    "Weibull" = params_weibull,
    "Gompertz" = params_gompertz,
    "Log-normal" = params_lognormal,
    "Log-logistic" = params_loglogit,
    "Generalized Gamma" = params_gengamma
  )
  param_mat <- do.call(rbind, lapply(param_list, unname))
  colnames(param_mat) <- c("LAMBDA", "GAMMA", "DELTA", "AIC", "BIC")

  # Formatted (character) table for display.
  result <- data.frame(
    lapply(as.data.frame(param_mat), format, scientific = FALSE),
    row.names = rownames(param_mat)
  )

  # Print the table.
  cat("PSM Results:", "\n")
  print(result)
  cat("---------------------------------------------------------------------", "\n")

  # Report the model with the smallest AIC.
  best_model <- rownames(param_mat)[which.min(param_mat[, "AIC"])]
  cat("The best model is", best_model, "\n")
  cat("AIC:", result[best_model, "AIC"], "\n")
  cat("BIC:", result[best_model, "BIC"], "\n")
  cat("---------------------------------------------------------------------", "\n")

  # Restore the caller's number format before printing the formulas (the
  # on.exit() handler above repeats this harmlessly).
  options(old_scipen)

  # Generic Excel formulas.
  # NOTE: the Generalized Gamma formula is valid for Q (Delta) > 0 only; for
  # Q < 0 the survival function is the gamma CDF itself, without the "1 -".
  cat("Excel formula", "\n")
  cat("Exp:", "1 - EXPON.DIST(x = time, lambda = Lambda, TRUE)", "\n")
  cat("Weibull:", "1 - WEIBULL(x = time, alpha = Lambda, beta = Gamma, TRUE)", "\n")
  cat("Gompertz:", "EXP( -LAMBDA / GAMMA * (EXP(GAMMA * time) - 1) )", "\n")
  cat("Log-normal:", "1 - NORM.DIST(LN(time), mean = LAMBDA, standard_dev = GAMMA, TRUE)", "\n")
  cat("Log-logistic:", "1 - 1 / (1 + EXP(-(LN(time) - LAMBDA) / GAMMA))", "\n")
  cat("Generalized Gamma:", "1 - GAMMA.DIST(x = 1/Delta^2 * EXP(Delta * (LN(time)-LAMBDA) / GAMMA), ALPHA = Delta^(-2), Beta = 1, TRUE)", "\n")
  cat("---------------------------------------------------------------------", "\n")

  # Excel formulas with the fitted parameters filled in.
  cat("Excel formula with parameters", "\n")
  cat("Exp:", "1 - EXPON.DIST(time, ", params_exp[1], ", TRUE)", "\n")
  cat("Weibull:", "1 - WEIBULL(time, ", params_weibull[1], ", ", params_weibull[2], ", TRUE)", "\n")
  cat("Gompertz:", "EXP( -", params_gompertz[1], " / ", params_gompertz[2], " * (EXP(", params_gompertz[2], " * time) - 1) )", "\n")
  cat("Log-normal:", "1 - NORM.DIST(LN(time), ", params_lognormal[1], ", ", params_lognormal[2], ", TRUE)", "\n")
  cat("Log-logistic:", "1 - 1 / (1 + EXP(-(LN(time) - ", params_loglogit[1], ") / ", params_loglogit[2], "))", "\n")
  cat("Generalized Gamma:", "1 - GAMMA.DIST(x = 1/", params_gengamma[3], "^2 * EXP(", params_gengamma[3], " * (LN(time)-", params_gengamma[1], ") / ", params_gengamma[2], "), ", 1/params_gengamma[3]^2, ", Beta = 1, TRUE)", "\n")
  cat("---------------------------------------------------------------------", "\n")

  # Time grid for the predicted curves.
  time_seq <- seq(0, 1000, 0.1)

  # Predicted survival probabilities S(t) = 1 - F(t) for every model.
  surv_prob_exp <- 1 - pexp(time_seq, rate = params_exp[1])
  surv_prob_weibull <- 1 - pweibull(
    time_seq,
    shape = params_weibull[1],
    scale = params_weibull[2]
  )
  surv_prob_gompertz <- 1 - pgompertz(
    time_seq,
    shape = params_gompertz[2],
    rate = params_gompertz[1]
  )
  surv_prob_lognormal <- 1 - pnorm(
    log(time_seq),
    mean = params_lognormal[1],
    sd = params_lognormal[2]
  )
  surv_prob_loglogit <- 1 - 1 / (1 + exp(-(log(time_seq) - params_loglogit[1]) / params_loglogit[2]))
  surv_prob_gengamma <- 1 - pgengamma(
    time_seq,
    mu = params_gengamma[1],
    sigma = params_gengamma[2],
    Q = params_gengamma[3]
  )

  # Kaplan-Meier curve of the data passed in (not of the global `lung`), so
  # the reference curve always matches the sample the models were fitted on.
  km_fit <- survfit(Surv(time, status) ~ 1, data = data)
  plot(
    km_fit,
    col = "#ff5e00",
    lwd = 2,
    main = "KM Curve and Regression",
    xlab = "Time",
    ylab = "Survival Probability",
    xlim = c(0, max(data$time, na.rm = TRUE)),
    ylim = c(0, 1)
  )

  # Overlay every fitted survival curve.
  lines(time_seq, surv_prob_exp, col = "red", lwd = 2, lty = 2)
  lines(time_seq, surv_prob_weibull, col = "green", lwd = 2, lty = 2)
  lines(time_seq, surv_prob_gompertz, col = "blue", lwd = 2, lty = 2)
  lines(time_seq, surv_prob_lognormal, col = "purple", lwd = 2, lty = 2)
  lines(time_seq, surv_prob_loglogit, col = "orange", lwd = 2, lty = 2)
  lines(time_seq, surv_prob_gengamma, col = "black", lwd = 2, lty = 2)

  # Legend: solid for KM, dashed for the parametric curves, matching the
  # line types actually drawn above.
  legend(
    "topright",
    legend = c(
      "KM", "Exp", "Weibull", "Gompertz",
      "Log-normal", "Log-logistic", "Generalized Gamma"
    ),
    col = c(
      "#ff5e00", "red", "green", "blue",
      "purple", "orange", "black"
    ),
    lty = c(1, rep(2, 6)),
    lwd = 2
  )

  # Save the predicted probabilities so they can be checked in a spreadsheet.
  write.csv(
    data.frame(
      time = time_seq,
      exp = surv_prob_exp,
      weibull = surv_prob_weibull,
      gompertz = surv_prob_gompertz,
      lognormal = surv_prob_lognormal,
      loglogit = surv_prob_loglogit,
      gengamma = surv_prob_gengamma
    ),
    "survival_probabilities.csv",
    row.names = FALSE
  )
}

PSM_god(lung)





In [None]:
PSM_god <- function(data) {
  # Fit seven intercept-only parametric survival models, compare them, and
  # draw them against the Kaplan-Meier curve with ggplot2.
  #
  # Args:
  #   data: data frame with columns `time` and `status`, usable as
  #         Surv(time, status).
  #
  # Side effects:
  #   * prints a table of parameters, AIC and BIC, plus the model with the
  #     smallest AIC;
  #   * prints Excel formulas for each survival function, generic and with the
  #     fitted parameters filled in;
  #   * prints a ggplot of the Kaplan-Meier curve (with its confidence band)
  #     and all fitted curves.
  #
  # Meaning of the LAMBDA / GAMMA / DELTA slots per model:
  #   Exp:               rate          / -            / -
  #   Weibull:           shape         / scale        / -
  #   Gompertz:          rate          / shape        / -
  #   Log-normal:        mean of log t / sd of log t  / -
  #   Log-logistic:      location      / scale (log t) / -
  #   Generalized Gamma: mu            / sigma        / Q
  #   Gamma:             shape         / rate         / -

  stopifnot(
    is.data.frame(data),
    all(c("time", "status") %in% names(data))
  )

  # Disable scientific notation for the result table. on.exit() guarantees the
  # caller's setting comes back even if a model fit fails half-way.
  old_scipen <- options(scipen = 999)
  on.exit(options(old_scipen), add = TRUE)

  # Attach the required packages.
  library(survival)
  library(flexsurv)
  library(survminer)
  library(ggplot2)
  library(tidyr)
  library(dplyr)

  # Exponential model.
  fit_exp <- survreg(Surv(time, status) ~ 1, data = data, dist = "exp")
  params_exp <- c(
    1 / exp(coef(fit_exp)),  # Lambda
    NA,  # Gamma
    NA,  # Delta
    AIC(fit_exp),
    BIC(fit_exp)
  )

  # Weibull model.
  fit_weibull <- survreg(Surv(time, status) ~ 1, data = data, dist = "weibull")
  params_weibull <- c(
    1 / fit_weibull$scale,  # Lambda
    exp(coef(fit_weibull)[1]),  # Gamma
    NA,  # Delta
    AIC(fit_weibull),
    BIC(fit_weibull)
  )

  # Gompertz model.
  fit_gompertz <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gompertz"
  )
  params_gompertz <- c(
    fit_gompertz$res[2, 1],  # Lambda
    fit_gompertz$res[1, 1],  # Gamma
    NA,  # Delta
    AIC(fit_gompertz),
    BIC(fit_gompertz)
  )

  # Log-normal model.
  fit_lognormal <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "lognormal"
  )
  params_lognormal <- c(
    coef(fit_lognormal)[1],  # Lambda
    fit_lognormal$scale,  # Gamma
    NA,  # Delta
    AIC(fit_lognormal),
    BIC(fit_lognormal)
  )

  # Log-logistic model.
  fit_loglogit <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "loglogistic"
  )
  params_loglogit <- c(
    coef(fit_loglogit)[1],  # Lambda
    fit_loglogit$scale,  # Gamma
    NA,  # Delta
    AIC(fit_loglogit),
    BIC(fit_loglogit)
  )

  # Generalized gamma model, fitted on the `data` argument (not on the global
  # `lung`).
  fit_gengamma <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gengamma"
  )
  params_gengamma <- c(
    coef(fit_gengamma)[1],  # Mu
    fit_gengamma$res[2, 1],  # Sigma
    fit_gengamma$res[3, 1],  # Q
    AIC(fit_gengamma),
    BIC(fit_gengamma)
  )

  # Gamma model, also fitted on the `data` argument. flexsurv parameterises it
  # by shape and rate; both are read on the natural scale from `res`
  # (coef() would return them on the log scale).
  fit_gamma <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gamma"
  )
  params_gamma <- c(
    fit_gamma$res["shape", "est"],  # Shape
    fit_gamma$res["rate", "est"],  # Rate
    NA,
    AIC(fit_gamma),
    BIC(fit_gamma)
  )

  # Collect everything in one numeric matrix: one row per model, and a row
  # name for each of the seven models. The numeric copy is kept so the best
  # model is chosen from numbers, not from the formatted strings in the table.
  param_list <- list(
    "Exp" = params_exp,
    "Weibull" = params_weibull,
    "Gompertz" = params_gompertz,
    "Log-normal" = params_lognormal,
    "Log-logistic" = params_loglogit,
    "Generalized Gamma" = params_gengamma,
    "Gamma" = params_gamma
  )
  param_mat <- do.call(rbind, lapply(param_list, unname))
  colnames(param_mat) <- c("LAMBDA", "GAMMA", "DELTA", "AIC", "BIC")

  # Formatted (character) table for display.
  result <- data.frame(
    lapply(as.data.frame(param_mat), format, scientific = FALSE),
    row.names = rownames(param_mat)
  )

  # Print the table.
  cat("PSM Results:", "\n")
  print(result)
  cat("---------------------------------------------------------------------", "\n")

  # Report the model with the smallest AIC.
  best_model <- rownames(param_mat)[which.min(param_mat[, "AIC"])]
  cat("The best model is", best_model, "\n")
  cat("AIC:", result[best_model, "AIC"], "\n")
  cat("BIC:", result[best_model, "BIC"], "\n")
  cat("---------------------------------------------------------------------", "\n")

  # Restore the caller's number format before printing the formulas (the
  # on.exit() handler above repeats this harmlessly).
  options(old_scipen)

  # Generic Excel formulas.
  # NOTE: the Generalized Gamma formula is valid for Q (Delta) > 0 only; for
  # Q < 0 the survival function is the gamma CDF itself, without the "1 -".
  cat("Excel formula", "\n")
  cat("Exp:", "1 - EXPON.DIST(x = time, lambda = Lambda, TRUE)", "\n")
  cat("Weibull:", "1 - WEIBULL(x = time, alpha = Lambda, beta = Gamma, TRUE)", "\n")
  cat("Gompertz:", "EXP( -LAMBDA / GAMMA * (EXP(GAMMA * time) - 1) )", "\n")
  cat("Log-normal:", "1 - NORM.DIST(LN(time), mean = LAMBDA, standard_dev = GAMMA, TRUE)", "\n")
  cat("Log-logistic:", "1 - 1 / (1 + EXP(-(LN(time) - LAMBDA) / GAMMA))", "\n")
  cat("Generalized Gamma:", "1 - GAMMA.DIST(x = 1/Delta^2 * EXP(Delta * (LN(time)-LAMBDA) / GAMMA), ALPHA = Delta^(-2), Beta = 1, TRUE)", "\n")
  cat("Gamma:", "1 - GAMMA.DIST(x = time, alpha = LAMBDA, beta = 1 / GAMMA, TRUE)", "\n")
  cat("---------------------------------------------------------------------", "\n")

  # Excel formulas with the fitted parameters filled in.
  cat("Excel formula with parameters", "\n")
  cat("Exp:", "1 - EXPON.DIST(time, ", params_exp[1], ", TRUE)", "\n")
  cat("Weibull:", "1 - WEIBULL(time, ", params_weibull[1], ", ", params_weibull[2], ", TRUE)", "\n")
  cat("Gompertz:", "EXP( -", params_gompertz[1], " / ", params_gompertz[2], " * (EXP(", params_gompertz[2], " * time) - 1) )", "\n")
  cat("Log-normal:", "1 - NORM.DIST(LN(time), ", params_lognormal[1], ", ", params_lognormal[2], ", TRUE)", "\n")
  cat("Log-logistic:", "1 - 1 / (1 + EXP(-(LN(time) - ", params_loglogit[1], ") / ", params_loglogit[2], "))", "\n")
  cat("Generalized Gamma:", "1 - GAMMA.DIST(x = 1/", params_gengamma[3], "^2 * EXP(", params_gengamma[3], " * (LN(time)-", params_gengamma[1], ") / ", params_gengamma[2], "), ", 1/params_gengamma[3]^2, ", Beta = 1, TRUE)", "\n")
  cat("Gamma:", "1 - GAMMA.DIST(time, ", params_gamma[1], ", ", 1 / params_gamma[2], ", TRUE)", "\n")
  cat("---------------------------------------------------------------------", "\n")

  # Predicted survival probabilities S(t) = 1 - F(t), one column per model.
  # The KM curve is not part of this table; it is drawn from `km_df` below.
  time_seq <- seq(0, max(data$time, na.rm = TRUE), 0.1)
  surv_probs <- data.frame(
    time = time_seq,
    Exp = 1 - pexp(time_seq, rate = params_exp[1]),
    Weibull = 1 - pweibull(
      time_seq,
      shape = params_weibull[1],
      scale = params_weibull[2]
    ),
    Gompertz = 1 - pgompertz(
      time_seq,
      shape = params_gompertz[2],
      rate = params_gompertz[1]
    ),
    LogNormal = 1 - pnorm(
      log(time_seq),
      mean = params_lognormal[1],
      sd = params_lognormal[2]
    ),
    LogLogistic = 1 - 1 / (1 + exp(-(log(time_seq) - params_loglogit[1]) / params_loglogit[2])),
    GeneralizedGamma = 1 - pgengamma(
      time_seq,
      mu = params_gengamma[1],
      sigma = params_gengamma[2],
      Q = params_gengamma[3]
    ),
    Gamma = 1 - pgamma(
      time_seq,
      shape = params_gamma[1],
      rate = params_gamma[2]
    )
  )

  # Kaplan-Meier estimate with its confidence limits.
  km_fit <- survfit(Surv(time, status) ~ 1, data = data)
  km_df <- data.frame(
    time = km_fit$time,
    surv = km_fit$surv,
    upper = km_fit$upper,
    lower = km_fit$lower
  )

  # Long format: one row per (time, model) pair, as ggplot expects.
  surv_probs_long <- surv_probs %>%
    pivot_longer(
      cols = -time,
      names_to = "Model",
      values_to = "survival"
    )

  # Colour and line type per curve: KM solid, parametric curves dashed.
  model_colors <- c(
    "KM" = "#ff5e00",
    "Exp" = "red",
    "Weibull" = "green",
    "Gompertz" = "blue",
    "LogNormal" = "purple",
    "LogLogistic" = "orange",
    "GeneralizedGamma" = "black",
    "Gamma" = "brown"
  )
  model_linetype <- c(
    "KM" = "solid",
    "Exp" = "dashed",
    "Weibull" = "dashed",
    "Gompertz" = "dashed",
    "LogNormal" = "dashed",
    "LogLogistic" = "dashed",
    "GeneralizedGamma" = "dashed",
    "Gamma" = "dashed"
  )

  p <- ggplot() +
    # KM curve as a step function.
    geom_step(
      data = km_df,
      aes(x = time, y = surv, color = "KM", linetype = "KM"),
      size = 1.2
    ) +
    # KM confidence band.
    geom_ribbon(
      data = km_df,
      aes(x = time, ymin = lower, ymax = upper),
      fill = "gray",
      alpha = 0.3
    ) +
    # Parametric model curves.
    geom_line(
      data = surv_probs_long,
      aes(x = time, y = survival, color = Model, linetype = Model),
      size = 0.8
    ) +
    # Axes.
    scale_x_continuous(limits = c(0, max(data$time, na.rm = TRUE))) +
    scale_y_continuous(limits = c(0, 1)) +
    # Colour and line-type mapping.
    scale_color_manual(values = model_colors) +
    scale_linetype_manual(values = model_linetype) +
    # Labels and theme.
    labs(
      title = "KM Curve and Parametric Models",
      x = "Time",
      y = "Survival Probability",
      color = "Model",
      linetype = "Model"
    ) +
    theme_bw() +
    theme(
      legend.position = c(0.95, 0.95),
      legend.justification = c(1, 1),
      legend.background = element_blank(),
      plot.title = element_text(hjust = 0.5)
    )

  print(p)
}

PSM_god(lung)







In [None]:
PSM_god <- function(data) {
  # 这是一个PSM的神级函数，功能包括：
  # 拟合6种模型并返回参数，返回AIC和BIC
  # 一图展示KM曲线与拟合曲线，直观明了
  # 将预测概率存到一个EXCEL中，以供测验
  # 提供对应参数的EXCEL公式，方便使用

  # 设置数值显示格式，禁用科学计数法
  old_scipen <- options("scipen")
  options(scipen = 999)  # 设置一个大值来禁用科学计数法

  
  # 加载必要的包
  library(survival)
  library(flexsurv)
  library(survminer)
  library(ggplot2)
  library(tidyr)
  library(dplyr)


  # 拟合指数回归模型
  fit_exp <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "exp"
  )
  # 提取指数分布参数以及AIC和BIC
  params_exp <- c(
    1 / exp(coef(fit_exp)),  # Lambda
    NA,  # Gamma
    NA,  # Delta
    AIC(fit_exp),  # AIC
    BIC(fit_exp)  # BIC
  )


  # 拟合Weibull回归模型
  fit_weibull <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "weibull"
  )
  # 提取Weibull分布参数以及AIC和BIC
  params_weibull <- c(
    1 / fit_weibull$scale,  # Lambda
    exp(coef(fit_weibull)[1]),  # Gamma
    NA,  # Delta
    AIC(fit_weibull),  # AIC
    BIC(fit_weibull)  # BIC
  )


  # 拟合Gompertz回归模型
  fit_gompertz <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gompertz"
  )
  # 提取Gompertz分布参数以及AIC和BIC
  params_gompertz <- c(
    fit_gompertz$res[2, 1],  # Lambda
    fit_gompertz$res[1, 1],  # Gamma
    NA,  # Delta
    AIC(fit_gompertz),  # AIC
    BIC(fit_gompertz)  # BIC
  )


  # 拟合对数正态回归模型
  fit_lognormal <- survreg(
    Surv(time, status) ~ 1,
    data = data, 
    dist = "lognormal"
  )
  # 提取对数正态分布参数以及AIC和BIC
  params_lognormal <- c(
    coef(fit_lognormal)[1],  # Lambda
    fit_lognormal$scale,  # Gamma
    NA,  # Delta
    AIC(fit_lognormal),  # AIC
    BIC(fit_lognormal)  # BIC
  )


  # 拟合对数逻辑回归模型
  fit_loglogit <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "loglogistic"
  )
  # 提取对数逻辑分布参数以及AIC和BIC
  params_loglogit <- c(
    coef(fit_loglogit)[1],  # Lambda
    fit_loglogit$scale,  # Gamma
    NA,  # Delta
    AIC(fit_loglogit),  # AIC
    BIC(fit_loglogit)  # BIC
  )


  # 拟合广义伽马模型，不考虑任何协变量
  fit_gengamma <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gengamma"
  )

  # 提取广义伽马参数以及AIC和BIC
  params_gengamma <- c(
    coef(fit_gengamma)[1],  # Mu
    fit_gengamma$res[2, 1],  # Sigma
    fit_gengamma$res[3, 1],  # Q
    AIC(fit_gengamma),  # AIC
    BIC(fit_gengamma)  # BIC
  )

  
  # 以数据表形式打印结果
  # 结果数据框中格式化显示
  result <- data.frame(
    row.names = c(
      "Exp", "Weibull", "Gompertz", 
      "Log-normal", "Log-logistic", "Generalized Gamma"
    ),
    LAMBDA = format(c(
      params_exp[1], params_weibull[1], params_gompertz[1],
      params_lognormal[1], params_loglogit[1], params_gengamma[1]
    ), scientific = FALSE),
    GAMMA = format(c(
      params_exp[2], params_weibull[2], params_gompertz[2],
      params_lognormal[2], params_loglogit[2], params_gengamma[2]
    ), scientific = FALSE),
    DELTA = format(c(
      params_exp[3], params_weibull[3], params_gompertz[3],
      params_lognormal[3], params_loglogit[3], params_gengamma[3]
    ), scientific = FALSE),
    AIC = format(c(
      params_exp[4], params_weibull[4], params_gompertz[4],
      params_lognormal[4], params_loglogit[4], params_gengamma[4]
    ), scientific = FALSE),
    BIC = format(c(
      params_exp[5], params_weibull[5], params_gompertz[5],
      params_lognormal[5], params_loglogit[5], params_gengamma[5]
    ), scientific = FALSE)
  )

  # 打印结果
  cat("PSM Results:", "\n")
  print(result)
  cat("---------------------------------------------------------------------", "\n")

  # 指出最佳模型
  best_model <- rownames(result)[which.min(result$AIC)]
  cat("The best model is", best_model, "\n")
  cat("AIC:", result[best_model, "AIC"], "\n")
  cat("BIC:", result[best_model, "BIC"], "\n")
  cat("---------------------------------------------------------------------", "\n")

  # 恢复原来的设置
  options(scipen = old_scipen$scipen)

  # 对应EXCEL公式
  cat("Excel formula", "\n")
  cat("Exp:", "1 - EXPON.DIST(x = time, lambda = Lambda, TRUE)", "\n")
  cat("Weibull:", "1 - WEIBULL(x = time, alpha = Lambda, beta = Gammma, TRUE)", "\n")
  cat("Gompertz:", "EXP( -LAMBDA / GAMMA * (EXP(GAMMA * time) - 1) )", "\n")
  cat("Log-normal:", "1 - NORM.DIST(LN(time), mean = LAMBDA, standard_dev = GAMMA, TRUE)", "\n")
  cat("Log-logistic:", "1 - 1 / (1 + EXP(-(LN(time) - LAMBDA) / GAMMA))", "\n")
  cat("Generalized Gamma:", "1 - GAMMA.DIST(x = 1/Delta^2 * EXP(Delta * (LN(time)-LAMBDA) / GAMMA), ALPHA = Delta^(-2), Beta = 1, TRUE)", "\n")
  cat("---------------------------------------------------------------------", "\n")
  
  # 填入对应的参数的EXCEL公式
  cat("Excel formula with parameters", "\n")
  cat("Exp:", "1 - EXPON.DIST(time, ", params_exp[1], ", TRUE)", "\n")
  cat("Weibull:", "1 - WEIBULL(time, ", params_weibull[1], ", ", params_weibull[2], ", TRUE)", "\n")
  cat("Gompertz:", "EXP( -", params_gompertz[1], " / ", params_gompertz[2], " * (EXP(", params_gompertz[2], " * time) - 1) )", "\n")
  cat("Log-normal:", "1 - NORM.DIST(LN(time), ", params_lognormal[1], ", ", params_lognormal[2], ", TRUE)", "\n")
  cat("Log-logistic:", "1 - 1 / (1 + EXP(-(LN(time) - ", params_loglogit[1], ") / ", params_loglogit[2], "))", "\n")
  cat("Generalized Gamma:", "1 - GAMMA.DIST(x = 1/", params_gengamma[3], "^2 * EXP(", params_gengamma[3], " * (LN(time)-", params_gengamma[1], ") / ", params_gengamma[2], "), ", 1/params_gengamma[3]^2, ", Beta = 1, TRUE)", "\n")
  cat("---------------------------------------------------------------------", "\n")

  library(ggplot2)

  # 生成预测数据框
  time_seq <- seq(0, max(data$time), 0.1)
  surv_probs <- data.frame(
    time = time_seq,
    KM = NA,  # 占位，后续用真实KM数据替换
    Exp = 1 - pexp(time_seq, rate = params_exp[1]),
    Weibull = 1 - pweibull(time_seq, shape = params_weibull[1], scale = params_weibull[2]),
    Gompertz = 1 - pgompertz(time_seq, shape = params_gompertz[2], rate = params_gompertz[1]),
    LogNormal = 1 - pnorm(log(time_seq), mean = params_lognormal[1], sd = params_lognormal[2]),
    LogLogistic = 1 - 1/(1 + exp(-(log(time_seq) - params_loglogit[1])/params_loglogit[2])),
    GeneralizedGamma = 1 - pgengamma(time_seq, mu = params_gengamma[1], sigma = params_gengamma[2], Q = params_gengamma[3])
  )

  # 提取KM曲线数据
  km_fit <- survfit(Surv(time, status) ~ 1, data = data)
  km_df <- data.frame(
    time = km_fit$time,
    surv = km_fit$surv,
    upper = km_fit$upper,
    lower = km_fit$lower
  )

  # 转换成长格式便于ggplot绘图
  surv_probs_long <- surv_probs %>%
    pivot_longer(
      cols = -time,
      names_to = "Model",
      values_to = "survival"
    )

  # 自定义颜色和线型
  model_colors <- c(
    "KM" = "#ff5e00",
    "Exp" = "red",
    "Weibull" = "green",
    "Gompertz" = "blue",
    "LogNormal" = "purple",
    "LogLogistic" = "orange",
    "GeneralizedGamma" = "black"
  )
  model_linetype <- c(
    "KM" = "solid",
    "Exp" = "dashed",
    "Weibull" = "dashed",
    "Gompertz" = "dashed",
    "LogNormal" = "dashed",
    "LogLogistic" = "dashed",
    "GeneralizedGamma" = "dashed"
  )

  # 绘制图形
  p <- ggplot() +
    # 绘制KM曲线（阶梯线）
    geom_step(
      data = km_df,
      aes(x = time, y = surv, color = "KM", linetype = "KM"),
      size = 1.2
    ) +
    # 绘制置信区间
    geom_ribbon(
      data = km_df,
      aes(x = time, ymin = lower, ymax = upper),
      fill = "gray",
      alpha = 0.3
    ) +
    # 绘制参数模型曲线
    geom_line(
      data = surv_probs_long,
      aes(x = time, y = survival, color = Model, linetype = Model),
      size = 0.8
    ) +
    # 坐标轴设置
    scale_x_continuous(limits = c(0, max(data$time))) +
    scale_y_continuous(limits = c(0, 1)) +
    # 颜色和线型映射
    scale_color_manual(values = model_colors) +
    scale_linetype_manual(values = model_linetype) +
    # 标签和主题
    labs(
      title = "KM Curve and Parametric Models",
      x = "Time",
      y = "Survival Probability",
      color = "Model",
      linetype = "Model"
    ) +
    theme_bw() +
    theme(
      legend.position = c(0.95, 0.95),
      legend.justification = c(1, 1),
      legend.background = element_blank(),
      plot.title = element_text(hjust = 0.5)
    )

  print(p)
}

# Apply PSM_god to the `lung` data set (loaded earlier via data(lung)).
PSM_god(lung)







In [None]:
PSM_god <- function(data) {
  # Fit seven intercept-only parametric survival models to `data` and compare
  # them with the Kaplan-Meier estimate.
  #
  # Models: exponential, Weibull, log-normal and log-logistic are fitted with
  # survreg(); Gompertz, generalized gamma and gamma with flexsurvreg().
  #
  # Args:
  #   data: data frame with the columns `time` and `status` that are used in
  #     Surv(time, status).
  #
  # Side effects:
  #   * attaches survival, flexsurv, survminer, ggplot2, tidyr and dplyr;
  #   * prints a table of parameters, AIC and BIC for every model, the model
  #     with the smallest AIC, and Excel formulas for the survival functions
  #     (generic and with the fitted parameters filled in);
  #   * draws the Kaplan-Meier curve with its confidence band together with
  #     the fitted survival curves.
  #
  # Meaning of the LAMBDA / GAMMA / DELTA columns per model:
  #   Exp:               rate,  -,     -
  #   Weibull:           shape, scale, -
  #   Gompertz:          rate,  shape, -
  #   Log-normal:        meanlog, sdlog, -
  #   Log-logistic:      location and scale of log(time), -
  #   Generalized Gamma: mu, sigma, Q
  #   Gamma:             shape, rate,  -
  #
  # Returns:
  #   The value of print(p), where p is the ggplot object.

  # Disable scientific notation while the table is printed. on.exit() is a
  # safety net so the caller's setting is restored even if a model fit fails.
  old_scipen <- options("scipen")
  options(scipen = 999)
  on.exit(options(old_scipen), add = TRUE)

  # Load the required packages
  library(survival)
  library(flexsurv)
  library(survminer)
  library(ggplot2)
  library(tidyr)
  library(dplyr)

  # ---- Exponential ----
  fit_exp <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "exp"
  )
  params_exp <- c(
    1 / exp(coef(fit_exp)),  # rate = exp(-intercept)
    NA,
    NA,
    AIC(fit_exp),
    BIC(fit_exp)
  )

  # ---- Weibull ----
  fit_weibull <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "weibull"
  )
  params_weibull <- c(
    1 / fit_weibull$scale,      # shape = 1 / survreg scale
    exp(coef(fit_weibull)[1]),  # scale = exp(intercept)
    NA,
    AIC(fit_weibull),
    BIC(fit_weibull)
  )

  # ---- Gompertz ----
  fit_gompertz <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gompertz"
  )
  params_gompertz <- c(
    fit_gompertz$res["rate", "est"],
    fit_gompertz$res["shape", "est"],
    NA,
    AIC(fit_gompertz),
    BIC(fit_gompertz)
  )

  # ---- Log-normal ----
  fit_lognormal <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "lognormal"
  )
  params_lognormal <- c(
    coef(fit_lognormal)[1],  # mean of log(time)
    fit_lognormal$scale,     # sd of log(time)
    NA,
    AIC(fit_lognormal),
    BIC(fit_lognormal)
  )

  # ---- Log-logistic ----
  fit_loglogit <- survreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "loglogistic"
  )
  params_loglogit <- c(
    coef(fit_loglogit)[1],  # location of log(time)
    fit_loglogit$scale,     # scale of log(time)
    NA,
    AIC(fit_loglogit),
    BIC(fit_loglogit)
  )

  # ---- Generalized gamma (no covariates) ----
  fit_gengamma <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gengamma"
  )
  params_gengamma <- c(
    fit_gengamma$res["mu", "est"],
    fit_gengamma$res["sigma", "est"],
    fit_gengamma$res["Q", "est"],
    AIC(fit_gengamma),
    BIC(fit_gengamma)
  )

  # ---- Gamma ----
  fit_gamma <- flexsurvreg(
    Surv(time, status) ~ 1,
    data = data,
    dist = "gamma"
  )
  # Take the estimates from `$res` (natural scale). coef() on a flexsurvreg
  # fit returns the log-transformed shape and rate, which must not be passed
  # to pgamma() directly.
  params_gamma <- c(
    fit_gamma$res["shape", "est"],
    fit_gamma$res["rate", "est"],
    NA,
    AIC(fit_gamma),
    BIC(fit_gamma)
  )

  # Collect everything in one numeric matrix: one row per model
  params <- rbind(
    "Exp" = params_exp,
    "Weibull" = params_weibull,
    "Gompertz" = params_gompertz,
    "Log-normal" = params_lognormal,
    "Log-logistic" = params_loglogit,
    "Generalized Gamma" = params_gengamma,
    "Gamma" = params_gamma
  )
  colnames(params) <- c("LAMBDA", "GAMMA", "DELTA", "AIC", "BIC")

  # Formatted (character) copy of the table for display
  format_column <- function(column) {
    format(unname(params[, column]), scientific = FALSE)
  }
  result <- data.frame(
    row.names = rownames(params),
    LAMBDA = format_column("LAMBDA"),
    GAMMA = format_column("GAMMA"),
    DELTA = format_column("DELTA"),
    AIC = format_column("AIC"),
    BIC = format_column("BIC")
  )

  # Print the table
  cat("PSM Results:", "\n")
  print(result)
  cat("---------------------------------------------------------------------", "\n")

  # Report the best model; the comparison uses the numeric AIC values rather
  # than the formatted strings stored in `result`.
  best_model <- rownames(params)[which.min(params[, "AIC"])]
  cat("The best model is", best_model, "\n")
  cat("AIC:", result[best_model, "AIC"], "\n")
  cat("BIC:", result[best_model, "BIC"], "\n")
  cat("---------------------------------------------------------------------", "\n")

  # Restore the caller's setting here, so the numbers interpolated into the
  # Excel formulas below are printed with the caller's own scipen value.
  options(scipen = old_scipen$scipen)

  # Generic Excel formulas for the survival functions
  cat("Excel formula", "\n")
  cat("Exp:", "1 - EXPON.DIST(x = time, lambda = Lambda, TRUE)", "\n")
  cat("Weibull:", "1 - WEIBULL(x = time, alpha = Lambda, beta = Gammma, TRUE)", "\n")
  cat("Gompertz:", "EXP( -LAMBDA / GAMMA * (EXP(GAMMA * time) - 1) )", "\n")
  cat("Log-normal:", "1 - NORM.DIST(LN(time), mean = LAMBDA, standard_dev = GAMMA, TRUE)", "\n")
  cat("Log-logistic:", "1 - 1 / (1 + EXP(-(LN(time) - LAMBDA) / GAMMA))", "\n")
  cat("Generalized Gamma:", "1 - GAMMA.DIST(x = 1/Delta^2 * EXP(Delta * (LN(time)-LAMBDA) / GAMMA), ALPHA = Delta^(-2), Beta = 1, TRUE)", "\n")
  # Excel's GAMMA.DIST takes a scale parameter, i.e. 1 / rate
  cat("Gamma:", "1 - GAMMA.DIST(x = time, alpha = LAMBDA, beta = 1 / GAMMA, TRUE)", "\n")
  cat("---------------------------------------------------------------------", "\n")

  # The same formulas with the fitted parameters filled in
  cat("Excel formula with parameters", "\n")
  cat("Exp:", "1 - EXPON.DIST(time, ", params_exp[1], ", TRUE)", "\n")
  cat("Weibull:", "1 - WEIBULL(time, ", params_weibull[1], ", ", params_weibull[2], ", TRUE)", "\n")
  cat("Gompertz:", "EXP( -", params_gompertz[1], " / ", params_gompertz[2], " * (EXP(", params_gompertz[2], " * time) - 1) )", "\n")
  cat("Log-normal:", "1 - NORM.DIST(LN(time), ", params_lognormal[1], ", ", params_lognormal[2], ", TRUE)", "\n")
  cat("Log-logistic:", "1 - 1 / (1 + EXP(-(LN(time) - ", params_loglogit[1], ") / ", params_loglogit[2], "))", "\n")
  cat("Generalized Gamma:", "1 - GAMMA.DIST(x = 1/", params_gengamma[3], "^2 * EXP(", params_gengamma[3], " * (LN(time)-", params_gengamma[1], ") / ", params_gengamma[2], "), ", 1/params_gengamma[3]^2, ", Beta = 1, TRUE)", "\n")
  cat("Gamma:", "1 - GAMMA.DIST(time, ", params_gamma[1], ", ", 1 / params_gamma[2], ", TRUE)", "\n")
  cat("---------------------------------------------------------------------", "\n")

  # Predicted survival probabilities on a fine time grid. The Kaplan-Meier
  # curve is drawn from `km_df` below, so it needs no column here.
  max_time <- max(data$time, na.rm = TRUE)
  time_seq <- seq(0, max_time, 0.1)
  surv_probs <- data.frame(
    time = time_seq,
    Exp = pexp(time_seq, rate = params_exp[1], lower.tail = FALSE),
    Weibull = pweibull(
      time_seq,
      shape = params_weibull[1],
      scale = params_weibull[2],
      lower.tail = FALSE
    ),
    Gompertz = pgompertz(
      time_seq,
      shape = params_gompertz[2],
      rate = params_gompertz[1],
      lower.tail = FALSE
    ),
    LogNormal = pnorm(
      log(time_seq),
      mean = params_lognormal[1],
      sd = params_lognormal[2],
      lower.tail = FALSE
    ),
    LogLogistic = plogis(
      (log(time_seq) - params_loglogit[1]) / params_loglogit[2],
      lower.tail = FALSE
    ),
    GeneralizedGamma = pgengamma(
      time_seq,
      mu = params_gengamma[1],
      sigma = params_gengamma[2],
      Q = params_gengamma[3],
      lower.tail = FALSE
    ),
    Gamma = pgamma(
      time_seq,
      shape = params_gamma[1],
      rate = params_gamma[2],
      lower.tail = FALSE
    )
  )

  # Kaplan-Meier estimate with its confidence limits
  km_fit <- survfit(Surv(time, status) ~ 1, data = data)
  km_df <- data.frame(
    time = km_fit$time,
    surv = km_fit$surv,
    upper = km_fit$upper,
    lower = km_fit$lower
  )

  # Long format: one row per (time, model) pair for ggplot
  surv_probs_long <- surv_probs %>%
    pivot_longer(
      cols = -time,
      names_to = "Model",
      values_to = "survival"
    )

  # Colour and line type per curve
  model_colors <- c(
    "KM" = "#ff5e00",
    "Exp" = "red",
    "Weibull" = "green",
    "Gompertz" = "blue",
    "LogNormal" = "purple",
    "LogLogistic" = "orange",
    "GeneralizedGamma" = "black",
    "Gamma" = "brown"
  )
  model_linetype <- c(
    "KM" = "solid",
    "Exp" = "dashed",
    "Weibull" = "dashed",
    "Gompertz" = "dashed",
    "LogNormal" = "dashed",
    "LogLogistic" = "dashed",
    "GeneralizedGamma" = "dashed",
    "Gamma" = "dashed"
  )

  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept so the code also runs on older ggplot2 versions.
  p <- ggplot() +
    # Kaplan-Meier curve as a step function
    geom_step(
      data = km_df,
      aes(x = time, y = surv, color = "KM", linetype = "KM"),
      size = 1.2
    ) +
    # Kaplan-Meier confidence band
    geom_ribbon(
      data = km_df,
      aes(x = time, ymin = lower, ymax = upper),
      fill = "gray",
      alpha = 0.3
    ) +
    # Fitted parametric survival curves
    geom_line(
      data = surv_probs_long,
      aes(x = time, y = survival, color = Model, linetype = Model),
      size = 0.8
    ) +
    # Axis limits
    scale_x_continuous(limits = c(0, max_time)) +
    scale_y_continuous(limits = c(0, 1)) +
    # Manual colour and line type scales
    scale_color_manual(values = model_colors) +
    scale_linetype_manual(values = model_linetype) +
    # Labels and theme
    labs(
      title = "KM Curve and Parametric Models",
      x = "Time",
      y = "Survival Probability",
      color = "Model",
      linetype = "Model"
    ) +
    theme_bw() +
    theme(
      legend.position = c(0.95, 0.95),
      legend.justification = c(1, 1),
      legend.background = element_blank(),
      plot.title = element_text(hjust = 0.5)
    )

  print(p)
}

# Apply PSM_god to the `lung` data set (loaded earlier via data(lung)).
PSM_god(lung)





