In [20]:
# Load necessary packages
library(ggplot2)
library(psych)
library(dplyr)
library(fmsb)
library(ggradar)
library(plotrix)

In [21]:
# radarchart function to plot data
# altered dfmax to be 1.25x larger than the max value, making the figures more readable
radarchart <- function(df, axistype=0, seg=4, pty=16, pcol=1:8, plty=1:6, plwd=1,
                       pdensity=NULL, pangle=45, pfcol=NA, cglty=3, cglwd=1,
                       cglcol="navy", axislabcol="blue", title="", maxmin=TRUE,
                       na.itp=TRUE, centerzero=FALSE, vlabels=NULL, vlcex=NULL,
                       caxislabels=NULL, calcex=NULL,
                       paxislabels=NULL, palcex=NULL, ...) {
  if (!is.data.frame(df)) { cat("The data must be given as dataframe.\n"); return() }
  if ((n <- length(df))<3) { cat("The number of variables must be 3 or more.\n"); return() }
  if (maxmin==FALSE) { # when the dataframe does not include max and min as the top 2 rows.
    dfmax <- apply(df, 2, max)
    dfmax <- round(dfmax+(dfmax/4), digits=2)
    dfmin <- apply(df, 2, min)
    df <- rbind(dfmax, dfmin, df)
  }
  plot(c(-1.2, 1.2), c(-1.2, 1.2), type="n", frame.plot=FALSE, axes=FALSE, 
       xlab="", ylab="", main=title, asp=1, ...) # define x-y coordinates without any plot
  theta <- seq(90, 450, length=n+1)*pi/180
  theta <- theta[1:n]
  xx <- cos(theta)
  yy <- sin(theta)
  CGap <- ifelse(centerzero, 0, 1)
  for (i in 0:seg) { # complementary guide lines, dotted navy line by default
    polygon(xx*(i+CGap)/(seg+CGap), yy*(i+CGap)/(seg+CGap), lty=cglty, lwd=cglwd, border=cglcol)
    if (axistype==1|axistype==3) CAXISLABELS <- paste(i/seg*100,"(%)")
    if (axistype==4|axistype==5) CAXISLABELS <- sprintf("%3.2f",i/seg)
    if (!is.null(caxislabels)&(i<length(caxislabels))) CAXISLABELS <- caxislabels[i+1]
    if (axistype==1|axistype==3|axistype==4|axistype==5) {
     if (is.null(calcex)) text(-0.05, (i+CGap)/(seg+CGap), CAXISLABELS, col=axislabcol) else
     text(-0.05, (i+CGap)/(seg+CGap), CAXISLABELS, col=axislabcol, cex=calcex)
    }
  }
  if (centerzero) {
    arrows(0, 0, xx*1, yy*1, lwd=cglwd, lty=cglty, length=0, col=cglcol)
  }
  else {
    arrows(xx/(seg+CGap), yy/(seg+CGap), xx*1, yy*1, lwd=cglwd, lty=cglty, length=0, col=cglcol)
  }
  PAXISLABELS <- df[1,1:n]
  if (!is.null(paxislabels)) PAXISLABELS <- paxislabels
  if (axistype==2|axistype==3|axistype==5) {
   if (is.null(palcex)) text(xx[1:n], yy[1:n], PAXISLABELS, col=axislabcol) else
   text(xx[1:n], yy[1:n], PAXISLABELS, col=axislabcol, cex=palcex)
  }
  VLABELS <- colnames(df)
  if (!is.null(vlabels)) VLABELS <- vlabels
  if (is.null(vlcex)) text(xx*1.2, yy*1.2, VLABELS) else
  text(xx*1.2, yy*1.2, VLABELS, cex=vlcex)
  series <- length(df[[1]])
  SX <- series-2
  if (length(pty) < SX) { ptys <- rep(pty, SX) } else { ptys <- pty }
  if (length(pcol) < SX) { pcols <- rep(pcol, SX) } else { pcols <- pcol }
  if (length(plty) < SX) { pltys <- rep(plty, SX) } else { pltys <- plty }
  if (length(plwd) < SX) { plwds <- rep(plwd, SX) } else { plwds <- plwd }
  if (length(pdensity) < SX) { pdensities <- rep(pdensity, SX) } else { pdensities <- pdensity }
  if (length(pangle) < SX) { pangles <- rep(pangle, SX)} else { pangles <- pangle }
  if (length(pfcol) < SX) { pfcols <- rep(pfcol, SX) } else { pfcols <- pfcol }
  for (i in 3:series) {
    xxs <- xx
    yys <- yy
    scale <- CGap/(seg+CGap)+(df[i,]-df[2,])/(df[1,]-df[2,])*seg/(seg+CGap)
    if (sum(!is.na(df[i,]))<3) { cat(sprintf("[DATA NOT ENOUGH] at %d\n%g\n",i,df[i,])) # for too many NA's (1.2.2012)
    } else {
      for (j in 1:n) {
        if (is.na(df[i, j])) { # how to treat NA
          if (na.itp) { # treat NA using interpolation
            left <- ifelse(j>1, j-1, n)
            while (is.na(df[i, left])) {
              left <- ifelse(left>1, left-1, n)
            }
            right <- ifelse(j<n, j+1, 1)
            while (is.na(df[i, right])) {
              right <- ifelse(right<n, right+1, 1)
            }
            xxleft <- xx[left]*CGap/(seg+CGap)+xx[left]*(df[i,left]-df[2,left])/(df[1,left]-df[2,left])*seg/(seg+CGap)
            yyleft <- yy[left]*CGap/(seg+CGap)+yy[left]*(df[i,left]-df[2,left])/(df[1,left]-df[2,left])*seg/(seg+CGap)
            xxright <- xx[right]*CGap/(seg+CGap)+xx[right]*(df[i,right]-df[2,right])/(df[1,right]-df[2,right])*seg/(seg+CGap)
            yyright <- yy[right]*CGap/(seg+CGap)+yy[right]*(df[i,right]-df[2,right])/(df[1,right]-df[2,right])*seg/(seg+CGap)
            if (xxleft > xxright) {
              xxtmp <- xxleft; yytmp <- yyleft;
              xxleft <- xxright; yyleft <- yyright;
              xxright <- xxtmp; yyright <- yytmp;
            }
            xxs[j] <- xx[j]*(yyleft*xxright-yyright*xxleft)/(yy[j]*(xxright-xxleft)-xx[j]*(yyright-yyleft))
            yys[j] <- (yy[j]/xx[j])*xxs[j]
          } else { # treat NA as zero (origin)
            xxs[j] <- 0
            yys[j] <- 0
          }
        }
        else {
          xxs[j] <- xx[j]*CGap/(seg+CGap)+xx[j]*(df[i, j]-df[2, j])/(df[1, j]-df[2, j])*seg/(seg+CGap)
          yys[j] <- yy[j]*CGap/(seg+CGap)+yy[j]*(df[i, j]-df[2, j])/(df[1, j]-df[2, j])*seg/(seg+CGap)
        }
      }
      if (is.null(pdensities)) {
        polygon(xxs, yys, lty=pltys[i-2], lwd=plwds[i-2], border=pcols[i-2], col=pfcols[i-2])
      } else {
        polygon(xxs, yys, lty=pltys[i-2], lwd=plwds[i-2], border=pcols[i-2], 
         density=pdensities[i-2], angle=pangles[i-2], col=pfcols[i-2])
      }
      points(xx*scale, yy*scale, pch=ptys[i-2], col=pcols[i-2])
    }
  }
}

In [22]:
data <- read.csv("/home/mtweed/scratch/ukb_category_109_bis.csv")
# Using ad_subj_data as the variables for UKBB are already organized by patient
ad_subj_data <- read.csv("/home/mtweed/linear_regression/UKBB_ad_data_other.csv")

In [23]:
names(data)[names(data) == 'X24483.2.0'] <- 't2_sub_nigra_right'
names(data)[names(data) == 'X24484.2.0'] <- 't2_sub_nigra_left'
names(data)[names(data) == 'X24479.2.0'] <- 'mms_accumbens_left'
names(data)[names(data) == 'X24480.2.0'] <- 'mms_accumbens_right'
names(data)[names(data) == 'X24477.2.0'] <- 'mms_amygdala_left'
names(data)[names(data) == 'X24478.2.0'] <- 'mms_amygdala_right'
names(data)[names(data) == 'X24469.2.0'] <- 'mms_caudate_left'
names(data)[names(data) == 'X24470.2.0'] <- 'mms_caudate_right'
names(data)[names(data) == 'X24475.2.0'] <- 'mms_hipp_left'
names(data)[names(data) == 'X24476.2.0'] <- 'mms_hipp_right'
names(data)[names(data) == 'X24473.2.0'] <- 'mms_pallidum_left'
names(data)[names(data) == 'X24474.2.0'] <- 'mms_pallidum_right'
names(data)[names(data) == 'X24471.2.0'] <- 'mms_putamen_left'
names(data)[names(data) == 'X24472.2.0'] <- 'mms_putamen_right'
names(data)[names(data) == 'X24481.2.0'] <- 'mms_sub_nigra_left'
names(data)[names(data) == 'X24482.2.0'] <- 'mms_sub_nigra_right'
names(data)[names(data) == 'X24467.2.0'] <- 'mms_thalamus_left'
names(data)[names(data) == 'X24468.2.0'] <- 'mms_thalamus_right'

data$t2_sub_nigra <- rowMeans(data[c('t2_sub_nigra_right' ,'t2_sub_nigra_left')], na.rm = TRUE)
data$mms_accumbens <- rowMeans(data[c('mms_accumbens_left' ,'mms_accumbens_right')], na.rm = TRUE)
data$mms_amygdala <- rowMeans(data[c('mms_amygdala_left' ,'mms_amygdala_right')], na.rm = TRUE)
data$mms_caudate <- rowMeans(data[c('mms_caudate_left' ,'mms_caudate_right')], na.rm = TRUE)
data$mms_hipp <- rowMeans(data[c('mms_hipp_left' ,'mms_hipp_right')], na.rm = TRUE)
data$mms_pallidum <- rowMeans(data[c('mms_pallidum_left' ,'mms_pallidum_right')], na.rm = TRUE)
data$mms_putamen <- rowMeans(data[c('mms_putamen_left' ,'mms_putamen_right')], na.rm = TRUE)
data$mms_sub_nigra <- rowMeans(data[c('mms_sub_nigra_left' ,'mms_sub_nigra_right')], na.rm = TRUE)
data$mms_thalamus <- rowMeans(data[c('mms_thalamus_left' ,'mms_thalamus_right')], na.rm = TRUE)

In [26]:
write.csv(data, "/home/mtweed/linear_regression/SWI_raw_data.csv")

In [7]:
swi_data <- data[c('eid', 't2_sub_nigra' ,'mms_accumbens' ,'mms_amygdala' ,'mms_caudate' ,'mms_hipp' ,'mms_pallidum' ,'mms_putamen' ,'mms_sub_nigra' ,'mms_thalamus')]
colnames(swi_data)[colnames(swi_data) == "eid"] <- "Subject_ID"
ad_subj_data=merge(ad_subj_data, swi_data)
swi_data = ad_subj_data
swi_data <- swi_data[c("Subject_ID","sex_31.0.0","age_when_attended_assessment_centre_21003.2.0","volume_of_estimatedtotalintracranial_whole_brain_26521.2.0","uk_biobank_assessment_centre_54.2.0","mean_tfmri_head_motion_averaged_across_space_and_time_points_25742.2.0","mean_rfmri_head_motion_averaged_across_space_and_time_points_25741.2.0", "date_of_attending_assessment_centre_53.2.0", "body_mass_index_bmi_21001.2.0", "c.reactive_protein_30710.0.0", "glucose_30740.0.0", "glycated_haemoglobin_hba1c_30750.0.0", "cholesterol_30690.0.0", "hdl_cholesterol_30760.0.0", "ldl_direct_30780.0.0", "triglycerides_30870.0.0", "systolic_blood_pressure_automated_reading_4080.0.1", "diastolic_blood_pressure_automated_reading_4079.0.1", 't2_sub_nigra' ,'mms_accumbens' ,'mms_amygdala' ,'mms_caudate' ,'mms_hipp' ,'mms_pallidum' ,'mms_putamen' ,'mms_sub_nigra' ,'mms_thalamus')]                       

In [8]:
colnames(swi_data)

In [9]:
# Define necessary variables
brain_regions <- c("t2_sub_nigra", "mms_accumbens", "mms_amygdala", "mms_caudate", "mms_hipp", "mms_pallidum", "mms_putamen", "mms_sub_nigra", "mms_thalamus")
vars <- c("BMI", "CRP", "Glucose", "HBA1C", "Cholesterol", "HDL_Chol", "LDL_Chol", "Triglycerides", "Systolic_BP", "Diastolic_BP")
variables <- c("body_mass_index_bmi_21001.2.0", "c.reactive_protein_30710.0.0", 
                        "glucose_30740.0.0", "glycated_haemoglobin_hba1c_30750.0.0", 
                        "cholesterol_30690.0.0", "hdl_cholesterol_30760.0.0", 
                        "ldl_direct_30780.0.0", "triglycerides_30870.0.0", 
                        "systolic_blood_pressure_automated_reading_4080.0.1", 
                        "diastolic_blood_pressure_automated_reading_4079.0.1")
all_data_summary <- data.frame()

# Loop through each brain region
for (region in brain_regions) {
    k=1
    # Loop through each obesity-related parameter
    for (variable in variables) {
        # Calculate regression for current region, parameter, and data frame
        lm_model <- lm(get(region) ~ get(variable) + 
                       age_when_attended_assessment_centre_21003.2.0 + 
                       poly(age_when_attended_assessment_centre_21003.2.0, 2, raw=TRUE) * sex_31.0.0 +
                       poly(difftime(as.Date(date_of_attending_assessment_centre_53.2.0),
                            min(as.Date(date_of_attending_assessment_centre_53.2.0)), units='days'), 2, raw=TRUE) + 
                       uk_biobank_assessment_centre_54.2.0 + 
                       volume_of_estimatedtotalintracranial_whole_brain_26521.2.0, data = swi_data)
        summary_model <- summary(lm_model)
        values <- data.frame(summary_model$coefficients[2,])
        values <- as.data.frame(t(values))
        # Create necessary columns to make the data frame more readable
        values$region <- region
        values$measure <- vars[k]
        # Rename row names to relevant patient information
        row_name <- paste(region, "_", variable)
        rownames(values) <- row_name 
        all_data_summary <- rbind(all_data_summary, values)
        k=k+1
    }
}
write.csv(all_data_summary, "/home/mtweed/linear_regression/UKBB_SWI_data_summary.csv")

In [10]:
all_data_summary <- read.csv("/home/mtweed/linear_regression/UKBB_SWI_data_summary.csv")

In [13]:
colnames(all_data_summary)[colnames(all_data_summary) == "Pr...t.."] <- "p.value"

# Loading UKBB summarized data and separating it based on ROI
t2_sub_nigra_data_summary <- all_data_summary[all_data_summary$region == 't2_sub_nigra',]
mms_accumbens_data_summary <- all_data_summary[all_data_summary$region == 'mms_accumbens',]
mms_amygdala_data_summary <- all_data_summary[all_data_summary$region == 'mms_amygdala',]
mms_caudate_data_summary <- all_data_summary[all_data_summary$region == 'mms_caudate',]
mms_hipp_data_summary <- all_data_summary[all_data_summary$region == 'mms_hipp',]
mms_pallidum_data_summary <- all_data_summary[all_data_summary$region == 'mms_pallidum',]
mms_putamen_data_summary <- all_data_summary[all_data_summary$region == 'mms_putamen',]
mms_sub_nigra_data_summary <- all_data_summary[all_data_summary$region == 'mms_sub_nigra',]
mms_thalamus_data_summary <- all_data_summary[all_data_summary$region == 'mms_thalamus',]

# Adjusting the p-value separately for each ROI to account for multiple comparisons
t2_sub_nigra_data_summary$p_fdr = p.adjust(t2_sub_nigra_data_summary$'p.value', method="fdr")
mms_accumbens_data_summary$p_fdr = p.adjust(mms_accumbens_data_summary$'p.value', method="fdr")
mms_amygdala_data_summary$p_fdr = p.adjust(mms_amygdala_data_summary$'p.value', method="fdr")
mms_caudate_data_summary$p_fdr = p.adjust(mms_caudate_data_summary$'p.value', method="fdr")
mms_hipp_data_summary$p_fdr = p.adjust(mms_hipp_data_summary$'p.value', method="fdr")
mms_pallidum_data_summary$p_fdr = p.adjust(mms_pallidum_data_summary$'p.value', method="fdr")
mms_putamen_data_summary$p_fdr = p.adjust(mms_putamen_data_summary$'p.value', method="fdr")
mms_sub_nigra_data_summary$p_fdr = p.adjust(mms_sub_nigra_data_summary$'p.value', method="fdr")
mms_thalamus_data_summary$p_fdr = p.adjust(mms_thalamus_data_summary$'p.value', method="fdr")

# Rebinding all the data into one dataframe
all_data_summary <- data.frame()
all_data_summary <- bind_rows(t2_sub_nigra_data_summary, mms_accumbens_data_summary)
all_data_summary <- bind_rows(all_data_summary, mms_amygdala_data_summary)
all_data_summary <- bind_rows(all_data_summary, mms_caudate_data_summary)
all_data_summary <- bind_rows(all_data_summary, mms_hipp_data_summary)
all_data_summary <- bind_rows(all_data_summary, mms_pallidum_data_summary)
all_data_summary <- bind_rows(all_data_summary, mms_putamen_data_summary)
all_data_summary <- bind_rows(all_data_summary, mms_sub_nigra_data_summary)
all_data_summary <- bind_rows(all_data_summary, mms_thalamus_data_summary)

In [14]:
# Defining variables for looping through dataframe columns and for radarchart titles
brain_regions <- c("t2_sub_nigra", "mms_accumbens", "mms_amygdala", "mms_caudate", "mms_hipp", "mms_pallidum", "mms_putamen", "mms_sub_nigra", "mms_thalamus")
regions <- c("T2 Substantia Nigra", "MMS Accumbens", "MMS Amygdala", "MMS Caudate", "MMS Hippocampus", "MMS Pallidum", "MMS Putamen", "MMS Substantia Nigra", "MMS Thalamus")

In [15]:
# Separating dataframe based on obesity-related parameters of interest
BMI_data <- all_data_summary[all_data_summary$measure == 'BMI',] 
CRP_data <- all_data_summary[all_data_summary$measure == 'CRP',] 
glucose_data <- all_data_summary[all_data_summary$measure == 'Glucose',] 
hba1c_data <- all_data_summary[all_data_summary$measure == 'HBA1C',] 
chol_data <- all_data_summary[all_data_summary$measure == 'Cholesterol',] 
hdl_chol_data <- all_data_summary[all_data_summary$measure == 'HDL_Chol',] 
ldl_chol_data <- all_data_summary[all_data_summary$measure == 'LDL_Chol',] 
triglycerides_data <- all_data_summary[all_data_summary$measure == 'Triglycerides',] 
sys_bp_data <- all_data_summary[all_data_summary$measure == 'Systolic_BP',] 
dia_bp_data <- all_data_summary[all_data_summary$measure == 'Diastolic_BP',] 

# Isolating brain region data
t2_sub_nigra_data <- all_data_summary[all_data_summary$region == 't2_sub_nigra',] 
mms_accumbens_data <- all_data_summary[all_data_summary$region == 'mms_accumbens',]
mms_amygdala_data <- all_data_summary[all_data_summary$region == 'mms_amygdala',] 
mms_caudate_data <- all_data_summary[all_data_summary$region == 'mms_caudate',]
mms_hipp_data <- all_data_summary[all_data_summary$region == 'mms_hipp',] 
mms_pallidum_data <- all_data_summary[all_data_summary$region == 'mms_pallidum',]
mms_putamen_data <- all_data_summary[all_data_summary$region == 'mms_putamen',] 
mms_sub_nigra_data <- all_data_summary[all_data_summary$region == 'mms_sub_nigra',]
mms_thalamus_data <- all_data_summary[all_data_summary$region == 'mms_thalamus',]

In [18]:
# Define lists and variables for radar charts
vars <- c("BMI", "CRP", "Glucose", "HBA1C", "Cholesterol", "HDL_Chol", "LDL_Chol", "Triglycerides", "Systolic_BP", "Diastolic_BP")
region_data <- list()
region_data_summary <- list()
spider_plots <- list()

# Open TIFF file
tiff('~/scratch/tractoflow_hcp_dwi/spider_plots/spider_plots_swi_ukbb.tiff', width=8000, height=5000, res=300)

# Set plot parameters
par(mar=c(3,3,3,3))
par(mfrow = c(3, 3))

# Loop through brain regions
for (i in 1:length(brain_regions)) {
    # Subset data based on FDR significance
    significant_data <- subset(get(paste(brain_regions[i],"_data",sep="")), p_fdr> 0.05)
    # Calculate min and max t-values
    min_t_val<- min(significant_data$t.value, na.rm = TRUE)
    max_t_val<- max(significant_data$t.value, na.rm = TRUE)
    # Create data frames for min and max t-values
    t_min <- data.frame(t(data.frame(replicate(length(vars), min_t_val))))
    colnames(t_min) <- vars
    t_max <- data.frame(t(data.frame(replicate(length(vars), max_t_val))))
    colnames(t_max) <- vars
    
    # Create a data frame for max and min values
    max_t <- max(get(paste(brain_regions[i],"_data_summary",sep=""))$t.value)
    min_t <- min(get(paste(brain_regions[i],"_data_summary",sep=""))$t.value)
    max_min <- data.frame(
    BMI = c(max_t, min_t), CRP = c(max_t, min_t), Glucose = c(max_t, min_t), 
    HBA1C = c(max_t, min_t), Cholesterol = c(max_t, min_t), HDL_Chol = c(max_t, min_t),
    LDL_Chol = c(max_t, min_t), Triglycerides = c(max_t, min_t), 
    Systolic_BP = c(max_t, min_t), Diastolic_BP = c(max_t, min_t))
    
    # Create data frames for region-specific t-values
    region_data[[1]] <- data.frame(t(get(paste(brain_regions[i],"_data",sep=""))$t.value))
    colnames(region_data[[1]]) = vars

    # Combine region-specific t-values into a summary data frame
    region_data_summary[[i]] <- bind_rows(max_min, region_data[[1]])
    region_data_summary[[i]] <- bind_rows(region_data_summary[i], t_min)
    region_data_summary[[i]] <- bind_rows(region_data_summary[i], t_max)
    rownames(region_data_summary[[i]]) = c("Max", "Min", paste(brain_regions[i]), "Min_T", "Max_T")
    colnames(region_data_summary[[i]]) = c("BMI", "CRP", "Glc", "HbA1c", "Cholesterol", "HDL", "LDL", "TG", "SBP", "DBP")
    data <- region_data_summary[[i]][c("Max", "Min", "Max_T", "Min_T", paste(brain_regions[i])), ]

    # Define colors to be used in radar charts
    my_colors <- c( "black", "black", "#944BE3")

    # Generate radar charts
    spider_plots[[i]] <- radarchart(
      data,  
      axistype = 1, 
      caxislabels = round(seq(min_t, max_t, ((max_t - min_t) / 4))), 
      title = paste(regions[i]),
      cex.main=3,
      pcol = my_colors,
      pfcol = c(NA,NA,NA, NA, NA, NA),
      pty = c(32 ,32 ,16, 16, 16, 16),
      plwd = c(2,2,4,4,4,4),  
      plty = c(1,1,3,3,3,3),
      cglcol = "grey", 
      cglty = 1,  
      cglwd = 0.8,
      axislabcol='black',
      seg=4,
      calcex=2.5,
      vlcex=2.8
    )

    # Add legend
    legend(
      "bottom", 
      legend = rownames(data[-c(1, 2, 3, 4), ]),
      fill = my_colors[3:6],
      bty = "n",
      ncol = length(rownames(data[-c(1, 2, 3, 4), ])),  
      cex = 2.5,
      inset = c(0, -0.1), 
      xpd = TRUE  
    )
}

# Close TIFF file
dev.off()

“no non-missing arguments to min; returning Inf”
“no non-missing arguments to max; returning -Inf”


In [17]:
write.csv(all_data_summary, "/home/mtweed/linear_regression/UKBB_SWI_data_summary.csv")