In [1]:
# Load necessary packages
library(ggplot2)
library(psych)
library(dplyr)
library(fmsb)
library(ggradar)
library(plotrix)


Attaching package: ‘psych’


The following objects are masked from ‘package:ggplot2’:

    %+%, alpha



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘plotrix’


The following object is masked from ‘package:psych’:

    rescale




In [2]:
# radarchart function to plot data
# altered dfmax to be 1.25x larger than the max value, making the figures more readable
#
# Draws a radar (spider) chart with base graphics. This appears to be an
# adapted copy of fmsb::radarchart; because it is defined in the script it
# takes precedence over the packaged version of the same name.
#
# Arguments:
#   df          data frame with at least 3 columns (one per axis). When
#               maxmin=TRUE, row 1 holds the per-axis maximum, row 2 the
#               per-axis minimum and rows 3+ the series to draw.
#   axistype    0: no axis labels; 1 or 3: centre-axis labels as percentages;
#               4 or 5: centre-axis labels as fractions ("%3.2f");
#               2, 3 or 5: additionally label the outer end of each axis.
#   seg         number of segments (guide polygons) per axis.
#   pty, pcol, plty, plwd, pdensity, pangle, pfcol
#               per-series point symbol, colour, line type, line width,
#               shading density, shading angle and fill colour; each is
#               repeated when shorter than the number of series.
#   cglty, cglwd, cglcol
#               line type, width and colour of the guide lines.
#   axislabcol  colour of the axis labels.
#   title       main title of the plot.
#   maxmin      TRUE if df already carries max/min in rows 1-2; FALSE to have
#               them derived from the data (max padded by 25% headroom).
#   na.itp      TRUE: place an NA point on the line joining its nearest
#               non-NA neighbours; FALSE: place it at the origin.
#   centerzero  TRUE: the axis minimum sits at the centre of the chart;
#               FALSE: it sits on the innermost guide polygon.
#   vlabels, vlcex
#               axis (variable) labels, default colnames(df), and their cex.
#   caxislabels, calcex
#               custom centre-axis labels (override the axistype text) and cex.
#   paxislabels, palcex
#               custom outer-axis labels (default: row 1 of df) and cex.
#   ...         passed on to plot().
#
# Returns NULL; the function is called for its drawing side effect.
radarchart <- function(df, axistype=0, seg=4, pty=16, pcol=1:8, plty=1:6, plwd=1,
                       pdensity=NULL, pangle=45, pfcol=NA, cglty=3, cglwd=1,
                       cglcol="navy", axislabcol="blue", title="", maxmin=TRUE,
                       na.itp=TRUE, centerzero=FALSE, vlabels=NULL, vlcex=NULL,
                       caxislabels=NULL, calcex=NULL,
                       paxislabels=NULL, palcex=NULL, ...) {
  if (!is.data.frame(df)) { cat("The data must be given as dataframe.\n"); return() }
  if ((n <- length(df))<3) { cat("The number of variables must be 3 or more.\n"); return() }
  if (maxmin==FALSE) { # when the dataframe does not include max and min as the top 2 rows.
    # NA values are ignored here because the series loop below handles them.
    dfmax_raw <- apply(df, 2, max, na.rm=TRUE)
    dfmin <- apply(df, 2, min, na.rm=TRUE)
    # Add 25% headroom *above* the maximum. abs() keeps the padding upward
    # for negative maxima (dfmax + dfmax/4 would move the limit below the data).
    dfmax <- round(dfmax_raw+(abs(dfmax_raw)/4), digits=2)
    # Rounding to 2 digits must never push the limit below the real maximum
    # (it would for small magnitudes such as 0.001).
    dfmax <- pmax(dfmax, dfmax_raw)
    df <- rbind(dfmax, dfmin, df)
  }
  plot(c(-1.2, 1.2), c(-1.2, 1.2), type="n", frame.plot=FALSE, axes=FALSE,
       xlab="", ylab="", main=title, asp=1, ...) # define x-y coordinates without any plot
  # One axis per variable, starting at the top (90 degrees) and evenly spaced.
  theta <- seq(90, 450, length=n+1)*pi/180
  theta <- theta[1:n]
  xx <- cos(theta)
  yy <- sin(theta)
  # CGap is the gap (in segments) left between the centre and the axis minimum.
  CGap <- if (centerzero) 0 else 1
  for (i in 0:seg) { # complementary guide lines, dotted navy line by default
    polygon(xx*(i+CGap)/(seg+CGap), yy*(i+CGap)/(seg+CGap), lty=cglty, lwd=cglwd, border=cglcol)
    if (axistype==1||axistype==3) CAXISLABELS <- paste(i/seg*100,"(%)")
    if (axistype==4||axistype==5) CAXISLABELS <- sprintf("%3.2f",i/seg)
    # Caller-supplied labels win over the generated ones.
    if (!is.null(caxislabels)&&(i<length(caxislabels))) CAXISLABELS <- caxislabels[i+1]
    if (axistype==1||axistype==3||axistype==4||axistype==5) {
      if (is.null(calcex)) text(-0.05, (i+CGap)/(seg+CGap), CAXISLABELS, col=axislabcol) else
        text(-0.05, (i+CGap)/(seg+CGap), CAXISLABELS, col=axislabcol, cex=calcex)
    }
  }
  # Radial axis lines: from the centre, or from the innermost guide polygon.
  if (centerzero) {
    arrows(0, 0, xx*1, yy*1, lwd=cglwd, lty=cglty, length=0, col=cglcol)
  }
  else {
    arrows(xx/(seg+CGap), yy/(seg+CGap), xx*1, yy*1, lwd=cglwd, lty=cglty, length=0, col=cglcol)
  }
  # Outer-axis labels default to the per-axis maxima (row 1).
  PAXISLABELS <- df[1,1:n]
  if (!is.null(paxislabels)) PAXISLABELS <- paxislabels
  if (axistype==2||axistype==3||axistype==5) {
    if (is.null(palcex)) text(xx[1:n], yy[1:n], PAXISLABELS, col=axislabcol) else
      text(xx[1:n], yy[1:n], PAXISLABELS, col=axislabcol, cex=palcex)
  }
  VLABELS <- colnames(df)
  if (!is.null(vlabels)) VLABELS <- vlabels
  if (is.null(vlcex)) text(xx*1.2, yy*1.2, VLABELS) else
    text(xx*1.2, yy*1.2, VLABELS, cex=vlcex)
  series <- length(df[[1]])
  # Rows 1-2 are max/min, so the number of drawn series is series-2.
  SX <- series-2
  # Repeat any per-series graphical parameter that is shorter than SX.
  if (length(pty) < SX) { ptys <- rep(pty, SX) } else { ptys <- pty }
  if (length(pcol) < SX) { pcols <- rep(pcol, SX) } else { pcols <- pcol }
  if (length(plty) < SX) { pltys <- rep(plty, SX) } else { pltys <- plty }
  if (length(plwd) < SX) { plwds <- rep(plwd, SX) } else { plwds <- plwd }
  if (length(pdensity) < SX) { pdensities <- rep(pdensity, SX) } else { pdensities <- pdensity }
  if (length(pangle) < SX) { pangles <- rep(pangle, SX)} else { pangles <- pangle }
  if (length(pfcol) < SX) { pfcols <- rep(pfcol, SX) } else { pfcols <- pfcol }
  for (i in 3:series) {
    xxs <- xx
    yys <- yy
    # Radial position of every value, rescaled between the axis min and max.
    scale <- CGap/(seg+CGap)+(df[i,]-df[2,])/(df[1,]-df[2,])*seg/(seg+CGap)
    if (sum(!is.na(df[i,]))<3) { cat(sprintf("[DATA NOT ENOUGH] at %d\n%g\n",i,df[i,])) # for too many NA's (1.2.2012)
    } else {
      for (j in 1:n) {
        if (is.na(df[i, j])) { # how to treat NA
          if (na.itp) { # treat NA using interpolation
            # Walk around the circle to the nearest non-NA value on each side.
            left <- ifelse(j>1, j-1, n)
            while (is.na(df[i, left])) {
              left <- ifelse(left>1, left-1, n)
            }
            right <- ifelse(j<n, j+1, 1)
            while (is.na(df[i, right])) {
              right <- ifelse(right<n, right+1, 1)
            }
            xxleft <- xx[left]*CGap/(seg+CGap)+xx[left]*(df[i,left]-df[2,left])/(df[1,left]-df[2,left])*seg/(seg+CGap)
            yyleft <- yy[left]*CGap/(seg+CGap)+yy[left]*(df[i,left]-df[2,left])/(df[1,left]-df[2,left])*seg/(seg+CGap)
            xxright <- xx[right]*CGap/(seg+CGap)+xx[right]*(df[i,right]-df[2,right])/(df[1,right]-df[2,right])*seg/(seg+CGap)
            yyright <- yy[right]*CGap/(seg+CGap)+yy[right]*(df[i,right]-df[2,right])/(df[1,right]-df[2,right])*seg/(seg+CGap)
            if (xxleft > xxright) {
              xxtmp <- xxleft; yytmp <- yyleft;
              xxleft <- xxright; yyleft <- yyright;
              xxright <- xxtmp; yyright <- yytmp;
            }
            # Intersection of axis j with the segment joining the two neighbours.
            xxs[j] <- xx[j]*(yyleft*xxright-yyright*xxleft)/(yy[j]*(xxright-xxleft)-xx[j]*(yyright-yyleft))
            yys[j] <- (yy[j]/xx[j])*xxs[j]
          } else { # treat NA as zero (origin)
            xxs[j] <- 0
            yys[j] <- 0
          }
        }
        else {
          xxs[j] <- xx[j]*CGap/(seg+CGap)+xx[j]*(df[i, j]-df[2, j])/(df[1, j]-df[2, j])*seg/(seg+CGap)
          yys[j] <- yy[j]*CGap/(seg+CGap)+yy[j]*(df[i, j]-df[2, j])/(df[1, j]-df[2, j])*seg/(seg+CGap)
        }
      }
      if (is.null(pdensities)) {
        polygon(xxs, yys, lty=pltys[i-2], lwd=plwds[i-2], border=pcols[i-2], col=pfcols[i-2])
      } else {
        polygon(xxs, yys, lty=pltys[i-2], lwd=plwds[i-2], border=pcols[i-2],
         density=pdensities[i-2], angle=pangles[i-2], col=pfcols[i-2])
      }
      points(xx*scale, yy*scale, pch=ptys[i-2], col=pcols[i-2])
    }
  }
}

In [4]:
# Load the UKBB summary table; it holds the rows for every ROI in one file
all_data_summary <- read.csv("~/linear_regression/UKBB_all_data_summary_bmi_covariate.csv")

# For each ROI: take its rows, then FDR-adjust the p-values within that ROI
# only, so the multiple-comparison correction is applied per region.
# (`Pr...t..` is presumably read.csv's rendering of a "Pr(>|t|)" header.)
caudate_data_summary <- all_data_summary[all_data_summary$region == "caudate", ]
caudate_data_summary$p_fdr <- p.adjust(caudate_data_summary$Pr...t.., method = "fdr")

putamen_data_summary <- all_data_summary[all_data_summary$region == "putamen", ]
putamen_data_summary$p_fdr <- p.adjust(putamen_data_summary$Pr...t.., method = "fdr")

nacc_data_summary <- all_data_summary[all_data_summary$region == "nacc", ]
nacc_data_summary$p_fdr <- p.adjust(nacc_data_summary$Pr...t.., method = "fdr")

hippocampus_data_summary <- all_data_summary[all_data_summary$region == "hippocampus", ]
hippocampus_data_summary$p_fdr <- p.adjust(hippocampus_data_summary$Pr...t.., method = "fdr")

pallidum_data_summary <- all_data_summary[all_data_summary$region == "pallidum", ]
pallidum_data_summary$p_fdr <- p.adjust(pallidum_data_summary$Pr...t.., method = "fdr")

full_hypo_data_summary <- all_data_summary[all_data_summary$region == "full_hypo", ]
full_hypo_data_summary$p_fdr <- p.adjust(full_hypo_data_summary$Pr...t.., method = "fdr")

# Stack the per-ROI tables (now carrying p_fdr) back into one data frame,
# in the fixed ROI order used throughout the notebook
all_data_summary <- bind_rows(
  caudate_data_summary,
  putamen_data_summary,
  nacc_data_summary,
  hippocampus_data_summary,
  pallidum_data_summary,
  full_hypo_data_summary
)


In [5]:
# ROI keys as they appear in the data (used to build variable names in loops)
brain_regions <- c(
  "caudate", "putamen", "nacc",
  "hippocampus", "pallidum", "full_hypo"
)
# Display names used as radar chart titles; same order as `brain_regions`
regions <- c(
  "Caudate", "Putamen", "Nucleus Accumbens",
  "Hippocampus", "Pallidum", "Hypothalamus"
)

In [6]:
all_data_summary

X,Estimate,Std..Error,t.value,Pr...t..,metric,region,measure,p_fdr
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<dbl>
ad _ caudate _ c.reactive_protein_30710.0.0,4.244933e-06,9.488018e-07,4.4739933,7.720244e-06,ad,caudate,BMI,3.088098e-05
ad _ caudate _ glucose_30740.0.0,3.762371e-06,1.945969e-06,1.9334174,5.320000e-02,ad,caudate,CRP,6.604137e-02
ad _ caudate _ glycated_haemoglobin_hba1c_30750.0.0,6.405129e-07,2.902144e-07,2.2070336,2.732277e-02,ad,caudate,Glucose,4.276607e-02
ad _ caudate _ cholesterol_30690.0.0,3.143001e-06,9.586258e-07,3.2786522,1.044744e-03,ad,caudate,HBA1C,2.893138e-03
ad _ caudate _ hdl_cholesterol_30760.0.0,7.961500e-06,3.277328e-06,2.4292657,1.513864e-02,ad,caudate,Cholesterol,2.477231e-02
ad _ caudate _ ldl_direct_30780.0.0,2.706446e-06,1.242041e-06,2.1790303,2.934057e-02,ad,caudate,HDL_Chol,4.401085e-02
ad _ caudate _ triglycerides_30870.0.0,4.920195e-06,1.410702e-06,3.4877649,4.880981e-04,ad,caudate,LDL_Chol,1.464294e-03
ad _ caudate _ systolic_blood_pressure_automated_reading_4080.0.1,3.386402e-07,6.225101e-08,5.4399157,5.391552e-08,ad,caudate,Triglycerides,2.772798e-07
ad _ caudate _ diastolic_blood_pressure_automated_reading_4079.0.1,6.583806e-07,1.053131e-07,6.2516518,4.141315e-10,ad,caudate,Systolic_BP,2.981747e-09
fa _ caudate _ c.reactive_protein_30710.0.0,-1.379000e-04,9.531820e-05,-1.4467331,1.479866e-01,fa,caudate,BMI,1.566917e-01


In [5]:
# Split `all_data_summary` into the per-measure and per-metric/per-ROI tables
# used later in the notebook. Everything runs inside local() so the loop
# helpers do not leak; only the result tables are created in the calling
# environment.
invisible(local({
  out_env <- parent.env(environment())
  tbl <- all_data_summary

  # One table per obesity-related parameter of interest:
  # variable name -> label in the `measure` column
  measure_tables <- c(
    CRP_data = "CRP",
    glucose_data = "Glucose",
    hba1c_data = "HBA1C",
    chol_data = "Cholesterol",
    hdl_chol_data = "HDL_Chol",
    ldl_chol_data = "LDL_Chol",
    triglycerides_data = "Triglycerides",
    sys_bp_data = "Systolic_BP",
    dia_bp_data = "Diastolic_BP"
  )
  for (table_name in names(measure_tables)) {
    assign(
      table_name,
      tbl[tbl$measure == measure_tables[[table_name]], ],
      envir = out_env
    )
  }

  # One table per DTI measure and ROI (caudate, putamen, nacc, hippocampus,
  # pallidum, hypothalamus), named "<metric>_<roi>_data"
  for (roi in c("caudate", "putamen", "nacc", "hippocampus", "pallidum", "full_hypo")) {
    for (dti in c("ad", "fa", "md", "rd")) {
      assign(
        paste0(dti, "_", roi, "_data"),
        tbl[tbl$metric == dti & tbl$region == roi, ],
        envir = out_env
      )
    }
  }
}))

In [6]:
# Define lists and variables for radar charts
# `vars` are the measure labels as stored in the `measure` column; their order
# is the order of the axes on every chart.
vars <- c("BMI", "CRP", "Glucose", "HBA1C", "Cholesterol", "HDL_Chol", "LDL_Chol", "Triglycerides", "Systolic_BP", "Diastolic_BP")
# Short axis labels drawn on the charts, in the same order as `vars`
axis_labels <- c("BMI", "CRP", "Glc", "HbA1c", "Cholesterol", "HDL", "LDL", "TG", "SBP", "DBP")
# DTI metrics plotted as one series each, and the names shown in the legend
dti_metrics <- c("ad", "fa", "md", "rd")
series_names <- toupper(dti_metrics)
# Number of axis segments; the chart needs n_segments + 1 centre-axis labels
n_segments <- 4
region_data <- list()
region_data_summary <- list()
# radarchart() draws on the device and returns NULL, so there is nothing to
# store per region; the (empty) list is kept only for backward compatibility.
spider_plots <- list()

# Define colors to be used in radar charts: black for the two band outlines,
# then one colour per DTI metric
my_colors <- c("black", "black", "#ED64C9", "#FAA46A", "#944BE3", "#62C0FE")

# Build a one-row data frame holding `value` in one column per measure
constant_row <- function(value, measures) {
  row <- data.frame(t(rep(value, length(measures))))
  colnames(row) <- measures
  row
}

# Build a one-row data frame of t-values for one metric/ROI table, with the
# columns matched to `measures` by the `measure` label rather than by row
# position. Stops if a measure is missing or the table has extra rows, since
# either would silently mislabel the chart axes.
t_value_row <- function(metric_data, measures, label) {
  idx <- match(measures, metric_data$measure)
  if (anyNA(idx) || nrow(metric_data) != length(measures)) {
    stop(
      "Expected exactly one row per measure (",
      paste(measures, collapse = ", "), ") in ", label,
      call. = FALSE
    )
  }
  row <- data.frame(t(metric_data$t.value[idx]))
  colnames(row) <- measures
  row
}

# Open TIFF file
tiff('~/scratch/tractoflow_hcp_dwi/spider_plots/spider_plots_ukbb.tiff', width=8000, height=5000, res=300)

# The drawing code runs inside tryCatch() so the TIFF device is always closed,
# even when a chart fails part-way through.
tryCatch({
  # Set plot parameters: 2 x 3 grid, one panel per brain region
  par(mar = c(3, 3, 3, 3))
  par(mfrow = c(2, 3))

  # Loop through brain regions
  for (i in seq_along(brain_regions)) {
    region_summary <- get(paste0(brain_regions[i], "_data_summary"))

    # Rows that are NOT significant after FDR correction (p_fdr > 0.05).
    # Their extreme t-values are drawn as the two black outlines, so series
    # points outside that band presumably mark significant associations.
    # NOTE(review): confirm this is the intended reading of the band.
    nonsignificant_data <- subset(region_summary, p_fdr > 0.05)
    if (nrow(nonsignificant_data) > 0) {
      min_t_val <- min(nonsignificant_data$t.value, na.rm = TRUE)
      max_t_val <- max(nonsignificant_data$t.value, na.rm = TRUE)
    } else {
      # Everything is significant: there is no band to draw. NA rows are
      # skipped by radarchart() instead of plotting at +/-Inf.
      min_t_val <- NA_real_
      max_t_val <- NA_real_
    }
    t_min <- constant_row(min_t_val, vars)
    t_max <- constant_row(max_t_val, vars)

    # Axis range shared by all axes: extreme t-values of the whole region
    max_t <- max(region_summary$t.value, na.rm = TRUE)
    min_t <- min(region_summary$t.value, na.rm = TRUE)
    max_min <- bind_rows(constant_row(max_t, vars), constant_row(min_t, vars))

    # One row of t-values per DTI metric for this region
    region_data <- lapply(dti_metrics, function(metric) {
      table_name <- paste0(metric, "_", brain_regions[i], "_data")
      t_value_row(get(table_name), vars, table_name)
    })

    # Combine everything into the region summary:
    # Max, Min, one row per metric, then the non-significance band
    region_data_summary[[i]] <- bind_rows(
      c(list(max_min), region_data, list(t_min, t_max))
    )
    rownames(region_data_summary[[i]]) <- c("Max", "Min", series_names, "Min_T", "Max_T")
    colnames(region_data_summary[[i]]) <- axis_labels
    # radarchart() expects max/min in rows 1-2; the band is drawn before the
    # metric series so it lines up with the first two (black) colours.
    data <- region_data_summary[[i]][c("Max", "Min", "Max_T", "Min_T", series_names), ]

    # Generate radar charts
    radarchart(
      data,
      axistype = 1,
      # length.out guarantees exactly one label per guide polygon
      caxislabels = round(seq(min_t, max_t, length.out = n_segments + 1)),
      title = regions[i],
      cex.main = 3,
      pcol = my_colors,
      pfcol = c(NA, NA, NA, NA, NA, NA),
      pty = c(32, 32, 16, 16, 16, 16),
      plwd = c(2, 2, 4, 4, 4, 4),
      plty = c(1, 1, 3, 3, 3, 3),
      cglcol = "grey",
      cglty = 1,
      cglwd = 0.8,
      axislabcol = 'black',
      seg = n_segments,
      calcex = 2.5,
      vlcex = 2.8
    )

    # Add legend (metric series only; the band outlines are not listed)
    legend(
      "bottom",
      legend = series_names,
      fill = my_colors[3:6],
      bty = "n",
      ncol = length(series_names),
      cex = 2.5,
      inset = c(0, -0.06),
      xpd = TRUE
    )
  }
}, finally = dev.off())  # Close TIFF file

In [17]:
# Write data summary as a CSV
# The path is anchored at the home directory ("~/"), matching the TIFF output
# written to the same spider_plots folder, so the result does not depend on
# the current working directory.
write.csv(all_data_summary, "~/scratch/tractoflow_hcp_dwi/spider_plots/UKBB_data_final.csv")