In [2]:
# Download necessary packages
library(ggplot2)
library(psych)
library(dplyr)
library(fmsb)
library(ggradar)
library(plotrix)

In [3]:
# Extract patient data and patient ROI data
data <- read.csv("~/scratch//tractoflow_hcp_dwi/UKB_WM_Regionwise_Data_allAtlases_allMeasures.csv")
patient_data <- read.csv("~/scratch//tractoflow_hcp_dwi/UKBB_patient_data.csv")

In [4]:
# Subset data frame based on DTI measure
ad_all_data <- data[data$WM_Measure == 'AD', ]
fa_all_data <- data[data$WM_Measure == 'FA', ]
md_all_data <- data[data$WM_Measure == 'MD', ]
rd_all_data <- data[data$WM_Measure == 'RD', ]

In [5]:
# Find all unique values per DTI-based data frame
vec1 <- unique(ad_all_data$Subject_ID)
vec2 <- unique(fa_all_data$Subject_ID)
vec3 <- unique(md_all_data$Subject_ID)
vec4 <- unique(rd_all_data$Subject_ID)

# Find the values that are in vec1 but not in vec2
missing_in_df2 <- setdiff(vec1, vec2)

# Find the values that are in vec2 but not in vec1
missing_in_df1 <- setdiff(vec2, vec1)

# Find the values that are in vec3 but not in vec4
missing_in_df4 <- setdiff(vec3, vec4)

# Find the values that are in vec4 but not in vec3
missing_in_df3 <- setdiff(vec4, vec3)

# Combine the missing values from both directions
all_missing_values <- unique(c(missing_in_df1, missing_in_df2, missing_in_df3, missing_in_df4))

In [6]:
indices_to_remove_df1 <- which(fa_all_data$Subject_ID %in% all_missing_values)

# Remove rows from df1
fa_all_data <- fa_all_data[-indices_to_remove_df1, ]

# Find the indices of rows to be removed in df2
indices_to_remove_df2 <- which(md_all_data$Subject_ID %in% all_missing_values)

# Remove rows from df2
md_all_data <- md_all_data[-indices_to_remove_df2, ]

# Find the indices of rows to be removed in df3
indices_to_remove_df3 <- which(rd_all_data$Subject_ID %in% all_missing_values)

# Remove rows from df3
rd_all_data <- rd_all_data[-indices_to_remove_df3, ]

In [7]:
# Define variables with relevant ROIs
ad_data <- ad_all_data[c("Subject_ID", "WM_Measure", "left_caudate", "right_caudate", "left_putamen", "right_putamen", "left_nucleus_accumbens", "right_nucleus_accumbens", "left_hippocampus", "right_hippocampus", "left_globus_pallidus_externa", "right_globus_pallidus_externa", "left_globus_pallidus_interna", "right_globus_pallidus_Interna")]
fa_data <- fa_all_data[c("Subject_ID", "WM_Measure", "left_caudate", "right_caudate", "left_putamen", "right_putamen", "left_nucleus_accumbens", "right_nucleus_accumbens", "left_hippocampus", "right_hippocampus", "left_globus_pallidus_externa", "right_globus_pallidus_externa", "left_globus_pallidus_interna", "right_globus_pallidus_Interna")]
md_data <- md_all_data[c("Subject_ID", "WM_Measure", "left_caudate", "right_caudate", "left_putamen", "right_putamen", "left_nucleus_accumbens", "right_nucleus_accumbens", "left_hippocampus", "right_hippocampus", "left_globus_pallidus_externa", "right_globus_pallidus_externa", "left_globus_pallidus_interna", "right_globus_pallidus_Interna")]
rd_data <- rd_all_data[c("Subject_ID", "WM_Measure", "left_caudate", "right_caudate", "left_putamen", "right_putamen", "left_nucleus_accumbens", "right_nucleus_accumbens", "left_hippocampus", "right_hippocampus", "left_globus_pallidus_externa", "right_globus_pallidus_externa", "left_globus_pallidus_interna", "right_globus_pallidus_Interna")]

In [8]:
# Calculate average value for each ROI for AD data
ad_data$caudate <- rowMeans(ad_data[c("left_caudate", "right_caudate")], na.rm = TRUE)
ad_data$putamen <- rowMeans(ad_data[c("left_putamen", "right_putamen")], na.rm = TRUE)
ad_data$nacc <- rowMeans(ad_data[c("left_nucleus_accumbens", "right_nucleus_accumbens")], na.rm = TRUE)
ad_data$hippocampus <- rowMeans(ad_data[c("left_hippocampus", "right_hippocampus")], na.rm = TRUE)
ad_data$pallidum <- rowMeans(ad_data[c("left_globus_pallidus_externa", "right_globus_pallidus_externa", "left_globus_pallidus_interna", "right_globus_pallidus_Interna")], na.rm = TRUE)

# Calculate average value for each ROI for FA data
fa_data$caudate <- rowMeans(fa_data[c("left_caudate", "right_caudate")], na.rm = TRUE)
fa_data$putamen <- rowMeans(fa_data[c("left_putamen", "right_putamen")], na.rm = TRUE)
fa_data$nacc <- rowMeans(fa_data[c("left_nucleus_accumbens", "right_nucleus_accumbens")], na.rm = TRUE)
fa_data$hippocampus <- rowMeans(fa_data[c("left_hippocampus", "right_hippocampus")], na.rm = TRUE)
fa_data$pallidum <- rowMeans(fa_data[c("left_globus_pallidus_externa", "right_globus_pallidus_externa", "left_globus_pallidus_interna", "right_globus_pallidus_Interna")], na.rm = TRUE)

# Calculate average value for each ROI for MD data
md_data$caudate <- rowMeans(md_data[c("left_caudate", "right_caudate")], na.rm = TRUE)
md_data$putamen <- rowMeans(md_data[c("left_putamen", "right_putamen")], na.rm = TRUE)
md_data$nacc <- rowMeans(md_data[c("left_nucleus_accumbens", "right_nucleus_accumbens")], na.rm = TRUE)
md_data$hippocampus <- rowMeans(md_data[c("left_hippocampus", "right_hippocampus")], na.rm = TRUE)
md_data$pallidum <- rowMeans(md_data[c("left_globus_pallidus_externa", "right_globus_pallidus_externa", "left_globus_pallidus_interna", "right_globus_pallidus_Interna")], na.rm = TRUE)

# Calculate average value for each ROI for RD data
rd_data$caudate <- rowMeans(rd_data[c("left_caudate", "right_caudate")], na.rm = TRUE)
rd_data$putamen <- rowMeans(rd_data[c("left_putamen", "right_putamen")], na.rm = TRUE)
rd_data$nacc <- rowMeans(rd_data[c("left_nucleus_accumbens", "right_nucleus_accumbens")], na.rm = TRUE)
rd_data$hippocampus <- rowMeans(rd_data[c("left_hippocampus", "right_hippocampus")], na.rm = TRUE)
rd_data$pallidum <- rowMeans(rd_data[c("left_globus_pallidus_externa", "right_globus_pallidus_externa", "left_globus_pallidus_interna", "right_globus_pallidus_Interna")], na.rm = TRUE)

In [9]:
# Remove outliers for AD data
Q3_c=as.numeric(quantile(ad_data$caudate,0.75, na.rm=TRUE))
Q1_c=as.numeric(quantile(ad_data$caudate,0.25, na.rm=TRUE))
upper_c=Q3_c+(2.2*(Q3_c-Q1_c))
lower_c=Q1_c-(2.2*(Q3_c-Q1_c))
Q3_pu=as.numeric(quantile(ad_data$putamen,0.75, na.rm=TRUE))
Q1_pu=as.numeric(quantile(ad_data$putamen,0.25, na.rm=TRUE))
upper_pu=Q3_pu+(2.2*(Q3_pu-Q1_pu))
lower_pu=Q1_pu-(2.2*(Q3_pu-Q1_pu))
Q3_n=as.numeric(quantile(ad_data$nacc,0.75, na.rm=TRUE))
Q1_n=as.numeric(quantile(ad_data$nacc,0.25, na.rm=TRUE))
upper_n=Q3_n+(2.2*(Q3_n-Q1_n))
lower_n=Q1_n-(2.2*(Q3_n-Q1_n))
Q3_h=as.numeric(quantile(ad_data$hippocampus,0.75, na.rm=TRUE))
Q1_h=as.numeric(quantile(ad_data$hippocampus,0.25, na.rm=TRUE))
upper_h=Q3_h+(2.2*(Q3_h-Q1_h))
lower_h=Q1_h-(2.2*(Q3_h-Q1_h))
Q3_pa=as.numeric(quantile(ad_data$pallidum,0.75, na.rm=TRUE))
Q1_pa=as.numeric(quantile(ad_data$pallidum,0.25, na.rm=TRUE))
upper_pa=Q3_pa+(2.2*(Q3_pa-Q1_pa))
lower_pa=Q1_pa-(2.2*(Q3_pa-Q1_pa))
ad_data$caudate[ad_data$caudate<lower_c]=NA
ad_data$caudate[ad_data$caudate>upper_c]=NA
ad_data$caudate[ad_data$putamen<lower_pu]=NA
ad_data$caudate[ad_data$putamen>upper_pu]=NA
ad_data$caudate[ad_data$nacc<lower_n]=NA
ad_data$caudate[ad_data$nacc>upper_n]=NA
ad_data$caudate[ad_data$hippocampus<lower_h]=NA
ad_data$caudate[ad_data$hippocampus>upper_h]=NA
ad_data$caudate[ad_data$pallidum<lower_pa]=NA
ad_data$caudate[ad_data$pallidum>upper_pa]=NA

# Remove outliers for FA data
Q3_c=as.numeric(quantile(fa_data$caudate,0.75, na.rm=TRUE))
Q1_c=as.numeric(quantile(fa_data$caudate,0.25, na.rm=TRUE))
upper_c=Q3_c+(2.2*(Q3_c-Q1_c))
lower_c=Q1_c-(2.2*(Q3_c-Q1_c))
Q3_pu=as.numeric(quantile(fa_data$putamen,0.75, na.rm=TRUE))
Q1_pu=as.numeric(quantile(fa_data$putamen,0.25, na.rm=TRUE))
upper_pu=Q3_pu+(2.2*(Q3_pu-Q1_pu))
lower_pu=Q1_pu-(2.2*(Q3_pu-Q1_pu))
Q3_n=as.numeric(quantile(fa_data$nacc,0.75, na.rm=TRUE))
Q1_n=as.numeric(quantile(fa_data$nacc,0.25, na.rm=TRUE))
upper_n=Q3_n+(2.2*(Q3_n-Q1_n))
lower_n=Q1_n-(2.2*(Q3_n-Q1_n))
Q3_h=as.numeric(quantile(fa_data$hippocampus,0.75, na.rm=TRUE))
Q1_h=as.numeric(quantile(fa_data$hippocampus,0.25, na.rm=TRUE))
upper_h=Q3_h+(2.2*(Q3_h-Q1_h))
lower_h=Q1_h-(2.2*(Q3_h-Q1_h))
Q3_pa=as.numeric(quantile(fa_data$pallidum,0.75, na.rm=TRUE))
Q1_pa=as.numeric(quantile(fa_data$pallidum,0.25, na.rm=TRUE))
upper_pa=Q3_pa+(2.2*(Q3_pa-Q1_pa))
lower_pa=Q1_pa-(2.2*(Q3_pa-Q1_pa))
fa_data$caudate[fa_data$caudate<lower_c]=NA
fa_data$caudate[fa_data$caudate>upper_c]=NA
fa_data$caudate[fa_data$putamen<lower_pu]=NA
fa_data$caudate[fa_data$putamen>upper_pu]=NA
fa_data$caudate[fa_data$nacc<lower_n]=NA
fa_data$caudate[fa_data$nacc>upper_n]=NA
fa_data$caudate[fa_data$hippocampus<lower_h]=NA
fa_data$caudate[fa_data$hippocampus>upper_h]=NA
fa_data$caudate[fa_data$pallidum<lower_pa]=NA
fa_data$caudate[fa_data$pallidum>upper_pa]=NA

# Remove outliers for MD data
Q3_c=as.numeric(quantile(md_data$caudate,0.75, na.rm=TRUE))
Q1_c=as.numeric(quantile(md_data$caudate,0.25, na.rm=TRUE))
upper_c=Q3_c+(2.2*(Q3_c-Q1_c))
lower_c=Q1_c-(2.2*(Q3_c-Q1_c))
Q3_pu=as.numeric(quantile(md_data$putamen,0.75, na.rm=TRUE))
Q1_pu=as.numeric(quantile(md_data$putamen,0.25, na.rm=TRUE))
upper_pu=Q3_pu+(2.2*(Q3_pu-Q1_pu))
lower_pu=Q1_pu-(2.2*(Q3_pu-Q1_pu))
Q3_n=as.numeric(quantile(md_data$nacc,0.75, na.rm=TRUE))
Q1_n=as.numeric(quantile(md_data$nacc,0.25, na.rm=TRUE))
upper_n=Q3_n+(2.2*(Q3_n-Q1_n))
lower_n=Q1_n-(2.2*(Q3_n-Q1_n))
Q3_h=as.numeric(quantile(md_data$hippocampus,0.75, na.rm=TRUE))
Q1_h=as.numeric(quantile(md_data$hippocampus,0.25, na.rm=TRUE))
upper_h=Q3_h+(2.2*(Q3_h-Q1_h))
lower_h=Q1_h-(2.2*(Q3_h-Q1_h))
Q3_pa=as.numeric(quantile(md_data$pallidum,0.75, na.rm=TRUE))
Q1_pa=as.numeric(quantile(md_data$pallidum,0.25, na.rm=TRUE))
upper_pa=Q3_pa+(2.2*(Q3_pa-Q1_pa))
lower_pa=Q1_pa-(2.2*(Q3_pa-Q1_pa))
md_data$caudate[md_data$caudate<lower_c]=NA
md_data$caudate[md_data$caudate>upper_c]=NA
md_data$caudate[md_data$putamen<lower_pu]=NA
md_data$caudate[md_data$putamen>upper_pu]=NA
md_data$caudate[md_data$nacc<lower_n]=NA
md_data$caudate[md_data$nacc>upper_n]=NA
md_data$caudate[md_data$hippocampus<lower_h]=NA
md_data$caudate[md_data$hippocampus>upper_h]=NA
md_data$caudate[md_data$pallidum<lower_pa]=NA
md_data$caudate[md_data$pallidum>upper_pa]=NA

# Remove outliers for RD data
Q3_c=as.numeric(quantile(rd_data$caudate,0.75, na.rm=TRUE))
Q1_c=as.numeric(quantile(rd_data$caudate,0.25, na.rm=TRUE))
upper_c=Q3_c+(2.2*(Q3_c-Q1_c))
lower_c=Q1_c-(2.2*(Q3_c-Q1_c))
Q3_pu=as.numeric(quantile(rd_data$putamen,0.75, na.rm=TRUE))
Q1_pu=as.numeric(quantile(rd_data$putamen,0.25, na.rm=TRUE))
upper_pu=Q3_pu+(2.2*(Q3_pu-Q1_pu))
lower_pu=Q1_pu-(2.2*(Q3_pu-Q1_pu))
Q3_n=as.numeric(quantile(rd_data$nacc,0.75, na.rm=TRUE))
Q1_n=as.numeric(quantile(rd_data$nacc,0.25, na.rm=TRUE))
upper_n=Q3_n+(2.2*(Q3_n-Q1_n))
lower_n=Q1_n-(2.2*(Q3_n-Q1_n))
Q3_h=as.numeric(quantile(rd_data$hippocampus,0.75, na.rm=TRUE))
Q1_h=as.numeric(quantile(rd_data$hippocampus,0.25, na.rm=TRUE))
upper_h=Q3_h+(2.2*(Q3_h-Q1_h))
lower_h=Q1_h-(2.2*(Q3_h-Q1_h))
Q3_pa=as.numeric(quantile(rd_data$pallidum,0.75, na.rm=TRUE))
Q1_pa=as.numeric(quantile(rd_data$pallidum,0.25, na.rm=TRUE))
upper_pa=Q3_pa+(2.2*(Q3_pa-Q1_pa))
lower_pa=Q1_pa-(2.2*(Q3_pa-Q1_pa))
rd_data$caudate[rd_data$caudate<lower_c]=NA
rd_data$caudate[rd_data$caudate>upper_c]=NA
rd_data$caudate[rd_data$putamen<lower_pu]=NA
rd_data$caudate[rd_data$putamen>upper_pu]=NA
rd_data$caudate[rd_data$nacc<lower_n]=NA
rd_data$caudate[rd_data$nacc>upper_n]=NA
rd_data$caudate[rd_data$hippocampus<lower_h]=NA
rd_data$caudate[rd_data$hippocampus>upper_h]=NA
rd_data$caudate[rd_data$pallidum<lower_pa]=NA
rd_data$caudate[rd_data$pallidum>upper_pa]=NA

In [10]:
# Rename column names to patient DTI data frames
colnames(patient_data)[colnames(patient_data) == "eid"] <- "Subject_ID"

In [11]:
# Merge DTI data frames with relevant patient variables
subj_data <- patient_data[c("Subject_ID", "sex_31.0.0", "age_when_attended_assessment_centre_21003.2.0", "volume_of_estimatedtotalintracranial_whole_brain_26521.2.0", "uk_biobank_assessment_centre_54.2.0", "mean_tfmri_head_motion_averaged_across_space_and_time_points_25742.2.0", "mean_rfmri_head_motion_averaged_across_space_and_time_points_25741.2.0", "date_of_attending_assessment_centre_53.2.0", "body_mass_index_bmi_21001.2.0", "c.reactive_protein_30710.0.0", "glucose_30740.0.0", "glycated_haemoglobin_hba1c_30750.0.0", "cholesterol_30690.0.0", "hdl_cholesterol_30760.0.0", "ldl_direct_30780.0.0", "triglycerides_30870.0.0", "systolic_blood_pressure_automated_reading_4080.0.1", "diastolic_blood_pressure_automated_reading_4079.0.1")]
ad_subj_data=merge(subj_data, ad_data)
fa_subj_data=merge(subj_data, fa_data)
md_subj_data=merge(subj_data, md_data)
rd_subj_data=merge(subj_data, rd_data)

In [12]:
# Extract previously calculated hypothalamus data from scatter_plots_UKBB.ipynb
hypo_ad <- read.csv("~/linear_regression/UKBB_ad_data.csv")
hypo_fa <- read.csv("~/linear_regression/UKBB_fa_data.csv")
hypo_md <- read.csv("~/linear_regression/UKBB_md_data.csv")
hypo_rd <- read.csv("~/linear_regression/UKBB_rd_data.csv")

In [13]:
indices_to_remove_df1 <- which(hypo_fa$eid %in% all_missing_values)

# Remove rows from df1
hypo_fa <- hypo_fa[-indices_to_remove_df1, ]

# Find the indices of rows to be removed in df2
indices_to_remove_df2 <- which(hypo_md$eid %in% all_missing_values)

# Remove rows from df2
hypo_md <- hypo_md[-indices_to_remove_df2, ]

# Find the indices of rows to be removed in df3
indices_to_remove_df3 <- which(hypo_rd$eid %in% all_missing_values)

# Remove rows from df3
hypo_rd <- hypo_rd[-indices_to_remove_df3, ]

In [18]:
# Add hypothalamic values to subject data frames
ad_subj_data$full_hypo <- hypo_ad$full_hypo
fa_subj_data$full_hypo <- hypo_fa$full_hypo
md_subj_data$full_hypo <- hypo_md$full_hypo
rd_subj_data$full_hypo <- hypo_rd$full_hypo

ERROR: Error in `$<-.data.frame`(`*tmp*`, full_hypo, value = numeric(0)): replacement has 0 rows, data has 26242


In [19]:
# Remove outliers for hypothalamic AD data
Q3=as.numeric(quantile(ad_subj_data$full_hypo,0.75, na.rm=TRUE))
Q1=as.numeric(quantile(ad_subj_data$full_hypo,0.25, na.rm=TRUE))
upper=Q3+(2.2*(Q3-Q1))
lower=Q1-(2.2*(Q3-Q1))
ad_subj_data$full_hypo[ad_subj_data$full_hypo<lower]=NA
ad_subj_data$full_hypo[ad_subj_data$full_hypo>upper]=NA

# Remove outliers for hypothalamic Fa data
Q3=as.numeric(quantile(fa_subj_data$full_hypo,0.75, na.rm=TRUE))
Q1=as.numeric(quantile(fa_subj_data$full_hypo,0.25, na.rm=TRUE))
upper=Q3+(2.2*(Q3-Q1))
lower=Q1-(2.2*(Q3-Q1))
fa_subj_data$full_hypo[fa_subj_data$full_hypo<lower]=NA
fa_subj_data$full_hypo[fa_subj_data$full_hypo>upper]=NA

# Remove outliers for hypothalamic MD data
Q3=as.numeric(quantile(md_subj_data$full_hypo,0.75, na.rm=TRUE))
Q1=as.numeric(quantile(md_subj_data$full_hypo,0.25, na.rm=TRUE))
upper=Q3+(2.2*(Q3-Q1))
lower=Q1-(2.2*(Q3-Q1))
md_subj_data$full_hypo[md_subj_data$full_hypo<lower]=NA
md_subj_data$full_hypo[md_subj_data$full_hypo>upper]=NA

# Remove outliers for hypothalamic RD data
Q3=as.numeric(quantile(rd_subj_data$full_hypo,0.75, na.rm=TRUE))
Q1=as.numeric(quantile(rd_subj_data$full_hypo,0.25, na.rm=TRUE))
upper=Q3+(2.2*(Q3-Q1))
lower=Q1-(2.2*(Q3-Q1))
rd_subj_data$full_hypo[rd_subj_data$full_hypo<lower]=NA
rd_subj_data$full_hypo[rd_subj_data$full_hypo>upper]=NA

ERROR: Error in `$<-.data.frame`(`*tmp*`, full_hypo, value = logical(0)): replacement has 0 rows, data has 26242


In [20]:
# Write CSV files for DTI data frames
write.csv(ad_subj_data, "~/scratch/tractoflow_hcp_dwi/linear_regression/UKBB_ad_data_other.csv")
write.csv(fa_subj_data, "~/scratch/tractoflow_hcp_dwi/linear_regression/UKBB_fa_data_other.csv")
write.csv(md_subj_data, "~/scratch/tractoflow_hcp_dwi/linear_regression/UKBB_md_data_other.csv")
write.csv(rd_subj_data, "~/scratch/tractoflow_hcp_dwi/linear_regression/UKBB_rd_data_other.csv")

In [1]:
# It takes a while to load UKB_WM_Regionwise_Data_allAtlases_allMeasures.csv,
# so if the code needs to edited you can start from here by redefining the
# variables from the prewritten CSV files
ad_subj_data <- read.csv("~/linear_regression/UKBB_ad_data_other.csv")
fa_subj_data <- read.csv("~/linear_regression/UKBB_fa_data_other.csv")
md_subj_data <- read.csv("~/linear_regression/UKBB_md_data_other.csv")
rd_subj_data <- read.csv("~/linear_regression/UKBB_rd_data_other.csv")

In [24]:
ad_subj_data$gl

X,Subject_ID,sex_31.0.0,age_when_attended_assessment_centre_21003.2.0,volume_of_estimatedtotalintracranial_whole_brain_26521.2.0,uk_biobank_assessment_centre_54.2.0,mean_tfmri_head_motion_averaged_across_space_and_time_points_25742.2.0,mean_rfmri_head_motion_averaged_across_space_and_time_points_25741.2.0,date_of_attending_assessment_centre_53.2.0,body_mass_index_bmi_21001.2.0,⋯,left_globus_pallidus_externa,right_globus_pallidus_externa,left_globus_pallidus_interna,right_globus_pallidus_Interna,caudate,putamen,nacc,hippocampus,pallidum,full_hypo
<int>,<int>,<chr>,<int>,<int>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1000394,Male,68,1873620,Cheadle (imaging),0.1290760,0.0972601,2019-01-17,25.9879,⋯,0.001009,0.000969,0.001011,0.001014,0.0014510,0.0009485,0.0010100,0.0011570,0.00100075,0.001763148
2,1000635,Male,67,1803160,Newcastle (imaging),0.2776660,,2017-12-21,,⋯,0.000879,0.000960,0.000973,0.001035,0.0011450,0.0009420,0.0009265,0.0010765,0.00096175,0.001639111
3,1001159,Female,49,1568080,Cheadle (imaging),,0.0848231,2016-12-10,37.0755,⋯,0.000843,0.000881,0.000907,0.000920,0.0009780,0.0008860,0.0008585,0.0010455,0.00088775,0.001557852
4,1001329,Male,69,1545120,Cheadle (imaging),0.2716030,0.1222250,2018-04-25,28.6979,⋯,0.001054,0.001110,0.001189,0.001232,,0.0010095,0.0009430,0.0011105,0.00114625,0.001716481
5,1001337,Male,65,1594040,Cheadle (imaging),0.1121660,0.1247030,2018-12-14,25.8546,⋯,0.000779,0.000820,0.000877,0.000870,0.0012440,0.0008670,0.0008840,0.0011300,0.00083650,0.001585148
6,1001523,Male,68,1724190,Reading (imaging),,0.0606534,2019-03-30,23.6854,⋯,0.000806,0.000975,0.000791,0.000860,0.0010915,0.0008640,0.0008940,0.0010830,0.00085800,0.001528704
7,1001598,Female,51,1569210,Newcastle (imaging),,0.1821780,2019-05-21,,⋯,0.000779,0.000883,0.000859,0.000849,0.0014430,0.0008815,0.0009405,0.0011855,0.00084250,0.002008704
8,1001645,Female,69,1495080,Newcastle (imaging),0.1400690,0.0925638,2018-05-03,20.2153,⋯,0.000938,0.000942,0.000897,0.000904,,0.0010635,0.0009045,0.0011600,0.00092025,0.001807519
9,1002545,Male,72,1706860,Cheadle (imaging),0.1723060,0.1297470,2019-05-06,24.9022,⋯,0.000954,0.000992,0.000979,0.000942,0.0012305,0.0010490,0.0009265,0.0010580,0.00096675,0.001801185
10,1002612,Male,67,1679210,Reading (imaging),0.1429010,0.0877905,2019-06-17,28.9686,⋯,0.000792,0.000944,0.000817,0.000980,0.0012165,0.0009155,0.0008795,0.0011385,0.00088325,0.001814481


In [20]:
# Define necessary variables
subject_data <- list(ad_subj_data, fa_subj_data, md_subj_data, rd_subj_data)
subject_datasets <- c("ad", "fa", "md", "rd")
brain_regions <- c("caudate", "putamen", "nacc", "hippocampus", "pallidum", "full_hypo")
vars <- c("CRP", "Glucose", "HBA1C", "Cholesterol", "HDL_Chol", "LDL_Chol", "Triglycerides", "Systolic_BP", "Diastolic_BP")
variables <- c("c.reactive_protein_30710.0.0", 
                        "glucose_30740.0.0", "glycated_haemoglobin_hba1c_30750.0.0", 
                        "cholesterol_30690.0.0", "hdl_cholesterol_30760.0.0", 
                        "ldl_direct_30780.0.0", "triglycerides_30870.0.0", 
                        "systolic_blood_pressure_automated_reading_4080.0.1", 
                        "diastolic_blood_pressure_automated_reading_4079.0.1")
all_data_summary <- data.frame()

In [21]:
# Loop through the DTI measure data frames
for (i in seq_along(subject_data)) {
    current_data <- subject_data[[i]]
    j=1
    # Loop through each brain region
    for (region in brain_regions) {
        k=1
        # Loop through each obesity-related parameter
        for (variable in variables) {
            # Calculate regression for current region, parameter, and data frame
            lm_model <- lm(get(region) ~ get(variable) + 
                           age_when_attended_assessment_centre_21003.2.0 + body_mass_index_bmi_21001.2.0 +
                           poly(age_when_attended_assessment_centre_21003.2.0, 2, raw=TRUE) * sex_31.0.0 +
                           poly(difftime(as.Date(date_of_attending_assessment_centre_53.2.0),
                                min(as.Date(date_of_attending_assessment_centre_53.2.0)), units='days'), 2, raw=TRUE) + 
                           uk_biobank_assessment_centre_54.2.0 + 
                           volume_of_estimatedtotalintracranial_whole_brain_26521.2.0, data = current_data)
            summary_model <- summary(lm_model)
            values <- data.frame(summary_model$coefficients[2,])
            values <- as.data.frame(t(values))
            # Create necessary columns to make the data frame more readable
            values$metric <- subject_datasets[i]
            values$region <- region
            values$measure <- vars[k]
            # Rename row names to relevant patient information
            row_name <- paste(subject_datasets[i],"_", region, "_", variable)
            rownames(values) <- row_name 
            all_data_summary <- rbind(all_data_summary, values)
            k=k+1
        j=j+1
        }
    }
}


In [22]:
# Write CSV file for summarized UKBB data
write.csv(all_data_summary, "~/linear_regression/UKBB_all_data_summary_bmi_covariate.csv")

In [23]:
all_data_summary

Unnamed: 0_level_0,Estimate,Std. Error,t value,Pr(>|t|),metric,region,measure
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
ad _ caudate _ c.reactive_protein_30710.0.0,4.244933e-06,9.488018e-07,4.4739933,7.720244e-06,ad,caudate,CRP
ad _ caudate _ glucose_30740.0.0,3.762371e-06,1.945969e-06,1.9334174,5.320000e-02,ad,caudate,Glucose
ad _ caudate _ glycated_haemoglobin_hba1c_30750.0.0,6.405129e-07,2.902144e-07,2.2070336,2.732277e-02,ad,caudate,HBA1C
ad _ caudate _ cholesterol_30690.0.0,3.143001e-06,9.586258e-07,3.2786522,1.044744e-03,ad,caudate,Cholesterol
ad _ caudate _ hdl_cholesterol_30760.0.0,7.961500e-06,3.277328e-06,2.4292657,1.513864e-02,ad,caudate,HDL_Chol
ad _ caudate _ ldl_direct_30780.0.0,2.706446e-06,1.242041e-06,2.1790303,2.934057e-02,ad,caudate,LDL_Chol
ad _ caudate _ triglycerides_30870.0.0,4.920195e-06,1.410702e-06,3.4877649,4.880981e-04,ad,caudate,Triglycerides
ad _ caudate _ systolic_blood_pressure_automated_reading_4080.0.1,3.386402e-07,6.225101e-08,5.4399157,5.391552e-08,ad,caudate,Systolic_BP
ad _ caudate _ diastolic_blood_pressure_automated_reading_4079.0.1,6.583806e-07,1.053131e-07,6.2516518,4.141315e-10,ad,caudate,Diastolic_BP
ad _ putamen _ c.reactive_protein_30710.0.0,-1.776700e-05,2.193484e-05,-0.8099898,4.179548e-01,ad,putamen,CRP
