In [None]:
# Script to perform statistical analysis of socioeconomic status in children by mixed linear models
    
Author: Jose Jaime Martinez-Magana
Day: 13 February 2023

This script fits linear models to quality-controlled (QC'd) epigenetic data from 450K arrays, using the quality control steps described in this GitHub notebook: https://github.com/martinezjaime/ewas_saliva_ses/blob/main/qc_data/probe_and_sample_quality_control.ipynb

In [None]:
# If your cluster uses the Slurm workload manager, request computational resources first.
# The command below asks for 32G of memory on the "interactive" partition and starts bash
# under srun; the remaining commands are meant to be run inside that session.
srun --pty --mem=32G -p interactive bash
# load the miniconda module (site-specific; presumably provided by the cluster's module system)
module load miniconda
# activate the conda environment named ewas_saliva
conda activate ewas_saliva
# NOTE: for non-interactive (batch) runs, these steps should be placed in a bash script instead

In [None]:
# content of the assoc_mixed_linear_450K.R

#!/usr/bin/env Rscript --vanilla --slave
# NOTE(review): on Linux everything after the interpreter path in a shebang line is passed
# as ONE argument, so "Rscript --vanilla --slave" may fail to resolve when the script is
# executed directly. Calling it as `Rscript --vanilla assoc_mixed_linear_450K.R ...` avoids
# the problem. The shebang must also be the very first line of the file to take effect.
##################################################################################################
# R script for running linear models for bulk tissue using limma (https://bioconductor.org/packages/release/bioc/html/limma.html)
# and cell-specific analysis using TCA (https://github.com/cozygene/TCA)
# day: 10 February 2023
# author: Jose Jaime Martinez-Magana
####################################################################################

# This script uses three inputs:
#  - the QC'd data produced following https://github.com/martinezjaime/ewas_saliva_ses/blob/main/qc_data/probe_and_sample_quality_control.ipynb
#  - a list of samples to be included in the analysis, as a csv file
#  - the path to the epistructure results produced following https://github.com/martinezjaime/ewas_saliva_ses/blob/main/epigenetic_ancestry/epigenetic_ancestry.ipynb

####################################################################################
# set parameters
# this script uses the library optparse to add command-line arguments to the script
library(optparse)
option_list=list(
    # input: rds object with the quality-controlled data
    make_option(c("-f", "--file"),
                type="character",
                default=NULL,
                help="path to *rds object with qc information", metavar="character"),
    # output prefix: the script later builds the output path as paste0(out, outname)
    make_option(c("-o", "--out"),
                type="character",
                default="out.rds",
                help="output path prefix; --outname is appended to it to build the output rds path [default= %default]", metavar="character"),
    make_option(c("-n", "--outname"),
                type="character",
                default=NULL,
                help="name of the rds file after performing this analysis", metavar="character"),
    make_option(c("-s", "--samplelist"),
                type="character",
                default=NULL,
                help="path to a *csv file with a list of samples to subset the analysis. This script uses a file with SampleID with Array_Sentrix structures and a header with SampleID", metavar="character"),
    make_option(c("-e", "--pcafile"),
                type="character",
                default=NULL,
                help="path to pca output from epistructure-glint for epigenetic ancestry", metavar="character"),
    make_option(c("-p", "--pheno"),
                type="character",
                default=NULL,
                help="name of the column identifying phenotype to be tested in the regression model", metavar="character"),
    make_option(c("-c", "--covar"),
                type="character",
                default=NULL,
                help="list of covariates to be included in the analysis from the phenofile separeted by ','", metavar="character"),
    # parsed as a character string (e.g. "TRUE"), not as a logical
    make_option(c("-v", "--sva"),
                type="character",
                default=NULL,
                help="add TRUE if you want the model to be adjusted by subrrogate variables using SVA", metavar="character"),
    # NOTE(review): this help text is identical to the one for --pheno and looks copy-pasted;
    # confirm what --fixed is meant to select and update the help accordingly.
    make_option(c("-x", "--fixed"),
                type="character",
                default=NULL,
                help="name of the column identifying phenotype to be tested in the regression model", metavar="character")
)

opt_parser=OptionParser(option_list=option_list)
opt=parse_args(opt_parser)

# Arguments the analysis cannot run without: both the QC'd rds object (--file) and the
# epigenetic ancestry PCs (--pcafile) are read unconditionally later in the script, so
# report every missing one up front instead of failing midway through the run.
required_args=c("file","pcafile")
# `[[` is used instead of `$` to avoid partial matching of list names
is_missing=vapply(required_args, function(arg) is.null(opt[[arg]]), logical(1))
if (any(is_missing)){
  print_help(opt_parser)
  stop("At least one argument must be supplied (input file); missing: ",
       paste0("--", required_args[is_missing], collapse=", "), call.=FALSE)
}

# fail early, with the offending option named, when an input path does not exist
for (arg in required_args){
  if (!file.exists(opt[[arg]])){
    stop("File given to --", arg, " does not exist: ", opt[[arg]], call.=FALSE)
  }
}


In [None]:
####################################################################################
# testing script
# Hand-built replacement for the parsed command-line options, used to run the
# notebook interactively. Element names mirror the long option names of the script.
opt <- list(
    # inputs / outputs
    file       = "/vast/palmer/scratch/montalvo-ortiz/jjm262/epigenomics/ewas_saliva_ses/databases/qced/qced_data_v02062023.rds",
    out        = "/vast/palmer/scratch/montalvo-ortiz/jjm262/epigenomics/ewas_saliva_ses/results/",
    outname    = "linear_all_v02102023.rds",
    samplelist = "/vast/palmer/scratch/montalvo-ortiz/jjm262/epigenomics/ewas_saliva_ses/databases/qced/sample_list_test.csv",
    pcafile    = "/vast/palmer/scratch/montalvo-ortiz/jjm262/epigenomics/ewas_saliva_ses/pcs/glint_epi_children_v08February2023.epistructure.pcs.txt",
    # model specification
    pheno      = "SES",
    covar      = "age,gender",
    sva        = TRUE,
    fixed      = ""
)
######################################################################################

In [None]:
# developing script
# load libraries
# NOTE(review): only base R functions are called in this part of the script; the packages
# below are presumably required by later analysis steps.
library(preprocessCore)
library(lumi)
library(TCA)
library(minfi)
library(pracma)
library(matrixStats)
library(limma)
library(sva)
library(SmartSVA)
library(variancePartition)

# loading rds
# progress messages are printed explicitly so they also show up when the script is source()d
print(paste0("Start analysis of data:",Sys.time(),"---","###Analysis path[",opt$file,"]###"))
file=readRDS(opt$file)
# setting output file: opt$out is used as a prefix and opt$outname is appended to it
# (paste0 has no `sep` argument, so none is passed)
outfile=paste0(opt$out,opt$outname)
print(paste0("Output of statistical models will be saved to:",Sys.time(),"---","###Analysis path[",outfile,"]###"))

# loading epigenetic ancestry pcs file
print(paste0("Loading epigenetic ancestry:",Sys.time()))
print(paste0("Warning: this script will use the epigenetic ancestry estimated using glint"))
# read without a header; the first column is treated as the sample identifier and the
# remaining columns as principal components (assumed glint epistructure layout - TODO confirm)
pcs=read.table(opt$pcafile)
# two PCs are selected below, so at least SampleID + 2 PC columns are needed
if (ncol(pcs) < 3){
  stop("The pca file must contain a sample ID column followed by at least two principal components: ",
       opt$pcafile, call.=FALSE)
}
# add colnames to pcs; names are derived from the actual number of PC columns
# (ances_pc1 ... ances_pcN) instead of assuming exactly 10 PCs
colnames(pcs)=c("SampleID",paste0("ances_pc",seq_len(ncol(pcs)-1)))
# add rownames
rownames(pcs)=pcs$SampleID
# remove unnecessary column
pcs$SampleID = NULL
# subsetting pcs
print(paste0("Warning: this script will use only two epigenetic ancestry principal components"))
# drop=FALSE guarantees the result stays a data.frame
pcs=pcs[,1:2,drop=FALSE]

