# Computing PheWAS for male and female non-smokers
### Author: Mihail Mihov

In [None]:
# Attach the packages this notebook relies on
library(data.table)
library(broom)

# Set the notebook display option so that up to 1000 columns are shown
options(repr.matrix.max.cols = 1000)

# Import the phenotype table; data.table = FALSE makes fread() return a
# plain data.frame rather than a data.table
phenotypes <- fread(
  input = "/home//mmihov/Projects/MS_TWAS_PheWAS_heritability/MS_Cox_PheWAS_Cor/data/V2/UoA_UKB_phenotypes_2021-19-11.txt",
  data.table = FALSE
)

### Data quality control 

In [None]:
# Sample-level quality control.
#
# Each step builds a logical keep-mask (TRUE = retain the row), displays how
# many rows are excluded and retained, and then subsets `phenotypes`.
# Logical masks are used instead of `-which(...)` because negative indexing
# with an empty index vector (no row matched) would drop EVERY row.

# Exclude individuals with sex chromosome abnormalities: any non-missing
# value in `sex_aneu` flags the participant for removal.
keep <- is.na(phenotypes[["sex_aneu"]])

sum(!keep)  # rows excluded
sum(keep)   # rows retained
phenotypes <- phenotypes[keep, , drop = FALSE]

# Exclude individuals who failed genotyping QC: only rows with a missing
# `gen_exclude` value are retained.
keep <- is.na(phenotypes[["gen_exclude"]])

sum(!keep)  # rows excluded
sum(keep)   # rows retained
phenotypes <- phenotypes[keep, , drop = FALSE]

# Exclude samples that are not genetically European: retain rows where
# `gen_ethnicity` equals 1. `%in%` never yields NA, so rows with a missing
# ethnicity are excluded (same outcome as the former `which(... == 1)`).
keep <- phenotypes[["gen_ethnicity"]] %in% 1

sum(!keep)  # rows excluded
sum(keep)   # rows retained
phenotypes <- phenotypes[keep, , drop = FALSE]

# Exclude samples with a missing maternal-smoking-during-pregnancy status,
# i.e. rows whose `maternal_smoking` is the literal string "missing".
# Rows where the column is NA are retained, as before.
# NOTE(review): if `maternal_smoking` was read as character because of the
# "missing" label, it stays character after this filter -- confirm that the
# downstream analysis expects that type.
keep <- !(phenotypes[["maternal_smoking"]] %in% "missing")

sum(!keep)  # rows excluded
sum(keep)   # rows retained
phenotypes <- phenotypes[keep, , drop = FALSE]

In [None]:
# Standardise every continuous variable used later to mean = 0 and SD = 1.
#
# scale() returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes; as.numeric() strips both so that each column
# stays a plain numeric vector inside the data frame.
continuous_vars <- c(
  "x_WBC", "x_RBC", "x_lymphocytes", "x_platelet_count", "x_neutrophill",
  "x_Microalbumin", "x_Creatinine", "x_APOA", "x_APOB", "x_Cholesterol",
  "x_HDL", "x_LDL", "x_Triglycerides", "x_CRP", "x_Cystatin", "x_HbA1c",
  "x_IGF", "x_VITD", "x_GGT", "x_ASP_AT", "x_ALA_AT", "x_AP"
)

# Fail early, naming the offending columns, instead of erroring inside
# scale() on the first missing column.
missing_vars <- setdiff(continuous_vars, names(phenotypes))
if (length(missing_vars) > 0) {
  stop(
    "Columns to scale not found in `phenotypes`: ",
    paste(missing_vars, collapse = ", "),
    call. = FALSE
  )
}

phenotypes[continuous_vars] <- lapply(
  phenotypes[continuous_vars],
  function(values) as.numeric(scale(values))
)

### Compute the PheWAS summary statistics 

In [None]:
# Load the PheWAS package
suppressMessages(library(PheWAS))

# Read in the PheWAS incidents phenotype data
phewas_phenotypes <- fread(
  "/Your_directory/phenotypes_PhewasTable_incidents.txt",
  data.table = FALSE
)

# Covariates passed to every PheWAS run (participant id first)
covariate_cols <- c(
  "studiepersonid", "x_agebase", "alcohol", "x_BMI", "genoPC1",
  "genoPC2", "genoPC3", "lack_of_PA", "x_RBC", "x_lymphocytes", "x_APOB",
  "x_Cystatin", "x_HbA1c", "x_IGF", "x_VITD"
)

# Run the PheWAS of maternal smoking among non-smokers of one sex, add Z
# scores, write the summary statistics to `out_file`, and return them.
#
# pheno        data frame with the participant-level variables
# phewas_pheno PheWAS phenotype table passed to phewas()
# sex_code     value of `sex` selecting the stratum
# out_file     path of the tab-separated output file
run_nonsmoker_phewas <- function(pheno, phewas_pheno, sex_code, out_file) {
  # which() discards comparisons that evaluate to NA; indexing with the raw
  # logical vector would instead insert all-NA rows into the exposure table.
  stratum_rows <- which(
    pheno[["smoking_status"]] == 0 & pheno[["sex"]] == sex_code
  )

  stats <- phewas(
    phenotypes = phewas_pheno,
    genotypes = pheno[stratum_rows, c("studiepersonid", "maternal_smoking")],
    covariates = pheno[, covariate_cols],
    cores = 4,
    additive.genotypes = TRUE,
    min.records = 500
  )

  # Compute Z scores
  stats[["Z"]] <- as.numeric(stats[["beta"]]) / as.numeric(stats[["SE"]])

  # Write the summary statistics to a file
  write.table(stats, file = out_file, sep = "\t", row.names = FALSE,
              quote = FALSE)

  stats
}

# Compute PheWAS for non-smoking female participants (sex == 0)
results <- run_nonsmoker_phewas(
  phenotypes, phewas_phenotypes,
  sex_code = 0,
  out_file = "/Your_directory/PHEWAS_nonsmokers_female.txt"
)

# Compute PheWAS for non-smoking male participants (sex == 1)
results <- run_nonsmoker_phewas(
  phenotypes, phewas_phenotypes,
  sex_code = 1,
  out_file = "/Your_directory/PHEWAS_nonsmokers_male.txt"
)