Skip to content

Commit

Permalink
Scripts used for the RoSLA paper
Browse files Browse the repository at this point in the history
  • Loading branch information
Neil Davies committed May 17, 2017
0 parents commit b57b8a3
Show file tree
Hide file tree
Showing 25 changed files with 2,311 additions and 0 deletions.
54 changes: 54 additions & 0 deletions cr_1_covariates.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//Neil Davies 08/07/15
//This produces Table 1: descriptive statistics for the biobank paper

//Move to the project folder so the relative paths used below resolve,
//then load the raw phenotype extract (replacing anything in memory)
cd "/Volumes/Height_BMI_and_schooling/UK Biobank - ROSLA/"
use "raw_data/biobank_phenotypes_nmd_150417.dta", clear

//Program to combine the fields 0 and 1 for each variable
//merge_var <stub>
//Fills gaps in <stub>_0_0 using <stub>_1_0.
//A value is copied only when <stub>_0_0 is missing or negative AND
//<stub>_1_0 is non-missing and strictly positive; otherwise <stub>_0_0 is untouched.
capture program drop merge_var
program define merge_var
	args stub
	local first `stub'_0_0
	local second `stub'_1_0
	replace `first' = `second' if (`first'<0 | `first'==.) & (`second'!=. & `second'>0)
end

//Parental survival: 1 when the merged field equals 1, 0 for any other
//non-negative answer; negative codes and missing values stay missing
local fld_mother n_1835
local fld_father n_1797
foreach parent in mother father {
	local src `fld_`parent''_0_0
	merge_var `fld_`parent''
	generate cov_`parent'_alive = (`src'==1) if `src'!=. & `src'>=0
}

//Sibling counts: the merged value is used as recorded when it is non-negative
local fld_sisters n_1883
local fld_brothers n_1873
foreach sib in sisters brothers {
	local src `fld_`sib''_0_0
	merge_var `fld_`sib''
	generate cov_num_`sib' = `src' if `src'!=. & `src'>=0
}

//Breastfed as a baby: non-negative values of the merged field are copied unchanged
merge_var n_1677
generate cov_breastfed = n_1677_0_0 if n_1677_0_0!=. & n_1677_0_0>=0

//Comparative body size and height at age 10.
//Both fields are re-ordered the same way: source 1 -> 1, source 3 -> 2, source 2 -> 3.
//Any other value (negative codes, missing) is left missing.
//NOTE(review): presumably this puts the categories in a natural order - confirm against the field coding.
local fld_bodysize n_1687
local fld_height n_1697
foreach trait in bodysize height {
	local src `fld_`trait''_0_0
	merge_var `fld_`trait''
	generate cov_comp_`trait'10 = cond(`src'==1, 1, cond(`src'==3, 2, cond(`src'==2, 3, .)))
}

//Maternal smoking around birth: 1 when the merged field equals 1,
//0 for any other non-negative answer, missing otherwise
merge_var n_1787
local ms n_1787_0_0
generate cov_matsmoking = (`ms'==1) if `ms'!=. & `ms'>=0

//Birth weight: merged value copied as recorded (no recoding of special values here)
merge_var n_20022
generate cov_birthweight = n_20022_0_0

//Sex: copied straight from n_31_0_0
generate cov_male = n_31_0_0

//Keep only the participant identifier and the derived covariates, then write them out
keep n_eid cov_*
compress
save "working data/covariates", replace
287 changes: 287 additions & 0 deletions cr_2_Educ_by_birth_quarter.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
//Neil Davies 03/07/15
//This creates an indicator for birth month in the UK Biobank data

cd "/Volumes/Height_BMI_and_schooling/UK Biobank - ROSLA"

use "raw_data/biobank_phenotypes_nmd_150417.dta", clear

//Year and month of birth combined into one number, YYYYMM
//(n_34_0_0 is the year, n_52_0_0 the month - they become yob and mob further down)
gen year_month_birth=100*n_34_0_0+n_52_0_0
tab year_month_birth

//Quarter-of-birth indicator coded as year*10 + quarter, where the quarters are
//1 = Dec/Jan/Feb, 2 = Mar/Apr/May, 3 = Jun/Jul/Aug, 4 = Sep/Oct/Nov.
//NOTE(review): December is grouped with January/February of the SAME calendar
//year (not the following one) - confirm this is intended.
gen year_q=n_34_0_0*10+1 if inlist(n_52_0_0,12,1,2)
replace year_q =n_34_0_0*10+2 if inlist(n_52_0_0,3,4,5)
replace year_q =n_34_0_0*10+3 if inlist(n_52_0_0,6,7,8)
replace year_q =n_34_0_0*10+4 if inlist(n_52_0_0,9,10,11)

//Program to combine the fields 0 and 1 for each variable
//merge_var <stub>
//Fills gaps in <stub>_0_0 using <stub>_1_0.
//A value is copied only when <stub>_0_0 is missing or negative AND
//<stub>_1_0 is non-missing and strictly positive; otherwise <stub>_0_0 is untouched.
capture program drop merge_var
program define merge_var
	args stub
	local first `stub'_0_0
	local second `stub'_1_0
	replace `first' = `second' if (`first'<0 | `first'==.) & (`second'!=. & `second'>0)
end

//merge_var2 <stub> <idx>
//Array version of merge_var: fills gaps in <stub>_0_<idx> using <stub>_1_<idx>.
//  1 = variable stub (e.g. n_6150)
//  2 = array position to merge
//A value is copied only when <stub>_0_<idx> is missing or negative and
//<stub>_1_<idx> is non-missing and strictly positive.
//The assignment target must be <stub>_0_<idx>: writing to <stub>_0_0 (as an earlier
//version did) overwrote the first answer and never filled the element being tested.
cap prog drop merge_var2
prog def merge_var2
replace `1'_0_`2'=`1'_1_`2' if (`1'_0_`2'==.|`1'_0_`2'<0 )& (`1'_1_`2'>0 & `1'_1_`2'!=.)
end

//merge_svar <stub>
//String counterpart of merge_var: when <stub>_0_0 is empty and <stub>_1_0 is not,
//copy <stub>_1_0 into <stub>_0_0.
capture program drop merge_svar
program define merge_svar
	args stub
	replace `stub'_0_0 = `stub'_1_0 if `stub'_1_0!="" & `stub'_0_0==""
end

//Education variables (the original author notes this follows the Rietveld definition).
//eduyears is the merged n_845 value when it is positive; non-positive codes are not used.
merge_var n_845
local age n_845_0_0
generate eduyears = `age' if `age'>0

//less_edu_<c>: 1 when the reported value is at or below the cut-off, defined only where eduyears exists
foreach cut of numlist 14 15 16 {
	generate less_edu_`cut' = (`age'<=`cut') if eduyears!=. & `age'>0
}
//Note the cut-off here is 20, i.e. "less than 21"
generate less_edu_21 = (`age'<=20) if eduyears!=. & `age'>0

//Complement of less_edu_15
generate more_edu_15 = 1-less_edu_15

//Participants reporting a university or college degree were not asked when they left school.
//Wherever any n_6138 answer equals 1 and the value is still missing,
//impute eduyears = 21 and set more_edu_15 = 1.
foreach q of varlist n_6138_* {
	replace more_edu_15 = 1 if more_edu_15==. & `q'==1
	replace eduyears = 21 if eduyears==. & `q'==1
}


//Flag participants born in England: merged n_1647 equals 1 and n_20115_0_0 is empty.
//The flag is always 0 or 1 (never missing).
merge_var n_1647
generate born_english = (n_20115_0_0==. & n_1647_0_0==1)

//Descriptives by quarter of birth: first the English-born, then everyone else
foreach grp in 1 0 {
	tabstat eduyears if born_english==`grp', by(year_q) stats(mean n)
	tabstat less_edu_* if born_english==`grp', by(year_q) stats(mean)
}

//Indicator for being born after the reform cut-off: YYYYMM later than 195708,
//i.e. born in September 1957 or later.
//Left missing when the birth date is missing; without the guard Stata would code
//it 1, because a missing value compares greater than any number.
gen post_reform=(year_month_birth>195708) if year_month_birth!=.

//Generate rosla variable
gen yob=n_34_0_0
gen mob=n_52_0_0
//dob is a Stata monthly date (months since January 1960), so September 1957 is -28
gen dob=ym(yob,mob)
//12-month bandwidth: 0 = the 12 months before the cut-off (dob -40..-29),
//1 = the 12 months from the cut-off onwards (dob -28..-17); missing outside the window
gen bw12=0 if dob>=-40 & dob<=-29
replace bw12=1 if dob>=-28 & dob<=-17

//Negative control exposures: the same 12-month comparison with the cut-off moved
//one year earlier (N1) and one year later (N2).
//The N1 control window now stops at -41 so it no longer overlaps the "treated"
//window; the resulting values are unchanged because the replace overwrote the overlap.
gen N1_bw12=0 if dob>=-52 & dob<=-41
replace N1_bw12=1 if dob>=-40 & dob<=-29
gen N2_bw12=0 if dob>=-28 & dob<=-17
replace N2_bw12=1 if dob>=-16 & dob<=-5

//Running variable: month of birth relative to the cut-off (0 = September 1957)
gen rosla=dob+28

//Outcome cleaning starts here
//Physical exercise: vigorous (n_904) and moderate (n_884) activity
/*
Distribution of the source field as noted by the original author:
Maximum 7
Decile 9 7
Decile 8 6
Decile 7 5
Decile 6 4
Median 3
Decile 4 3
Decile 3 2
Decile 2 1
Decile 1 0
Minimum 0
2304 items have value -3 (Prefer not to answer)
24680 items have value -1 (Do not know)
*/

local fld_v n_904
local fld_m n_884
foreach lvl in v m {
	merge_var `fld_`lvl''
}
//Non-negative answers are kept as recorded; negative codes and missing become missing
foreach lvl in v m {
	local src `fld_`lvl''_0_0
	generate out_phys_`lvl'_act = `src' if `src'!=. & `src'>=0
}

//Sedentary activity
/*
Maximum 24
Decile 9 5
Decile 8 4
Decile 7 4
Decile 6 3
Median 3
Decile 4 2
Decile 3 2
Decile 2 2
Decile 1 1
Minimum 0
24259 items have value -10 (Less than an hour a day)
767 items have value -3 (Prefer not to answer)
3838 items have value -1 (Do not know)
*/

merge_var n_1070
//Keep every valid answer, including 0 (listed above as the field minimum);
//the previous ">0" test silently dropped participants who answered 0.
//The trailing "& n_1070_0_0" of the old conditions was a truncated missing-value
//check and is now the explicit "!=." used throughout this file.
gen out_sedentary=n_1070_0_0 if n_1070_0_0>=0 & n_1070_0_0!=.
//"Less than an hour a day" (-10) is treated as 0
//NOTE(review): merge_var treats a -10 in _0_0 as a gap and overwrites it when the
//_1_0 answer is positive - confirm that is intended for this field.
replace out_sedentary=0 if n_1070_0_0==-10

//Income
//See http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=738
//Four indicators, each equal to 1 when the merged code exceeds 1, 2, 3 and 4 respectively;
//defined only for positive, non-missing codes.
//NOTE(review): out_income_under_18k is 1 when the code is ABOVE 1, so despite its
//name it flags the categories above the lowest one - keep that in mind downstream.
merge_var n_738
local inc n_738_0_0
local threshold = 1
foreach band in under_18k over_31k over_52k over_100k {
	generate out_income_`band' = (`inc'>`threshold') if `inc'!=. & `inc'>0
	local ++threshold
}

//Smoking status from merged n_20116 (non-negative, non-missing answers only):
//  out_smoker   = 1 when the code is 2
//  out_exsmoker = 1 when the code is 1 or 2
merge_var n_20116
local smk n_20116_0_0
generate out_smoker = (`smk'==2) if `smk'!=. & `smk'>=0
generate out_exsmoker = inlist(`smk',1,2) if `smk'!=. & `smk'>=0

//Alcohol consumption: positive codes are reversed (6 minus the code)
merge_var n_1558
generate out_alcohol = 6-n_1558_0_0 if n_1558_0_0!=. & n_1558_0_0>0

//Depression: 1 for a positive value of merged n_4620, 0 when the field is missing,
//and missing for zero or negative codes (a missing value passes the ">0" filter in Stata)
merge_var n_4620
generate out_depression = (n_4620_0_0!=.) if n_4620_0_0>0

//Happiness: positive codes are reversed (6 minus the code)
merge_var n_4526
generate out_happiness = 6-n_4526_0_0 if n_4526_0_0!=. & n_4526_0_0>0

//Cognition: merged n_20016 copied as recorded
merge_var n_20016
generate out_intell = n_20016_0_0

//Blood pressure: row mean of the two array readings (_0_1 and _0_0) for
//systolic (n_4080) and diastolic (n_4079); rowmean uses whichever readings are present
local fld_sys n_4080
local fld_dia n_4079
foreach bp in sys dia {
	merge_var `fld_`bp''
}
foreach bp in sys dia {
	egen out_`bp'_bp = rowmean(`fld_`bp''_0_1 `fld_`bp''_0_0)
}

//Anthropometry: merged values copied as recorded
merge_var n_21001
merge_var n_50
generate out_bmi = n_21001_0_0
generate out_height = n_50_0_0

//Arterial stiffness: residual of n_21021_0_0 after regressing on indicator
//variables for the string field s_4206_0_0
//NOTE(review): s_4206 presumably identifies the measuring device - confirm
merge_var n_21021
merge_svar s_4206

xi: regress n_21021_0_0 i.s_4206_0_0
predict out_arterial_stiffness, residuals

//Grip strength: average of n_46_0_0 and n_47_0_0, then the residual after
//regressing on indicators for the string field s_38_0_0
merge_var n_46
merge_var n_47
merge_svar s_38
tempvar grip_avg
egen `grip_avg' = rowmean(n_46_0_0 n_47_0_0)
xi: regress `grip_avg' i.s_38_0_0
predict out_gripstrength if `grip_avg'!=., residuals
drop `grip_avg'

//Mortality: 1 when n_40018_0_0 holds any value
generate out_dead = (n_40018_0_0!=.)

//Cancer: 1 when n_40008_0_0 is a non-missing value of 30 or more, 0 when it is missing,
//and missing when it is below 30
generate out_cancer = (n_40008_0_0!=.) if n_40008_0_0>=30

//Heart attack / stroke from the multi-answer field n_6150 (array positions 0-3)
merge_var n_6150
forvalues k = 1/3 {
	merge_var2 n_6150 `k'
}

//An outcome is 1 when any of the four answers carries its code (1 = heart attack, 3 = stroke);
//it is defined only when the first answer is neither -3 nor missing
local answered n_6150_0_0!=-3 & n_6150_0_0!=.
foreach pair in "heartattack 1" "stroke 3" {
	gettoken outcome code : pair
	generate out_`outcome' = (n_6150_0_0==`code' | n_6150_0_1==`code' | n_6150_0_2==`code' | n_6150_0_3==`code') if `answered'
}

//Diagnosed with diabetes
merge_var n_2443
merge_var n_2976
//1 when merged n_2443 equals 1, 0 for other non-negative answers.
//The "!=." guard matches the rest of this file: without it a missing answer
//passes ">=0" (missing compares greater than any number) and is coded 0.
gen out_diabetes=(n_2443_0_0==1) if n_2443_0_0>=0 & n_2443_0_0!=.
//Blank out cases where n_2976_0_0 is 21 or below.
//The variable name is spelled in full so this no longer depends on abbreviation.
//NOTE(review): negative special codes in n_2976 also satisfy "<=21" and are
//therefore set to missing as well - confirm that is intended.
replace out_diabetes=. if n_2976_0_0<=21 & n_2976_0_0!=.

//Diagnosed with hypertension: 1 for a positive n_2966 value, 0 when the field is
//missing, and missing for zero or negative codes
merge_var n_2966
gen out_highbloodpressure=(n_2966_0_0>0 & n_2966_0_0!=.) if n_2966_0_0>0

//Gender: 1 when n_31_0_0 equals 1, otherwise 0
generate male = (n_31_0_0==1)

//Retain the identifier, the education inputs and every derived variable
keep n_845_0_0 n_6138_* out_* male ///
	year_month_birth year_q ///
	eduyears less_edu_14 less_edu_15 less_edu_16 less_edu_21 more_edu_15 ///
	born_english post_reform yob mob dob rosla n_eid bw12 N1_bw12 N2_bw12
compress

//Limit the sample to English-born individuals
keep if born_english==1

//Write a checkpoint and reload it
save "working data/temp", replace
use "working data/temp", clear

//Attach the covariates and keep only participants found in both files (XXX == 3)
joinby n_eid using "working data/covariates", unmatched(master) _merge(XXX)
keep if XXX==3
drop XXX

*dob is the monthly birth date; the cut-off month is dob == -28
*bw<W> compares the W months before the cut-off (0) with the W months from it onwards (1)
*and is missing for anyone born outside that window

foreach w of numlist 24 36 48 60 72 120 {
	generate bw`w' = 0 if dob >= -28-`w' & dob <= -29
	replace bw`w' = 1 if dob >= -28 & dob <= -29+`w'
}

*Month-of-birth dummies, with the twelfth one dropped
tabulate mob, generate(imob_)
drop imob_12

*Running variable interacted with the 12-month window indicator
generate rosla_i = rosla*bw12

//Attach the educational attainment allele score, keeping every participant in memory
joinby n_eid using "working data/EA_score", unmatched(master)
drop _merge

//Attach the genome-wide EA2 score in the same way
joinby n_eid using "working data/genome_wide_EA2_score", unmatched(master)
drop _merge

//Year-month of birth (YYYYMM) and running variables shifted by +/-12 months
//for the negative-control cut-offs
generate mobi = 100*yob+mob
generate rosla_neg1 = rosla+12
generate rosla_neg2 = rosla-12

compress

//Interactions of month of birth and of the running variable with the reform indicator
generate mob_post = mob*post_reform
generate rosla_post = rosla*post_reform

//Dummy sets for the interaction and for month of birth
tabulate mob_post, generate(mob_post_I_)
tabulate mob, generate(mob_I_)

//Weight: 1.8857 where more_edu_15 is 0, and 1 for everyone else (including missing)
//NOTE(review): the derivation of 1.8857 is not shown in this script
generate weight = cond(more_edu_15==0, 1.8857, 1)

save "working data/cleaned_biobank_outcomes_ENGLISH", replace

11 changes: 11 additions & 0 deletions cr_3_EA2_weights.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//Neil Davies 08/10/15
//This creates the weights from the EA2 data

//NOTE(review): this script has no cd, so the relative paths "Documents/..." and
//"raw_data/..." resolve against whatever directory Stata is in when it runs - confirm
use "Documents/EduYears_pooled_Nweighted_excl_23andMe_singleGC.meta.dta",clear
//Keep the SNP identifier, the two alleles and the effect estimate
keep rsid allele1 allele2 beta
compress
//Pair with the rsid list; no unmatched() option is given, so only rsids present in both files remain
joinby rsid using "/Volumes/Height_BMI_and_schooling/UK biobank/raw_data/EA2_rsids.dta"
drop chr
//Row counter in the current sort order
gen a=_n
save "raw_data/EA2_snp_weights.dta",replace

32 changes: 32 additions & 0 deletions cr_4_EA2_GW_score.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//Neil Davies 20/10/15
//This creates the genome-wide allele score from the EA2 coefficients.

//First we import all of the data and convert to stata format:

cd "/Volumes/Height_BMI_and_schooling/UK Biobank - ROSLA"

//Step 1: convert each per-chromosome score file (score_01 ... score_22) to Stata format.
//NOTE(review): the .profile files are presumably PLINK --score output - confirm.
forvalues i=1(1)22{
	//File names use two-digit chromosome numbers; build the padded label in its
	//own macro rather than overwriting the loop counter
	local chr : display %02.0f `i'
	import delimited "/Volumes/Height_BMI_and_schooling/UK Biobank - ROSLA/results/score_`chr'.txt.profile", ///
		delimiter(space, collapse) encoding(ISO-8859-1) clear
	keep fid score
	save "working data/genome_wide_EA2_score_`chr'",replace
}

//Step 2: chromosome 22 is still in memory after the loop; join the other 21
//files on fid, deleting each intermediate file once it has been merged
rename score score_22
forvalues i=1(1)21{
	local chr : display %02.0f `i'
	joinby fid using "working data/genome_wide_EA2_score_`chr'"
	rename score score_`chr'
	rm "working data/genome_wide_EA2_score_`chr'.dta"
}
//The chromosome 22 intermediate file is no longer needed either
rm "working data/genome_wide_EA2_score_22.dta"

//Step 3: genome-wide score = sum of the per-chromosome scores, plus a standardised version
egen cov_GW_EA2_score=rowtotal(score_*)
keep fid cov_GW_EA2_score
rename fid n_eid
egen cov_Z_GW_EA2_score=std(cov_GW_EA2_score)
save "working data/genome_wide_EA2_score",replace

Loading

0 comments on commit b57b8a3

Please sign in to comment.