# Select 10,000 White British individuals from UKBB to create a reference panel

In [None]:
[global]
# Parameters declared with only a type (path, paths, int) have no default
# value and must be supplied when the workflow is run.
#
# Working directory under which all outputs are written: change accordingly
parameter: cwd = path
# The fam file from which the individuals are sampled
parameter: famFile = path
# Paths to the bgen files; each file is processed separately
parameter: bgenFile = paths
# Path to the sample file passed to qctool with `-s`
parameter: sampleFile = path
# For cluster jobs, number of commands to run per job (used as `trunk_size`)
parameter: job_size = int
# Specific number of threads to use
# NOTE(review): not referenced by the steps shown in this section - confirm it is still needed
parameter: numThreads = int
# Shell template used to run the qctool command when the executable is not
# found directly: it loads the cluster module first, then runs the command
# substituted at `{cmd}`.
parameter: qctool_module = '''
module load QCTOOL/2.0-foss-2016b-rc7-CentOS6.8
echo "Module qctool loaded"
{cmd}
'''
# The container with the lmm software (used by the R sampling step).
# Can be either a dockerhub image or a singularity `sif` file.
parameter: container_lmm = 'statisticalgenetics/lmm:1.4'

In [None]:
# Draw 10,000 individuals at random from the fam file and write their IDs
# (first column of the fam file) to <cwd>/<fam basename>.10000ind_ref,
# all on a single whitespace-separated line.
# NOTE: no random seed is set, so every run selects a different set of individuals.
[default_1: provides = [f'{cwd}/{famFile:bn}.10000ind_ref']]
input: famFile
output: f'{cwd}/{_input:bn}.10000ind_ref'
task: trunk_workers = 1, trunk_size = job_size, walltime = '48h',  mem = '10G', tags = f'{step_name}_{_output:bn}'
R: container=container_lmm, expand="${ }", stderr=f'{_output:n}.stderr', stdout=f'{_output:n}.stdout'
  library(dplyr)
  # The fam file is read without a header row, so columns are named V1, V2, ...
  fam = read.table(${_input:r}, header = FALSE)
  # sample_n() stops with an error if the file holds fewer than 10,000 rows
  ref = fam %>%
    sample_n(10000) %>%
    select("V1")
  # Transpose so that the IDs end up on one row of the output file
  ref = t(ref)
  # quote = FALSE keeps IDs bare even when they were read as character strings;
  # quoted IDs would not match the sample identifiers downstream
  write.table(ref, ${_output:r}, quote = FALSE, col.names = FALSE, row.names = FALSE)

In [None]:
# Filter samples from bgen files: run qctool once per bgen file (group_by=1),
# keeping only the individuals listed in the ID file produced by the sampling step.
# The command runs directly when a `qctool` executable is available; otherwise
# it is wrapped in the `qctool_module` template, which loads the module first.
# NOTE(review): qctool is expected to choose the -og output format from the file
# extension - confirm that `.bim` yields the intended genotype format.
[default_2]
input: bgenFile, group_by=1
output: f'{cwd}/{_input:bn}.10000ind_ref.bim', f'{cwd}/{_input:bn}.10000ind_ref.sample'
depends: f'{cwd}/{famFile:bn}.10000ind_ref'
task: trunk_workers = 1, trunk_size = job_size, walltime = '48h',  mem = '10G', tags = f'{step_name}_{_output[0]:bn}'
bash: expand= "${ }", stderr = f'{_output[0]:n}.stderr', stdout = f'{_output[0]:n}.stdout', template = '{cmd}' if executable('qctool').target_exists() else qctool_module
    qctool \
    -g ${_input} \
    -s ${sampleFile} \
    -incl-samples ${_depends} \
    -og ${_output[0]} \
    -os ${_output[1]}