/
liftoverPEDMAP.ftl
136 lines (106 loc) · 3.42 KB
/
liftoverPEDMAP.ftl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#MOLGENIS nodes=1 cores=1 mem=2
#FOREACH study,chr
#Parameter mapping
chr="${chr}"
studyInputDir="${studyInputDir}"
outputFolder="${outputFolder}"
outputFolderTmp="${outputFolderTmp}"
outputFolderChrDir="${outputFolderChrDir}"
liftOverChainFile="${liftOverChainFile}"
liftOverUcscBin="${liftOverUcscBin}"
plinkBin="${plinkBin}"
${stage} liftOverUcsc/${liftOverUcscVersion}
${stage} plink/${plinkVersion}
<#noparse>
#Echo parameter values
echo "chr: ${chr}"
echo "studyInputDir: ${studyInputDir}"
echo "outputFolder: ${outputFolder}"
echo "outputFolderTmp: ${outputFolderTmp}"
echo "outputFolderChrDir: ${outputFolderChrDir}"
echo "liftOverChainFile: ${liftOverChainFile}"
echo "liftOverUcscBin: ${liftOverUcscBin}"
echo "plinkBin: ${plinkBin}"
startTime=$(date +%s)
#Check if outputs exist
alloutputsexist \
"${outputFolderChrDir}/chr${chr}.ped" \
"${outputFolderChrDir}/chr${chr}.map" \
"${outputFolderChrDir}/chr${chr}.fam"
#Create output directories
mkdir -p $outputFolder
mkdir -p $outputFolderChrDir
mkdir -p $outputFolderTmp
#Retrieve input Files
inputs $studyInputDir/chr$chr.ped
inputs $studyInputDir/chr$chr.map
inputs $studyInputDir/chr$chr.fam
getFile $studyInputDir/chr$chr.ped
getFile $studyInputDir/chr$chr.map
getFile $studyInputDir/chr$chr.fam
#create bed file based on map file
awk '{$5=$2;$2=$4;$3=$4+1;$1="chr"$1;print $1,$2,$3,$5}' OFS="\t" $studyInputDir/chr$chr.map > $outputFolderTmp/chr$chr.old.bed
#map to b37
$liftOverUcscBin \
-bedPlus=4 $outputFolderTmp/chr$chr.old.bed \
$liftOverChainFile \
$outputFolderTmp/chr$chr.new.bed \
$outputFolderTmp/chr$chr.new.unmapped.txt
#create list of unmapped snps
awk '/^[^#]/ {print $4}' $outputFolderTmp/chr$chr.new.unmapped.txt > $outputFolderTmp/chr$chr.new.unmappedSnps.txt
#create mappings file used by plink
awk '{print $4, $2}' OFS="\t" $outputFolderTmp/chr$chr.new.bed > $outputFolderTmp/chr$chr.new.Mappings.txt
#Copy original fam file to new output directory
cp $studyInputDir/chr$chr.fam $outputFolderChrDir/chr$chr.fam
putFile $outputFolderChrDir/chr$chr.fam
#create new plink data without the unmapped snps
$plinkBin \
--noweb \
--file $studyInputDir/chr$chr \
--recode \
--out $outputFolderChrDir/~chr$chr \
--exclude $outputFolderTmp/chr$chr.new.unmappedSnps.txt \
--update-map $outputFolderTmp/chr$chr.new.Mappings.txt
#Get return code from last program call
returnCode=$?
echo "returnCode Plink: ${returnCode}"
if [ $returnCode -eq 0 ]
then
echo -e "\nMoving temp files to final files\n\n"
for tempFile in $outputFolderChrDir/~chr$chr* ; do
finalFile=`echo $tempFile | sed -e "s/~//g"`
echo "Moving temp file: ${tempFile} to ${finalFile}"
mv $tempFile $finalFile
putFile $finalFile
done
else
echo -e "\nNon zero return code not making files final. Existing temp files are kept for debugging purposes\n\n"
#Return non zero return code
exit 1
fi
endTime=$(date +%s)
#Source: http://stackoverflow.com/questions/12199631/convert-seconds-to-hours-minutes-seconds-in-bash
num=$endTime-$startTime
min=0
hour=0
day=0
if((num>59));then
((sec=num%60))
((num=num/60))
if((num>59));then
((min=num%60))
((num=num/60))
if((num>23));then
((hour=num%24))
((day=num/24))
else
((hour=num))
fi
else
((min=num))
fi
else
((sec=num))
fi
echo "Running time: ${day} days ${hour} hours ${min} mins ${sec} secs"
</#noparse>