-
Notifications
You must be signed in to change notification settings - Fork 0
/
sdBuild1Get.sh
executable file
·89 lines (77 loc) · 2.65 KB
/
sdBuild1Get.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/bin/bash
# Fetch the raw data files from the database.
#######################################
# Names of the raw data sets pulled from the db.
# Each list is a space-separated string; every name is used both as the
# query name handed to the sdGetRaw script and as the output file name.
#######################################
primTriageRawFiles='discard_after keep_after keep_before keep_tumor'
curGroupRawFiles='unselected_after selected_after selected_before'
# curation groups accepted by --group (shown in the usage text)
groups='ap gxd go tumor'
#######################################
# Usage: print the command-line synopsis and terminate the script.
# Globals read: groups, curGroupRawFiles, primTriageRawFiles
#               (interpolated into the help text)
# Outputs:      help text on stdout
# Exit status:  always exits the shell with status 5
Usage() {
    # Unquoted delimiter: $0 and the list variables are expanded in the text.
    cat <<USAGE_TEXT
$0 {--group groupname |--discard} [--server name] [--limit n] [--norestrict]
Get raw sample files from the db.
Puts all files into the current directory.
--group groupname
Get data for specific curation group: $groups
output files: $curGroupRawFiles
--discard Get data for primary triage (discard/keep)
output files: $primTriageRawFiles
--server Database server: dev (default) or test or prod
--limit limit on sql query results (default = 0 = no limit)
--norestrict when populating raw files, include all articles,
default: skip review and non-peer reviewed
USAGE_TEXT
    exit 5
}
#######################################
# basic setup
#######################################
projectHome=~/work/autolittriage
getRawLog=getRaw.log            # log file from sdGetRaw

#######################################
# cmdline options
#######################################

# requireValue: make sure the option in $1 is followed by a non-empty value.
# Arguments: the remaining command-line words, option first.
# On failure prints a message and calls Usage, which exits the script.
requireValue() {
    if [ $# -ge 2 ] && [ -n "$2" ]; then
        return 0
    fi
    echo "option $1 requires a value"
    Usage
}

# parseOptions: interpret the command line and set the option globals.
# Globals written:
#   restrictOpt  '' (default: skip review papers and non-peer rev) or --norestrict
#   limit        getRaw record limit, "0" = no limit (set small for debugging)
#   server       database server name, default "dev"
#   doGroup      "yes" (--group), "no" (--discard) or "unspecified"
#   group        curation group name, only set by --group
# Arguments: the script's command-line arguments.
# Calls Usage (which exits) on -h/--help, on an unknown option, and when an
# option that takes a value is given without one.
# Parsing stops at the first word that is not an option.
parseOptions() {
    restrictOpt=''
    limit="0"
    server="dev"
    doGroup="unspecified"

    while [ $# -gt 0 ]; do
        case "$1" in
        -h|--help)    Usage ;;
        --group)      requireValue "$@"; doGroup=yes; group="$2"; shift 2 ;;
        --discard)    doGroup=no; shift ;;
        --norestrict) restrictOpt=--norestrict; shift ;;
        --limit)      requireValue "$@"; limit="$2"; shift 2 ;;
        --server)     requireValue "$@"; server="$2"; shift 2 ;;
        -*)           echo "invalid option $1"; Usage ;;
        *)            break ;;
        esac
    done
}

parseOptions "$@"

# One of --group / --discard is mandatory.
if [ "$doGroup" == "unspecified" ]; then
    Usage
fi
#######################################
# Pull raw subsets from db
#######################################
# Runs the selected sdGetRaw script once to report record counts (copied to
# stdout, appended to $getRawLog and saved in ./counts), then once per raw
# file name, writing each query's stdout to a file of that name in the
# current directory and appending its stderr to $getRawLog.
# A failed invocation does not stop the remaining ones, but the script exits
# with status 1 at the end if any invocation failed.
echo "getting raw data from db: ${server}" | tee -a "$getRawLog"
date >> "$getRawLog"
rm -f counts

# Per-mode settings: which script to run, which files to produce, and the
# arguments that select the counts report / an individual query.
if [ "$doGroup" == "yes" ]; then
    getRaw=$projectHome/sdGetRawCurGroups.py
    rawFiles=$curGroupRawFiles
    countArgs=(--group "$group" --counts)
    queryArgs=(--group "$group" --query)
else
    getRaw=$projectHome/sdGetRawPrimTriage.py
    rawFiles=$primTriageRawFiles
    countArgs=(counts)
    queryArgs=()
fi

failures=0

# $restrictOpt is deliberately unquoted: when empty it must expand to nothing.
"$getRaw" --server "$server" $restrictOpt "${countArgs[@]}" | tee -a "$getRawLog" counts
# PIPESTATUS[0] is the sdGetRaw status; $? alone would only report tee's.
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
    echo "error: $getRaw failed while getting counts" >&2
    failures=$((failures + 1))
fi

# $rawFiles is a space-separated list, so word splitting is intended here.
for f in $rawFiles; do
    set -x
    "$getRaw" --server "$server" -l "$limit" $restrictOpt "${queryArgs[@]}" "$f" > "$f" 2>> "$getRawLog" \
        || failures=$((failures + 1))
    set +x
done

if [ "$failures" -gt 0 ]; then
    echo "error: $failures sdGetRaw invocation(s) failed, see $getRawLog" >&2
    exit 1
fi