-
Notifications
You must be signed in to change notification settings - Fork 12
/
qcstruct.h
224 lines (190 loc) · 6.51 KB
/
qcstruct.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#ifndef QCSTRUCT_H
#define QCSTRUCT_H
#include <limits>
#include <boost/dynamic_bitset.hpp>
#include <boost/unordered_map.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/zlib.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <htslib/sam.h>
#include <htslib/faidx.h>
#include "util.h"
namespace bamstats
{
// Pair of 32-bit counters; ReferenceFeatures stores one ChrGC per chromosome.
// The struct declares no constructor, so the fields are only zeroed when the
// object is value-initialized (e.g. ChrGC()).
// NOTE(review): the field meanings below are inferred from the names only —
// confirm against the code that fills them.
struct ChrGC {
uint32_t ncount; // presumably the number of N bases on the chromosome
uint32_t gccount; // presumably the number of G/C bases on the chromosome
};
struct ReferenceFeatures {
typedef std::vector<uint64_t> TGCContent;
typedef std::vector<Interval> TChromosomeRegions;
typedef std::vector<TChromosomeRegions> TGenomicRegions;
uint64_t referencebp;
uint64_t ncount;
uint32_t totalBedSize;
uint32_t nchr;
std::vector<ChrGC> chrGC;
TGenomicRegions gRegions;
TGCContent refGcContent;
explicit ReferenceFeatures(uint32_t const nc) : referencebp(0), ncount(0), totalBedSize(0), nchr(nc) {
chrGC.resize(nc, ChrGC());
gRegions.resize(nc, TChromosomeRegions());
refGcContent.resize(102, 0);
}
};
// Base-level tallies: alignment operation counters, indel size and
// homopolymer-context histograms, and coverage bookkeeping.
struct BaseCounts {
  typedef uint32_t TCountType;
  typedef std::vector<TCountType> TCoverageBp;
  typedef COVTYPE TMaxCoverage;
  typedef std::vector<TMaxCoverage> TBpCoverage;

  // Largest value representable by TMaxCoverage.
  TMaxCoverage maxCoverage;
  // Largest indel size with its own histogram bin (50).
  uint32_t maxIndelSize;

  // Counters, all zero after construction.
  uint64_t n1;
  uint64_t n2;
  uint64_t nd;
  uint64_t matchCount;
  uint64_t mismatchCount;
  uint64_t delCount;
  uint64_t insCount;
  uint64_t softClipCount;
  uint64_t hardClipCount;

  // Six-bin histograms.
  std::vector<uint32_t> delHomACGTN; // A:0, C:1, G:2, T:3, N:4, none:5
  std::vector<uint32_t> insHomACGTN; // A:0, C:1, G:2, T:3, N:4, none:5

  // Indel size histograms with bins 0 .. maxIndelSize.
  std::vector<uint32_t> delSize;
  std::vector<uint32_t> insSize;

  // 65536 bins (uint16_t range), independent of what COVTYPE expands to.
  // NOTE(review): if COVTYPE is wider than 16 bits, indexing this by a
  // coverage value could run past the end — confirm against util.h.
  TCoverageBp bpWithCoverage;

  // Starts out empty; sized elsewhere.
  TBpCoverage cov;

  // Zeroes every counter, fixes the limits and allocates the histograms.
  BaseCounts()
    : maxCoverage(std::numeric_limits<TMaxCoverage>::max()),
      maxIndelSize(50),
      n1(0),
      n2(0),
      nd(0),
      matchCount(0),
      mismatchCount(0),
      delCount(0),
      insCount(0),
      softClipCount(0),
      hardClipCount(0),
      delHomACGTN(6, 0),
      insHomACGTN(6, 0),
      delSize(maxIndelSize + 1, 0),
      insSize(maxIndelSize + 1, 0),
      bpWithCoverage(std::numeric_limits<uint16_t>::max() + 1, 0),
      cov()
  {}
};
struct ReadCounts {
typedef uint16_t TMaxReadLength;
typedef uint32_t TCountType;
typedef std::vector<TCountType> TLengthReadCount;
typedef std::vector<TLengthReadCount> TLenRead12;
typedef std::vector<uint64_t> TBaseQualitySum;
typedef std::vector<TBaseQualitySum> TBQRead12;
typedef ReferenceFeatures::TGCContent TGCContent;
typedef boost::dynamic_bitset<> TBitSet;
typedef std::pair<int32_t, int32_t> TStartEndPair;
typedef std::map<int32_t, TStartEndPair> TBlockRange;
typedef std::vector<TBlockRange> TGenomicBlockRange;
typedef std::vector<uint64_t> TMappedChr;
int32_t maxReadLength;
int32_t maxUMI;
int64_t secondary;
int64_t qcfail;
int64_t dup;
int64_t supplementary;
int64_t unmap;
int64_t forward;
int64_t reverse;
int64_t spliced;
int64_t mapped1;
int64_t mapped2;
int64_t haplotagged;
int64_t mitagged;
TMappedChr mappedchr;
TLenRead12 lRc;
TLenRead12 nCount;
TLenRead12 aCount;
TLenRead12 cCount;
TLenRead12 gCount;
TLenRead12 tCount;
TBQRead12 bqCount;
TGCContent gcContent;
TBitSet umi;
TGenomicBlockRange brange;
explicit ReadCounts(uint32_t const n_targets) : maxReadLength(std::numeric_limits<TMaxReadLength>::max()), maxUMI(10000000), secondary(0), qcfail(0), dup(0), supplementary(0), unmap(0), forward(0), reverse(0), spliced(0), mapped1(0), mapped2(0), haplotagged(0), mitagged(0) {
mappedchr.resize(n_targets, 0);
lRc.resize(2, TLengthReadCount());
bqCount.resize(2, TBaseQualitySum());
aCount.resize(2, TLengthReadCount());
cCount.resize(2, TLengthReadCount());
gCount.resize(2, TLengthReadCount());
tCount.resize(2, TLengthReadCount());
nCount.resize(2, TLengthReadCount());
for(uint32_t k = 0; k<2; ++k) {
lRc[k].resize(maxReadLength + 1, 0);
bqCount[k].resize(maxReadLength + 1, 0);
aCount[k].resize(maxReadLength + 1, 0);
cCount[k].resize(maxReadLength + 1, 0);
gCount[k].resize(maxReadLength + 1, 0);
tCount[k].resize(maxReadLength + 1, 0);
nCount[k].resize(maxReadLength + 1, 0);
}
gcContent.resize(102, 0);
}
};
// Pair-level tallies: pairing/mapping counters, four orientation counters
// and four insert size histograms.
struct PairCounts {
  typedef uint16_t TMaxInsertSize;
  typedef uint32_t TCountType;
  typedef std::vector<TCountType> TISizePairCount;

  // Upper bound used to size the histograms (uint16_t maximum).
  int32_t maxInsertSize;

  // Counters, all zero after construction.
  int64_t paired;
  int64_t mapped;
  int64_t mappedSameChr;
  int64_t mappedProper;
  int64_t orient[4];
  int64_t totalISizeCount;

  // Histograms with bins 0 .. maxInsertSize, all zero after construction.
  TISizePairCount fPlus;
  TISizePairCount rPlus;
  TISizePairCount fMinus;
  TISizePairCount rMinus;

  // Zeroes every counter and allocates the four histograms.
  PairCounts()
    : maxInsertSize(std::numeric_limits<TMaxInsertSize>::max()),
      paired(0),
      mapped(0),
      mappedSameChr(0),
      mappedProper(0),
      totalISizeCount(0)
  {
    for (int slot = 0; slot < 4; ++slot) {
      orient[slot] = 0;
    }

    // One zeroed prototype, copied into each histogram.
    const TISizePairCount emptyHistogram(maxInsertSize + 1, 0);
    fPlus = emptyHistogram;
    rPlus = emptyHistogram;
    fMinus = emptyHistogram;
    rMinus = emptyHistogram;
  }
};
// Histogram over quality values in the uint8_t range.
struct QualCounts {
  typedef uint8_t TMaxQuality;
  typedef uint32_t TCountType;
  typedef std::vector<TCountType> TQualCount;

  // Largest quality value with a bin (uint8_t maximum).
  int32_t maxQuality;
  // Bins 0 .. maxQuality, all zero after construction.
  TQualCount qcount;

  // Fixes the quality limit and allocates the zeroed histogram.
  QualCounts()
    : maxQuality(std::numeric_limits<TMaxQuality>::max()),
      qcount(maxQuality + 1, 0)
  {}
};
// Bundle of the four statistics containers.
struct ReadGroupStats {
  BaseCounts bc;
  ReadCounts rc;
  PairCounts pc;
  QualCounts qc;

  // Default-constructs the base, pair and quality counters and sizes the
  // read counters for `n_targets` target sequences.
  // NOTE(review): unlike ReadCounts, this single-argument constructor is not
  // marked explicit; left as is so existing call sites keep compiling.
  ReadGroupStats(uint32_t const n_targets)
    : bc(),
      rc(n_targets),
      pc(),
      qc()
  {}
};
struct BedCounts {
typedef double TAvgCov;
typedef std::vector<TAvgCov> TBpCov;
typedef boost::unordered_map<std::string, TBpCov> TRgBpMap;
typedef std::vector<TRgBpMap> TGenomicBp;
typedef std::vector<int64_t> TOnTargetBp;
typedef boost::unordered_map<std::string, TOnTargetBp> TOnTargetMap;
typedef std::vector<uint64_t> TGCContent;
int32_t stepsize;
int32_t onTSize;
TGenomicBp gCov;
TOnTargetMap onTarget;
TGCContent bedGcContent;
BedCounts(int32_t nchr, int32_t s, int32_t vs) : stepsize(s), onTSize(vs) {
gCov.resize(nchr, TRgBpMap());
bedGcContent.resize(102, 0);
}
};
}
#endif