-
Notifications
You must be signed in to change notification settings - Fork 2
/
qc.go
158 lines (148 loc) · 5.8 KB
/
qc.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
package main
import (
"fmt"
"log"
"path/filepath"
"regexp"
"strconv"
"strings"
"github.com/liserjrqlxue/goUtil/fmtUtil"
"github.com/liserjrqlxue/goUtil/osUtil"
"github.com/liserjrqlxue/goUtil/simpleUtil"
"github.com/liserjrqlxue/goUtil/textUtil"
)
func loadFilterStat(filterStat string, quality map[string]string) {
if filterStat == "" {
return
}
var db = make(map[string]float64)
filters := strings.Split(filterStat, ",")
for _, filter := range filters {
var fDb = simpleUtil.HandleError(textUtil.File2Map(filter, "\t", false)).(map[string]string)
var numberOfReads = simpleUtil.HandleError(strconv.ParseFloat(fDb["Number of Reads:"], 32)).(float64)
var GCfq1 = simpleUtil.HandleError(strconv.ParseFloat(fDb["GC(%) of fq1:"], 32)).(float64)
var GCfq2 = simpleUtil.HandleError(strconv.ParseFloat(fDb["GC(%) of fq2:"], 32)).(float64)
var Q20fq1 = simpleUtil.HandleError(strconv.ParseFloat(fDb["Q20(%) of fq1:"], 32)).(float64)
var Q20fq2 = simpleUtil.HandleError(strconv.ParseFloat(fDb["Q20(%) of fq2:"], 32)).(float64)
var Q30fq1 = simpleUtil.HandleError(strconv.ParseFloat(fDb["Q30(%) of fq1:"], 32)).(float64)
var Q30fq2 = simpleUtil.HandleError(strconv.ParseFloat(fDb["Q30(%) of fq2:"], 32)).(float64)
fDb["Discard Reads related to low qual:"] = strings.TrimSpace(fDb["Discard Reads related to low qual:"])
var lowQualReads = 0.0
if fDb["Discard Reads related to low qual:"] != "" {
lowQualReads = simpleUtil.HandleError(strconv.ParseFloat(strings.TrimSpace(fDb["Discard Reads related to low qual:"]), 32)).(float64)
}
db["numberOfReads"] += numberOfReads
db["lowQualReads"] += lowQualReads
db["GC"] += (GCfq1 + GCfq2) / 2 * numberOfReads
db["Q20"] += (Q20fq1 + Q20fq2) / 2 * numberOfReads
db["Q30"] += (Q30fq1 + Q30fq2) / 2 * numberOfReads
}
quality["Q20 碱基的比例"] = strconv.FormatFloat(db["Q20"]/db["numberOfReads"], 'f', 2, 32) + "%"
quality["Q30 碱基的比例"] = strconv.FormatFloat(db["Q30"]/db["numberOfReads"], 'f', 2, 32) + "%"
quality["测序数据的 GC 含量"] = strconv.FormatFloat(db["GC"]/db["numberOfReads"], 'f', 2, 32) + "%"
quality["低质量 reads 比例"] = strconv.FormatFloat(db["lowQualReads"]/db["numberOfReads"], 'f', 2, 32) + "%"
}
func updateQC(stats map[string]int, quality map[string]string) {
quality["罕见变异占比(Tier1/总)"] = fmt.Sprintf("%0.2f%%", float64(stats["Tier1"])/float64(stats["Total"])*100)
quality["罕见烈性变异占比 in tier1"] = fmt.Sprintf("%0.2f%%", float64(stats["Tier1LoF"])/float64(stats["Tier1"])*100)
quality["罕见纯合变异占比 in tier1"] = fmt.Sprintf("%0.2f%%", float64(stats["Tier1Hom"])/float64(stats["Tier1"])*100)
quality["纯合变异占比 in all"] = fmt.Sprintf("%0.2f%%", float64(stats["Hom"])/float64(stats["Total"])*100)
for _, chr := range []string{
"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "X",
} {
quality["chr"+chr+"纯合变异占比"] = fmt.Sprintf("%0.2f%%", float64(stats["Hom:chr"+chr])/float64(stats["chr"+chr])*100)
}
quality["SNVs_all"] = strconv.Itoa(stats["snv"])
quality["SNVs_tier1"] = strconv.Itoa(stats["Tier1snv"])
quality["Small insertion(包含 dup)_all"] = strconv.Itoa(stats["ins"])
quality["Small insertion(包含 dup)_tier1"] = strconv.Itoa(stats["Tier1ins"])
quality["Small deletion_all"] = strconv.Itoa(stats["del"])
quality["Small deletion_tier1"] = strconv.Itoa(stats["Tier1del"])
quality["exon CNV_all"] = strconv.Itoa(stats["exonCNV"])
quality["exon CNV_tier1"] = strconv.Itoa(stats["Tier1exonCNV"])
quality["large CNV_all"] = strconv.Itoa(stats["largeCNV"])
quality["large CNV_tier1"] = strconv.Itoa(stats["Tier1largeCNV"])
}
var isSharp = regexp.MustCompile(`^#`)
var isBamPath = regexp.MustCompile(`^## Files : (\S+)`)
func loadQC(files, kinship string, quality []map[string]string, isWGS bool) {
var kinshipHash = make(map[string]map[string]string)
if kinship != "" {
kinshipHash, _ = textUtil.File2MapMap(kinship, "样品ID", "\t", nil)
}
sep := "\t"
if isWGS {
sep = ": "
}
file := strings.Split(files, ",")
for i, in := range file {
report := textUtil.File2Array(in)
for _, line := range report {
if isSharp.MatchString(line) {
if m := isBamPath.FindStringSubmatch(line); m != nil {
if osUtil.FileExists(m[1]) {
quality[i]["bamPath"] = m[1]
}
}
} else {
m := strings.Split(line, sep)
if len(m) > 1 {
quality[i][strings.TrimSpace(m[0])] = strings.TrimSpace(m[1])
}
}
}
if isWGS {
absPath, err := filepath.Abs(in)
if err == nil {
quality[i]["bamPath"] = filepath.Join(filepath.Dir(absPath), "..", "bam_chr")
} else {
log.Println(err, in)
quality[i]["bamPath"] = filepath.Join(filepath.Dir(in), "..", "bam_chr")
}
}
kinshipInfo, ok := kinshipHash[quality[i]["样本编号"]]
if ok {
for k, v := range kinshipInfo {
quality[i][k] = v
}
}
}
}
func parseQC() {
var karyotypeMap = make(map[string]string)
if *karyotype != "" {
karyotypeMap, err = textUtil.Files2Map(*karyotype, "\t", true)
simpleUtil.CheckErr(err)
}
// load coverage.report
if *qc != "" {
loadQC(*qc, *kinship, qualitys, *wgs)
for _, quality := range qualitys {
for k, v := range qualityKeyMap {
quality[k] = quality[v]
}
quality["核型预测"] = karyotypeMap[quality["样本编号"]]
if *wesim {
var qcArray []string
for _, key := range qcColumn {
qcArray = append(qcArray, quality[key])
}
fmtUtil.FprintStringArray(qcFile, qcArray, "\t")
}
}
if *wesim {
simpleUtil.CheckErr(qcFile.Close())
}
logTime("load coverage.report")
loadFilterStat(*filterStat, qualitys[0])
}
}
func parseList() {
for _, sample := range sampleList {
sampleMap[sample] = true
var quality = make(map[string]string)
quality["样本编号"] = sample
qualitys = append(qualitys, quality)
}
}