-
Notifications
You must be signed in to change notification settings - Fork 20
/
_format.py
84 lines (61 loc) · 2.77 KB
/
_format.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# ----------------------------------------------------------------------------
# Copyright (c) 2016-2020, QIIME 2 development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE, distributed with this software.
# ----------------------------------------------------------------------------
from q2_types.per_sample_sequences import FastqGzFormat
import qiime2.plugin.model as model
from qiime2.plugin import ValidationError
# TODO: deprecate this and alias it
class EMPMultiplexedDirFmt(model.DirectoryFormat):
    """Directory format with a sequences file and a barcodes file.

    Both members are fixed-name gzipped FASTQ files validated as
    ``FastqGzFormat``.
    """

    # Fixed file name: sequences.fastq.gz
    sequences = model.File(r'sequences.fastq.gz', format=FastqGzFormat)
    # Fixed file name: barcodes.fastq.gz
    barcodes = model.File(r'barcodes.fastq.gz', format=FastqGzFormat)
# The new canonical name for EMPMultiplexedDirFmt
class EMPSingleEndDirFmt(EMPMultiplexedDirFmt):
    """Subclass of EMPMultiplexedDirFmt that adds nothing of its own.

    All file members are inherited unchanged from the parent class.
    """
class EMPPairedEndDirFmt(model.DirectoryFormat):
    """Directory format with forward, reverse and barcodes files.

    Each member is a fixed-name gzipped FASTQ file validated as
    ``FastqGzFormat``.
    """

    # Fixed file name: forward.fastq.gz
    forward = model.File(r'forward.fastq.gz', format=FastqGzFormat)
    # Fixed file name: reverse.fastq.gz
    reverse = model.File(r'reverse.fastq.gz', format=FastqGzFormat)
    # Fixed file name: barcodes.fastq.gz
    barcodes = model.File(r'barcodes.fastq.gz', format=FastqGzFormat)
# Originally called EMPMultiplexedSingleEndDirFmt; the rename was possible as
# no artifacts were created with this view. It is just for import.
class EMPSingleEndCasavaDirFmt(model.DirectoryFormat):
    """Directory format with hard-coded ``Undetermined_S0_L001_*`` names.

    Holds one sequences file (``R1``) and one barcodes file (``I1``), both
    validated as ``FastqGzFormat``.
    """

    # TODO: generalize this with a regex when we have validation in place for
    # model.FileCollections. The file names are currently designed more
    # specifically for handling MiSeq data.
    sequences = model.File(
        r'Undetermined_S0_L001_R1_001.fastq.gz',
        format=FastqGzFormat,
    )
    barcodes = model.File(
        r'Undetermined_S0_L001_I1_001.fastq.gz',
        format=FastqGzFormat,
    )
class EMPPairedEndCasavaDirFmt(model.DirectoryFormat):
    """Directory format with hard-coded ``Undetermined_S0_L001_*`` names.

    Holds forward (``R1``), reverse (``R2``) and barcodes (``I1``) files,
    each validated as ``FastqGzFormat``.
    """

    forward = model.File(
        r'Undetermined_S0_L001_R1_001.fastq.gz',
        format=FastqGzFormat,
    )
    reverse = model.File(
        r'Undetermined_S0_L001_R2_001.fastq.gz',
        format=FastqGzFormat,
    )
    barcodes = model.File(
        r'Undetermined_S0_L001_I1_001.fastq.gz',
        format=FastqGzFormat,
    )
class ErrorCorrectionDetailsFmt(model.TextFileFormat):
    """Tab-separated text file whose header must name the required columns.

    Validation inspects only the first line of the file (the header).
    """

    # Column names that must all appear in the header line.
    METADATA_COLUMNS = {
        'sample',
        'barcode-sequence-id',
        'barcode-uncorrected',
        'barcode-corrected',
        'barcode-errors',
    }

    def _validate_(self, level):
        """Check that the header line contains every required column.

        Parameters
        ----------
        level
            Validation level supplied by the caller. It is not used here;
            the same header check runs regardless of its value.

        Raises
        ------
        ValidationError
            If the first line is empty or whitespace-only, or if any name
            in ``METADATA_COLUMNS`` is missing from the tab-split header.
        """
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector.
        with open(str(self)) as fh:
            header_line = fh.readline().strip()

        if not header_line:
            raise ValidationError("Failed to locate header.")

        header = set(header_line.split('\t'))
        # Sorted so the first missing column reported is deterministic.
        for column in sorted(self.METADATA_COLUMNS):
            if column not in header:
                raise ValidationError(f"{column} is not a column")
# Single-file directory format: one 'details.tsv' member whose format is
# ErrorCorrectionDetailsFmt.
ErrorCorrectionDetailsDirFmt = model.SingleFileDirectoryFormat(
    'ErrorCorrectionDetailsDirFmt',
    'details.tsv',
    ErrorCorrectionDetailsFmt,
)