Skip to content
Permalink
Browse files

support skipLines and commentChar for TSV files

  • Loading branch information
ssadedin committed Nov 19, 2019
1 parent de50b9e commit e6afaff9b28c3400c048320e244d6f2d56679c76
Showing with 31 additions and 4 deletions.
  1. +1 −1 build.gradle
  2. +19 −3 src/main/groovy/graxxia/TSV.groovy
  3. +11 −0 src/test/data/tsv_with_comments_and_headers.tsv
@@ -41,7 +41,7 @@ dependencies {

compile 'junit:junit:4.4'
compile 'commons-cli:commons-cli:1.4'
compile 'com.xlson.groovycsv:groovycsv:1.0'
compile 'com.xlson.groovycsv:groovycsv:1.3'
compile 'log4j:log4j:1.2.17'
compile files(fileTree(dir:'lib', includes:['*.jar']))
}
@@ -83,7 +83,20 @@ class TSV implements Iterable<PropertyMapper> {
/**
 * Creates a new CSV iterator over this TSV's data.
 *
 * The separator defaults to a tab when none was configured. When the
 * <code>commentChar</code> option is set, a second reader is opened to count
 * the leading lines that start with that prefix, and that count is added to
 * any configured <code>skipLines</code> for the parse.
 *
 * @return the iterator produced by CsvParser.parseCsv
 * @throws IllegalArgumentException if commentChar is set but reader() hands
 *         back the same Reader instance on each call
 */
CsvIterator newIterator() {
    if(!options.containsKey("separator"))
        options.separator = "\t"

    // Parse with a copy of the options: writing the computed skipLines back
    // into this.options would add the comment-line count again on every
    // subsequent call to newIterator(), skipping real data rows.
    Map parseOptions = new LinkedHashMap(options)

    Reader originalReader = reader()
    if(options.commentChar) {
        // A second, independent reader is needed to pre-scan the comment lines
        // without consuming any input from the reader handed to the parser.
        Reader probe = reader()
        if(probe.is(originalReader))
            throw new IllegalArgumentException("The commentChar option cannot be used with a raw Reader. Please create a TSV with a fileName argument instead")

        int commentLines = 0
        try {
            // Only the leading run of comment lines is counted; a null line (EOF) ends the scan
            while(probe.readLine()?.startsWith(options.commentChar))
                ++commentLines
        }
        finally {
            // NOTE(review): assumes reader() opens an independent underlying
            // resource per call, so closing the probe does not affect originalReader
            probe.close()
        }
        parseOptions.skipLines = (options.skipLines ?: 0) + commentLines
    }

    CsvParser.parseCsv(parseOptions, originalReader)
}

/*
@@ -115,8 +128,11 @@ class TSV implements Iterable<PropertyMapper> {
// }
//

TSV(Reader reader, List<String> columnNames) {
parser = CsvParser.parseCsv(reader, columnNames: columnNames, readFirstLine: true, separator: '\t')
/**
 * Creates a TSV that parses the given reader using explicit column names.
 *
 * Defaults (the supplied column names, readFirstLine: true and a tab
 * separator) are merged with the caller's options; entries in
 * <code>options</code> take precedence over the defaults.
 *
 * @param options       optional parser settings overriding the defaults
 * @param reader        source of the tab separated data
 * @param columnNames   names to assign to the columns
 */
TSV(Map options=[:], Reader reader, List<String> columnNames) {

    this.options = [columnNames: columnNames, readFirstLine: true, separator: '\t'] + options

    // Use the merged this.options rather than the shadowing parameter, which
    // lacks the defaults, and pass the Map first as the other parseCsv call
    // in this class (newIterator) does.
    parser = CsvParser.parseCsv(this.options, reader)
}

Iterator<PropertyMapper> iterator() {
@@ -0,0 +1,11 @@
#description: HPO annotations for rare diseases [7623: OMIM; 47: DECIPHER; 3771 ORPHANET]
#date: 2019-11-08
#tracker: https://github.com/obophenotype/human-phenotype-ontology
#HPO-version: http://purl.obolibrary.org/obo/hp.obo/hp/releases/2019-11-08/hp.obo.owl
DatabaseID DiseaseName
OMIM:210100 BETA-AMINOISOBUTYRIC ACID, URINARY EXCRETION OF
OMIM:210100 BETA-AMINOISOBUTYRIC ACID, URINARY EXCRETION OF
OMIM:163600 NIPPLES INVERTED
OMIM:163600 NIPPLES INVERTED
OMIM:615763 #615763 CORTICAL DYSPLASIA, COMPLEX, WITH OTHER BRAIN MALFORMATIONS 5; CDCBM5
OMIM:615763 #615763 CORTICAL DYSPLASIA, COMPLEX, WITH OTHER BRAIN MALFORMATIONS 5; CDCBM5

0 comments on commit e6afaff

Please sign in to comment.
You can’t perform that action at this time.