Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 2 commits
  • 7 files changed
  • 0 commit comments
  • 1 contributor
View
2  project/Build.scala
@@ -8,7 +8,7 @@ object ScalaCSVProject extends Build {
base = file ("."),
settings = Defaults.defaultSettings ++ Seq (
name := "scala-csv",
- version := "0.8.0",
+ version := "1.0.0-SNAPSHOT",
crossScalaVersions := Seq("2.9.1", "2.9.2", "2.9.3", "2.10.0"),
organization := "com.github.tototoshi",
libraryDependencies ++= Seq(
View
56 src/main/scala/com/github/tototoshi/csv/CSVParser.scala
@@ -0,0 +1,56 @@
+package com.github.tototoshi.csv
+
+import scala.util.parsing.combinator.{RegexParsers}
+import java.io.{Reader, InputStream}
+
+
+/**
+ * Mixin that adds a combinator for parsing content wrapped in delimiters.
+ */
+protected trait Between extends RegexParsers {
+
+  /**
+   * Parses `start`, then `p`, then `end`, keeping only the result of `p`.
+   *
+   * @param start literal text that must precede `p`
+   * @param p     parser for the wrapped content
+   * @param end   literal text that must follow `p`
+   * @return a parser yielding the result of `p`
+   */
+  def between[A](start: String, p: Parser[A], end: String): Parser[A] = {
+    val opening = literal(start)
+    val closing = literal(end)
+    (opening ~> p) <~ closing
+  }
+
+  /**
+   * Variant of `between` for content wrapped in the same delimiter on both sides.
+   *
+   * @param startAndEnd literal text expected before and after `p`
+   * @param p           parser for the wrapped content
+   * @return a parser yielding the result of `p`
+   */
+  def between[A](startAndEnd: String, p: Parser[A]): Parser[A] =
+    between(start = startAndEnd, p = p, end = startAndEnd)
+
+}
+
+/**
+ * Parser-combinator grammar for a single CSV record.
+ *
+ * A record is one or more fields joined by `separatorChar`. A field is either
+ * enclosed in `quoteChar` (and may then contain separators, line breaks and
+ * doubled quote characters) or is a bare run of characters that are none of
+ * separator, quote, CR or LF.
+ *
+ * @param separatorChar character placed between fields
+ * @param quoteChar     character that encloses an escaped field
+ */
+class CSVParser(val separatorChar: Char = ',',
+                val quoteChar: Char = '"')
+  extends RegexParsers
+  with Between {
+
+  // Whitespace is field data here, so RegexParsers must not skip it.
+  override def skipWhitespace = false
+
+  def cr: String = "\r"
+
+  def lf: String = "\n"
+
+  def quote: String = quoteChar.toString
+
+  def separator: String = separatorChar.toString
+
+  /** One record: a first field followed by any number of separator-prefixed fields. */
+  def record: Parser[List[String]] = field ~ rep(separator ~> field) ^^ {
+    case head ~ tail => head :: tail
+  }
+
+  def name: Parser[String] = field
+
+  /** A quoted field is tried first; otherwise the field is read as bare text (possibly empty). */
+  def field: Parser[String] = escaped | nonEscaped
+
+  /**
+   * Two consecutive quote characters inside a quoted field.
+   *
+   * They stand for ONE literal quote character, so the parser yields the
+   * quote itself rather than dropping it from the field value.
+   */
+  def escapedQuote: Parser[String] = repN(2, quote) ^^ {
+    _ => quote
+  }
+
+  /** A field enclosed in quotes; the surrounding quotes are not part of the value. */
+  def escaped: Parser[String] = between(quote, rep(textData | separator | crlf | escapedQuote)) ^^ {
+    _.mkString
+  }
+
+  /** A bare field: zero or more ordinary characters. */
+  def nonEscaped: Parser[String] = rep(textData) ^^ {
+    _.mkString
+  }
+
+  /**
+   * A line break.
+   *
+   * CR LF is tried first so that a two-character line ending is consumed as a
+   * single break; matching only CR would leave a stray LF behind that the next
+   * parse would read as an extra, empty record.
+   */
+  def crlf: Parser[String] = literal(cr + lf) | literal(cr) | literal(lf)
+
+  /**
+   * Any single character that is not a separator, a quote, CR or LF.
+   *
+   * `(?s)` makes `.` match every character; without it the regex rejects the
+   * other Unicode line terminators (U+0085, U+2028, U+2029), which the guard
+   * in front does not exclude and which are therefore ordinary field data.
+   */
+  def textData: Parser[String] = not(separator | quote | crlf) ~> """(?s).""".r
+
+  /**
+   * Parses one record from `in` and consumes at most one line break after it.
+   *
+   * @param in input positioned at the start of a record
+   * @return the parse result; on success it carries the record's fields and
+   *         the input remaining after the record
+   */
+  def parseLine(in: Input): ParseResult[List[String]] = parse(record <~ opt(crlf), in)
+
+}
View
44 src/main/scala/com/github/tototoshi/csv/CSVReader.scala
@@ -20,9 +20,16 @@ import au.com.bytecode.opencsv.{CSVReader => JCSVReader}
import java.io._
import scala.collection.JavaConversions._
import java.util.NoSuchElementException
-import au.com.bytecode.opencsv
+import scala.util.parsing.input.PagedSeqReader
+import scala.collection.immutable.PagedSeq
-class CSVReader protected (private val underlying: JCSVReader) {
+class CSVParserException(msg: String) extends Exception(msg)
+
+class CSVReader protected (private val reader: Reader, separatorChar: Char = ',', quoteChar: Char = '"') {
+
+ private val parser = new CSVParser(separatorChar = separatorChar, quoteChar = quoteChar)
+
+ private var pagedReader: parser.Input = new PagedSeqReader(PagedSeq.fromReader(reader))
@deprecated("No longer supported", "0.8.0")
def apply[A](f: Iterator[Seq[String]] => A): A = {
@@ -33,7 +40,28 @@ class CSVReader protected (private val underlying: JCSVReader) {
}
}
- def readNext(): Option[List[String]] = Option(underlying.readNext).map(_.toList)
+ /**
+  * Partial function defined for unsuccessful parse results only.
+  *
+  * It never produces a `B`: both `Failure` and `Error` (the two kinds of
+  * `NoSuccess`) are turned into a thrown `CSVParserException` carrying the
+  * parser's message.
+  */
+ private def handleParseError[A, B]: PartialFunction[parser.ParseResult[A], B] = {
+   case unsuccessful: parser.NoSuccess =>
+     throw new CSVParserException(unsuccessful.msg)
+ }
+
+ /**
+  * Reads the next record from the underlying input.
+  *
+  * @return `None` once the input is exhausted, otherwise the fields of the
+  *         next record
+  * @throws CSVParserException if the parser reports a failure or error, or
+  *                            if it cannot make progress on the remaining input
+  */
+ def readNext(): Option[List[String]] = {
+   if (pagedReader.atEnd) {
+     None
+   } else {
+     parser.parseLine(pagedReader) match {
+       case parser.Success(result, rest) =>
+         // The record grammar also succeeds on empty input, so malformed data
+         // (for example an unterminated quoted field) can yield a "success"
+         // that consumed nothing. Accepting it would leave the reader at the
+         // same position forever and make readNext/toStream/all spin.
+         if (rest.offset == pagedReader.offset) {
+           throw new CSVParserException(
+             "Malformed CSV input at offset " + rest.offset + ": no characters could be consumed")
+         }
+         pagedReader = rest
+         Some(result)
+       case unsuccessful =>
+         handleParseError[List[String], Option[List[String]]](unsuccessful)
+     }
+   }
+ }
def foreach(f: Seq[String] => Unit): Unit = iterator.foreach(f)
@@ -64,8 +92,10 @@ class CSVReader protected (private val underlying: JCSVReader) {
def toStream(): Stream[List[String]] =
Stream.continually(readNext).takeWhile(_.isDefined).map(_.get)
- def all(): List[List[String]] =
- underlying.readAll().map(_.toList).toList
+ /**
+  * Reads every remaining record by draining `toStream()` into a list.
+  *
+  * @return the remaining records, in reading order
+  */
+ def all(): List[List[String]] = toStream().toList
+
def allWithHeaders(): List[Map[String, String]] = {
readNext() map { headers =>
@@ -74,7 +104,7 @@ class CSVReader protected (private val underlying: JCSVReader) {
} getOrElse List()
}
- def close(): Unit = underlying.close()
+ def close(): Unit = reader.close()
}
@@ -89,7 +119,7 @@ object CSVReader {
def apply(reader: Reader): CSVReader = open(reader)(defaultCSVFormat)
def open(reader: Reader)(implicit format: CSVFormat): CSVReader =
- new CSVReader(new JCSVReader(reader, format.separator, format.quoteChar, format.numberOfLinesToSkip))
+ new CSVReader(reader, format.separator, format.quoteChar)
def open(file: File)(implicit format: CSVFormat): CSVReader = {
open(file, this.DEFAULT_ENCODING)(format)
View
5 src/test/resources/beginning-junk-hash-separated-dollar-quote.csv
@@ -1,5 +0,0 @@
-This
-is
-junk
-$Foo $#$Bar $#$Baz $
-$a$#$b$#$c$
View
2  src/test/resources/simple.csv
@@ -1,2 +1,2 @@
a,b,c
-d,e,f
+d,e,f
View
2  src/test/resources/with-headers.csv
@@ -1,3 +1,3 @@
Foo,Bar,Baz
a,b,c
-d,e,f
+d,e,f
View
13 src/test/scala/com/github/tototoshi/csv/CSVReaderSpec.scala
@@ -73,19 +73,6 @@ class CSVReaderSpec extends FunSpec with ShouldMatchers with Using {
}
}
- it("should be constructed with separators, quotes, and line skipping") {
- implicit object format extends DefaultCSVFormat {
- override val separator: Char = '#'
- override val quoteChar: Char = '$'
- override val numberOfLinesToSkip: Int = 3
- }
- using (CSVReader.open("src/test/resources/beginning-junk-hash-separated-dollar-quote.csv")) { reader => {
- val map = reader.allWithHeaders()
- map(0)("Foo ") should be ("a")
- }
- }
- }
-
it("read CSV from file") {
var res: List[String] = Nil
using (CSVReader.open(new FileReader("src/test/resources/simple.csv"))) { reader =>

No commit comments for this range

Something went wrong with that request. Please try again.