Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Additional csv functionality

- csv-row doesn't close the stream
- csv* takes no args and processes from the input-stream
- added initial support for tweaking csv dialect settings
  • Loading branch information...
commit 752e6a5e075dfc07a692a0ae797164c5da85d2d7 1 parent caf4b6c
Phil Dawes authored
14 basis/csv/csv-docs.factor
View
@@ -1,11 +1,10 @@
-USING: help.syntax help.markup kernel prettyprint sequences
-io.pathnames ;
+USING: help.syntax help.markup kernel prettyprint sequences io.pathnames ;
IN: csv
HELP: csv
{ $values { "stream" "an input stream" }
{ "rows" "an array of arrays of fields" } }
-{ $description "Parses a csv stream into an array of row arrays." } ;
+{ $description "Parses a csv stream into an array of row arrays. (closes the stream)" } ;
HELP: file>csv
{ $values
@@ -24,7 +23,7 @@ HELP: csv>file
HELP: csv-row
{ $values { "stream" "an input stream" }
{ "row" "an array of fields" } }
-{ $description "parses a row from a csv stream" } ;
+{ $description "parses a row from a csv stream. Doesn't close the stream" } ;
HELP: write-csv
{ $values { "rows" "a sequence of sequences of strings" }
@@ -36,6 +35,11 @@ HELP: with-delimiter
{ "quot" "a quotation" } }
{ $description "Sets the field delimiter for csv or csv-row words." } ;
+HELP: with-dialect
+{ $values { "dialect" "a dialect object" }
+ { "quot" "a quotation" } }
+ { $description "Sets the " { $link dialect } " for reading and writing csv files." } ;
+
ARTICLE: "csv" "Comma-separated-values parsing and writing"
"The " { $vocab-link "csv" } " vocabulary can read and write CSV (comma-separated-value) files." $nl
"Reading a csv file:"
@@ -44,6 +48,8 @@ ARTICLE: "csv" "Comma-separated-values parsing and writing"
{ $subsection csv>file }
"Changing the delimiter from a comma:"
{ $subsection with-delimiter }
+"Changing the csv dialect for reading/writing:"
+{ $subsection with-dialect }
"Reading from a stream:"
{ $subsection csv }
"Writing to a stream:"
19 basis/csv/csv-tests.factor
View
@@ -1,6 +1,5 @@
-USING: io.streams.string csv tools.test kernel strings
-io.pathnames io.files.unique io.encodings.utf8 io.files
-io.directories ;
+USING: accessors csv io.directories io.encodings.utf8 io.files
+io.files.unique io.streams.string kernel strings tools.test ;
IN: csv.tests
! I like to name my unit tests
@@ -63,6 +62,13 @@ IN: csv.tests
[ { { "foo" "bah" "baz" } } ]
[ "foo\tbah\tbaz\n" <string-reader> CHAR: \t [ csv ] with-delimiter ] named-unit-test
+"allows setting of dialect"
+[ { { "foo" "bah" "baz" } } ]
+[ "foo\tbah\tbaz\n" <string-reader>
+ <dialect> CHAR: \t >>delimiter [ csv ] with-dialect ] named-unit-test
+
+
+
"Quoted field followed immediately by newline"
[ { { "foo" "bar" }
{ "1" "2" } } ]
@@ -76,6 +82,13 @@ IN: csv.tests
[ "\"fo\"\"o1\",bar1\n\"fo\no2\",\"b,ar2\"\n" ]
[ { { "fo\"o1" "bar1" } { "fo\no2" "b,ar2" } } <string-writer> tuck write-csv >string ] named-unit-test ! "
+"can output dos carriage returns"
+[ "\"fo\"\"o1\",bar1\r\n\"fo\no2\",\"b,ar2\"\r\n" ]
+[ { { "fo\"o1" "bar1" } { "fo\no2" "b,ar2" } } <string-writer> tuck
+ <dialect> "\r\n" >>lineterminator [ write-csv ] with-dialect >string ] named-unit-test ! "
+
+
+
[ { { "writing" "some" "csv" "tests" } } ]
[
"writing,some,csv,tests"
64 basis/csv/csv.factor
View
@@ -1,44 +1,58 @@
! Copyright (C) 2007, 2008 Phil Dawes
! See http://factorcode.org/license.txt for BSD license.
-USING: kernel sequences io namespaces make combinators
-unicode.categories io.files combinators.short-circuit ;
+USING: accessors combinators combinators.short-circuit io io.files
+kernel make namespaces sequences unicode.categories ;
IN: csv
SYMBOL: delimiter
CHAR: , delimiter set-global
+TUPLE: dialect delimiter quotechar lineterminator ;
+
+: <dialect> ( -- d )
+ dialect new
+ CHAR: , >>delimiter
+ CHAR: " >>quotechar
+ "\n" >>lineterminator ;
+
+SYMBOL: thedialect
+
+<dialect> thedialect set-global
+
<PRIVATE
-: delimiter> ( -- delimiter ) delimiter get ; inline
-
+: delimiter> ( -- ch ) thedialect get delimiter>> ; inline
+: quotechar> ( -- ch ) thedialect get quotechar>> ; inline
+: lineterminator> ( -- str ) thedialect get lineterminator>> ; inline
+
DEFER: quoted-field ( -- endchar )
: trim-whitespace ( str -- str )
[ blank? ] trim ; inline
: skip-to-field-end ( -- endchar )
- "\n" delimiter> suffix read-until nip ; inline
+ "\n" delimiter> suffix read-until nip ; inline
: not-quoted-field ( -- endchar )
- "\"\n" delimiter> suffix read-until
+ "\n" delimiter> suffix quotechar> suffix read-until
dup {
- { CHAR: " [ 2drop quoted-field ] }
- { delimiter> [ swap trim-whitespace % ] }
- { CHAR: \n [ swap trim-whitespace % ] }
- { f [ swap trim-whitespace % ] }
+ { quotechar> [ 2drop quoted-field ] }
+ { delimiter> [ swap trim-whitespace % ] }
+ { CHAR: \n [ swap trim-whitespace % ] }
+ { f [ swap trim-whitespace % ] }
} case ;
: maybe-escaped-quote ( -- endchar )
read1 dup {
- { CHAR: " [ , quoted-field ] }
+ { quotechar> [ , quoted-field ] }
{ delimiter> [ ] }
{ CHAR: \n [ ] }
[ 2drop skip-to-field-end ]
} case ;
: quoted-field ( -- endchar )
- "\"" read-until
+ "" quotechar> suffix read-until
drop % maybe-escaped-quote ;
: field ( -- sep string )
@@ -59,33 +73,41 @@ DEFER: quoted-field ( -- endchar )
PRIVATE>
: csv-row ( stream -- row )
- [ row nip ] with-input-stream ;
+ [ row nip ] with-input-stream* ;
+: csv* ( -- rows )
+ [ (csv) ] { } make dup last { "" } = [ but-last ] when ;
+
: csv ( stream -- rows )
- [ [ (csv) ] { } make ] with-input-stream
- dup last { "" } = [ but-last ] when ;
+ [ csv* ] with-input-stream ;
: file>csv ( path encoding -- csv )
<file-reader> csv ;
+: with-dialect ( dialect quot -- )
+ [ thedialect ] dip with-variable ; inline
+
: with-delimiter ( ch quot -- )
- [ delimiter ] dip with-variable ; inline
+ [ <dialect> swap >>delimiter ] dip with-dialect ; inline
+
<PRIVATE
: needs-escaping? ( cell -- ? )
- [ { [ "\n\"" member? ] [ delimiter get = ] } 1|| ] any? ; inline
+ [ { [ "" lineterminator> suffix quotechar> suffix member? ] [ delimiter> = ] } 1|| ] any? ; inline
: escape-quotes ( cell -- cell' )
[
[
[ , ]
- [ dup CHAR: " = [ , ] [ drop ] if ] bi
+ [ dup quotechar> = [ , ] [ drop ] if ] bi
] each
] "" make ; inline
+: quotechar-as-str ( -- s ) "" quotechar> suffix ; inline
+
: enclose-in-quotes ( cell -- cell' )
- "\"" dup surround ; inline
+ quotechar-as-str dup surround ; inline
: escape-if-required ( cell -- cell' )
dup needs-escaping?
@@ -94,8 +116,8 @@ PRIVATE>
PRIVATE>
: write-row ( row -- )
- [ delimiter get write1 ]
- [ escape-if-required write ] interleave nl ; inline
+ [ delimiter> write1 ]
+ [ escape-if-required write ] interleave lineterminator> write ; inline
: write-csv ( rows stream -- )
[ [ write-row ] each ] with-output-stream ;
Please sign in to comment.
Something went wrong with that request. Please try again.