Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Compatibility with CSV files that have no headers #189

Closed
wants to merge 5 commits into from

2 participants

@jpmckinney

These tools work without changes:

  • csvclean
  • csvjoin

The following had specific edits:

  • csvstack (the reason for this issue!): don't eat the first row of the 2nd, 3rd, etc. files
  • csvlook: don't render first row as a header

The following have options that are incompatible with --no-header-row:

  • csvpy: I doubt anyone wants fake column names when using --dict, so we just raise an error
  • csvcut: --names conflicts with --no-header-row
  • csvgrep: --names conflicts with --no-header-row
  • csvsort: --names conflicts with --no-header-row

Still left to do:

  • csvsql
  • csvjson
  • csvstat
@onyxfish onyxfish closed this in cc20a57
@onyxfish
Owner

I don't know if "better late than never" really applies to software, but nevertheless... merged! I modified the implementation a little bit to make it more consistent with the "always the same output" philosophy. It will now generate default headers whenever the --no-header-row option is specified.

@onyxfish
Owner

Thanks for the pull request, James!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
View
8 csvkit/cli.py
@@ -7,7 +7,7 @@
import sys
from csvkit import CSVKitReader
-from csvkit.exceptions import ColumnIdentifierError
+from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError
def lazy_opener(fn):
def wrapped(self, *args, **kwargs):
@@ -177,6 +177,9 @@ def _init_common_parser(self):
if 'e' not in self.override_flags:
self.argparser.add_argument('-e', '--encoding', dest='encoding', default='utf-8',
help='Specify the encoding the input CSV file.')
+ if 'H' not in self.override_flags:
+ self.argparser.add_argument('-H', '--no-header-row', dest='no_header_row', action='store_true',
+ help='Specifies that the input CSV file has no header row.')
if 'v' not in self.override_flags:
self.argparser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
help='Print detailed tracebacks when errors occur.')
@@ -255,6 +258,9 @@ def print_column_names(self):
"""
Pretty-prints the names and indices of all columns to a file-like object (usually sys.stdout).
"""
+ if self.args.no_header_row:
+ raise RequiredHeaderError, 'You cannot use --no-header-row with the -n or --names options.'
+
f = self.args.file
output = self.output_file
try:
View
6 csvkit/exceptions.py
@@ -85,3 +85,9 @@ def __init__(self, errors):
msg = 'Encountered errors converting values in %i columns' % len(errors)
super(InvalidValueForTypeListException, self).__init__(msg)
+class RequiredHeaderError(CustomException):
+ """
+ Exception raised when an operation requires a CSV file to have a header row.
+ """
+ pass
+
View
2  csvkit/utilities/csvlook.py
@@ -40,7 +40,7 @@ def main(self):
self.output_file.write(('| %s |\n' % ('|'.join(output))).encode('utf-8'))
- if i == 0 or i == len(rows) - 1:
+ if (i == 0 and not self.args.no_header_row) or i == len(rows) - 1:
self.output_file.write('%s\n' % divider)
def launch_new_instance():
View
6 csvkit/utilities/csvpy.py
@@ -2,6 +2,7 @@
from csvkit import CSVKitReader, CSVKitDictReader
from csvkit.cli import CSVFileType, CSVKitUtility
+from csvkit.exceptions import RequiredHeaderError
class CSVPy(CSVKitUtility):
description = 'Load a CSV file into a CSVKitReader object and then drops into a Python shell.'
@@ -18,7 +19,10 @@ def main(self):
filename = self.args.file.name
if self.args.as_dict:
- reader_class = CSVKitDictReader
+ if self.args.no_header_row:
+ raise RequiredHeaderError, 'You cannot use --no-header-row with the csvpy --dict option.'
+ else:
+ reader_class = CSVKitDictReader
else:
reader_class = CSVKitReader
View
13 csvkit/utilities/csvstack.py
@@ -38,13 +38,14 @@ def main(self):
for i, f in enumerate(self.args.files):
rows = CSVKitReader(f, **self.reader_kwargs)
- headers = rows.next()
+ if not self.args.no_header_row:
+ headers = rows.next()
- if i == 0:
- if groups:
- headers.insert(0, group_name)
-
- output.writerow(headers)
+ if i == 0:
+ if groups:
+ headers.insert(0, group_name)
+
+ output.writerow(headers)
for row in rows:
if groups:
View
1  docs/scripts/common_arguments.rst
@@ -26,6 +26,7 @@ All utilities which accept CSV as input share a set of common command-line argum
-z MAXFIELDSIZE, --maxfieldsize MAXFIELDSIZE
Maximum length of a single field in the input CSV
file.
+ -H, --no-header-row Specifies that the input CSV file has no header row.
-e ENCODING, --encoding ENCODING
-v, --verbose Print detailed tracebacks when errors occur.
Specify the encoding the input file.
View
2  docs/scripts/csvjson.rst
@@ -8,7 +8,7 @@ Description
Converts a CSV file into JSON or GeoJSON (depending on flags)::
usage: csvjson [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
- [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v] [-l]
+ [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v] [-l]
[--zero] [-i INDENT] [-k KEY] [--lat LAT] [--lon LON]
[--crs CRS]
[FILE]
View
2  docs/scripts/csvpy.rst
@@ -8,7 +8,7 @@ Description
Loads a CSV file into a :class:`csvkit.CSVKitReader` object and then drops into a Python shell so the user can inspect the data however they see fit::
usage: csvpy [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
- [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v]
+ [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v]
FILE
Load a CSV file into a CSVKitReader object and then drops into a Python shell.
View
2  docs/scripts/csvsql.rst
@@ -8,7 +8,7 @@ Description
Generate SQL statements for a CSV file or create execute those statements directly on a database. In the latter case supports both creating tables and inserting data.::
usage: csvsql [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
- [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v]
+ [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v]
[-y SNIFFLIMIT]
[-i {access,sybase,sqlite,informix,firebird,mysql,oracle,maxdb,postgresql,mssql}]
[--db CONNECTION_STRING] [--insert]
View
1  examples/no_header_row.csv
@@ -0,0 +1 @@
+1,2,3
View
1  examples/no_header_row2.csv
@@ -0,0 +1 @@
+4,5,6
View
2  examples/no_header_row3.csv
@@ -0,0 +1,2 @@
+1,2,3
+4,5,6
View
14 tests/test_utilities/test_csvcut.py
@@ -5,6 +5,7 @@
from csvkit import CSVKitReader
from csvkit.utilities.csvcut import CSVCut
+from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError
class TestCSVCut(unittest.TestCase):
def test_simple(self):
@@ -85,3 +86,16 @@ def test_include_and_exclude(self):
self.assertEqual(reader.next(), ['a'])
self.assertEqual(reader.next(), ['1'])
+ def test_invalid_column(self):
+ args = ['-c', '0', 'examples/dummy.csv']
+ output_file = StringIO.StringIO()
+ utility = CSVCut(args, output_file)
+
+ self.assertRaises(ColumnIdentifierError, utility.main)
+
+ def test_invalid_options(self):
+ args = ['-n', '--no-header-row', 'examples/dummy.csv']
+ output_file = StringIO.StringIO()
+ utility = CSVCut(args, output_file)
+
+ self.assertRaises(RequiredHeaderError, utility.main)
View
14 tests/test_utilities/test_csvgrep.py
@@ -5,6 +5,7 @@
from csvkit import CSVKitReader
from csvkit.utilities.csvgrep import CSVGrep
+from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError
class TestCSVCut(unittest.TestCase):
def test_match(self):
@@ -71,3 +72,16 @@ def test_string_match(self):
self.assertEqual(reader.next(), ['State Name', 'State Abbreviate', 'Code', 'Montgomery GI Bill-Active Duty', 'Montgomery GI Bill- Selective Reserve', 'Dependents\' Educational Assistance', 'Reserve Educational Assistance Program', 'Post-Vietnam Era Veteran\'s Educational Assistance Program', 'TOTAL', ''])
self.assertEqual(reader.next(), ['ILLINOIS', 'IL', '17', '15,659', '2,491', '2,025', '1,770', '19', '21,964', ''])
+ def test_invalid_column(self):
+ args = ['-c', '0', '-m', '1', 'examples/dummy.csv']
+ output_file = StringIO.StringIO()
+ utility = CSVGrep(args, output_file)
+
+ self.assertRaises(ColumnIdentifierError, utility.main)
+
+ def test_invalid_options(self):
+ args = ['-n', '--no-header-row', 'examples/dummy.csv']
+ output_file = StringIO.StringIO()
+ utility = CSVGrep(args, output_file)
+
+ self.assertRaises(RequiredHeaderError, utility.main)
View
14 tests/test_utilities/test_csvlook.py
@@ -22,3 +22,17 @@ def test_simple(self):
self.assertEqual(input_file.next(), '| 1 | 4 | 5 |\n')
self.assertEqual(input_file.next(), '|----+---+----|\n')
+ def test_no_header(self):
+ args = ['--no-header-row', 'examples/no_header_row3.csv']
+ output_file = StringIO.StringIO()
+ utility = CSVLook(args, output_file)
+
+ utility.main()
+
+ input_file = StringIO.StringIO(output_file.getvalue())
+
+ self.assertEqual(input_file.next(), '|----+---+----|\n')
+ self.assertEqual(input_file.next(), '| 1 | 2 | 3 |\n')
+ self.assertEqual(input_file.next(), '| 4 | 5 | 6 |\n')
+ self.assertEqual(input_file.next(), '|----+---+----|\n')
+
View
14 tests/test_utilities/test_csvsort.py
@@ -6,6 +6,7 @@
from csvkit import CSVKitReader
from csvkit.utilities.csvsort import CSVSort
+from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError
class TestCSVSort(unittest.TestCase):
def test_sort_string_reverse(self):
@@ -38,3 +39,16 @@ def test_sort_date(self):
self.assertEqual(test_order, new_order)
+ def test_invalid_column(self):
+ args = ['-c', '0', 'examples/dummy.csv']
+ output_file = StringIO.StringIO()
+ utility = CSVSort(args, output_file)
+
+ self.assertRaises(ColumnIdentifierError, utility.main)
+
+ def test_invalid_options(self):
+ args = ['-n', '--no-header-row', 'examples/dummy.csv']
+ output_file = StringIO.StringIO()
+ utility = CSVSort(args, output_file)
+
+ self.assertRaises(RequiredHeaderError, utility.main)
View
14 tests/test_utilities/test_csvstack.py
@@ -55,3 +55,17 @@ def test_no_grouping(self):
self.assertEqual(reader.next()[0], '1')
self.assertEqual(reader.next()[0], '1')
+ def test_no_header_row(self):
+ # stack two CSV files
+ args = ['--no-header-row', 'examples/no_header_row.csv', 'examples/no_header_row2.csv']
+ output_file = StringIO.StringIO()
+ utility = CSVStack(args, output_file)
+
+ utility.main()
+
+ # verify the stacked file's contents
+ input_file = StringIO.StringIO(output_file.getvalue())
+ reader = CSVKitReader(input_file)
+
+ self.assertEqual(reader.next()[0], '1')
+ self.assertEqual(reader.next()[0], '4')
Something went wrong with that request. Please try again.