Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compatibility with CSV files that have no headers #189

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 7 additions & 1 deletion csvkit/cli.py
Expand Up @@ -7,7 +7,7 @@
import sys

from csvkit import CSVKitReader
from csvkit.exceptions import ColumnIdentifierError
from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError

def lazy_opener(fn):
def wrapped(self, *args, **kwargs):
Expand Down Expand Up @@ -177,6 +177,9 @@ def _init_common_parser(self):
if 'e' not in self.override_flags:
self.argparser.add_argument('-e', '--encoding', dest='encoding', default='utf-8',
help='Specify the encoding the input CSV file.')
if 'H' not in self.override_flags:
self.argparser.add_argument('-H', '--no-header-row', dest='no_header_row', action='store_true',
help='Specifies that the input CSV file has no header row.')
if 'v' not in self.override_flags:
self.argparser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
help='Print detailed tracebacks when errors occur.')
Expand Down Expand Up @@ -255,6 +258,9 @@ def print_column_names(self):
"""
Pretty-prints the names and indices of all columns to a file-like object (usually sys.stdout).
"""
if self.args.no_header_row:
raise RequiredHeaderError, 'You cannot use --no-header-row with the -n or --names options.'

f = self.args.file
output = self.output_file
try:
Expand Down
6 changes: 6 additions & 0 deletions csvkit/exceptions.py
Expand Up @@ -85,3 +85,9 @@ def __init__(self, errors):
msg = 'Encountered errors converting values in %i columns' % len(errors)
super(InvalidValueForTypeListException, self).__init__(msg)

class RequiredHeaderError(CustomException):
    """
    Signals that the requested operation cannot be carried out on a CSV file
    that lacks a header row.
    """

2 changes: 1 addition & 1 deletion csvkit/utilities/csvlook.py
Expand Up @@ -40,7 +40,7 @@ def main(self):

self.output_file.write(('| %s |\n' % ('|'.join(output))).encode('utf-8'))

if i == 0 or i == len(rows) - 1:
if (i == 0 and not self.args.no_header_row) or i == len(rows) - 1:
self.output_file.write('%s\n' % divider)

def launch_new_instance():
Expand Down
6 changes: 5 additions & 1 deletion csvkit/utilities/csvpy.py
Expand Up @@ -2,6 +2,7 @@

from csvkit import CSVKitReader, CSVKitDictReader
from csvkit.cli import CSVFileType, CSVKitUtility
from csvkit.exceptions import RequiredHeaderError

class CSVPy(CSVKitUtility):
description = 'Load a CSV file into a CSVKitReader object and then drops into a Python shell.'
Expand All @@ -18,7 +19,10 @@ def main(self):
filename = self.args.file.name

if self.args.as_dict:
reader_class = CSVKitDictReader
if self.args.no_header_row:
raise RequiredHeaderError, 'You cannot use --no-header-row with the csvpy --dict option.'
else:
reader_class = CSVKitDictReader
else:
reader_class = CSVKitReader

Expand Down
13 changes: 7 additions & 6 deletions csvkit/utilities/csvstack.py
Expand Up @@ -38,13 +38,14 @@ def main(self):

for i, f in enumerate(self.args.files):
rows = CSVKitReader(f, **self.reader_kwargs)
headers = rows.next()
if not self.args.no_header_row:
headers = rows.next()

if i == 0:
if groups:
headers.insert(0, group_name)
output.writerow(headers)
if i == 0:
if groups:
headers.insert(0, group_name)

output.writerow(headers)

for row in rows:
if groups:
Expand Down
1 change: 1 addition & 0 deletions docs/scripts/common_arguments.rst
Expand Up @@ -26,6 +26,7 @@ All utilities which accept CSV as input share a set of common command-line argum
-z MAXFIELDSIZE, --maxfieldsize MAXFIELDSIZE
Maximum length of a single field in the input CSV
file.
-H, --no-header-row Specifies that the input CSV file has no header row.
-e ENCODING, --encoding ENCODING
Specify the encoding of the input file.
-v, --verbose Print detailed tracebacks when errors occur.
Expand Down
2 changes: 1 addition & 1 deletion docs/scripts/csvjson.rst
Expand Up @@ -8,7 +8,7 @@ Description
Converts a CSV file into JSON or GeoJSON (depending on flags)::

usage: csvjson [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
[-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v] [-l]
[-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v] [-l]
[--zero] [-i INDENT] [-k KEY] [--lat LAT] [--lon LON]
[--crs CRS]
[FILE]
Expand Down
2 changes: 1 addition & 1 deletion docs/scripts/csvpy.rst
Expand Up @@ -8,7 +8,7 @@ Description
Loads a CSV file into a :class:`csvkit.CSVKitReader` object and then drops into a Python shell so the user can inspect the data however they see fit::

usage: csvpy [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
[-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v]
[-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v]
FILE

Load a CSV file into a CSVKitReader object and then drops into a Python shell.
Expand Down
2 changes: 1 addition & 1 deletion docs/scripts/csvsql.rst
Expand Up @@ -8,7 +8,7 @@ Description
Generate SQL statements for a CSV file or execute those statements directly on a database. In the latter case, both creating tables and inserting data are supported::

usage: csvsql [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
[-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v]
[-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v]
[-y SNIFFLIMIT]
[-i {access,sybase,sqlite,informix,firebird,mysql,oracle,maxdb,postgresql,mssql}]
[--db CONNECTION_STRING] [--insert]
Expand Down
1 change: 1 addition & 0 deletions examples/no_header_row.csv
@@ -0,0 +1 @@
1,2,3
1 change: 1 addition & 0 deletions examples/no_header_row2.csv
@@ -0,0 +1 @@
4,5,6
2 changes: 2 additions & 0 deletions examples/no_header_row3.csv
@@ -0,0 +1,2 @@
1,2,3
4,5,6
14 changes: 14 additions & 0 deletions tests/test_utilities/test_csvcut.py
Expand Up @@ -5,6 +5,7 @@

from csvkit import CSVKitReader
from csvkit.utilities.csvcut import CSVCut
from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError

class TestCSVCut(unittest.TestCase):
def test_simple(self):
Expand Down Expand Up @@ -85,3 +86,16 @@ def test_include_and_exclude(self):
self.assertEqual(reader.next(), ['a'])
self.assertEqual(reader.next(), ['1'])

def test_invalid_column(self):
    # Asking csvcut for column "0" must be reported as a bad column identifier.
    sink = StringIO.StringIO()
    cutter = CSVCut(['-c', '0', 'examples/dummy.csv'], sink)

    self.assertRaises(ColumnIdentifierError, cutter.main)

def test_invalid_options(self):
    # Listing column names (-n) together with --no-header-row must be rejected.
    sink = StringIO.StringIO()
    cutter = CSVCut(['-n', '--no-header-row', 'examples/dummy.csv'], sink)

    self.assertRaises(RequiredHeaderError, cutter.main)
14 changes: 14 additions & 0 deletions tests/test_utilities/test_csvgrep.py
Expand Up @@ -5,6 +5,7 @@

from csvkit import CSVKitReader
from csvkit.utilities.csvgrep import CSVGrep
from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError

class TestCSVCut(unittest.TestCase):
def test_match(self):
Expand Down Expand Up @@ -71,3 +72,16 @@ def test_string_match(self):
self.assertEqual(reader.next(), ['State Name', 'State Abbreviate', 'Code', 'Montgomery GI Bill-Active Duty', 'Montgomery GI Bill- Selective Reserve', 'Dependents\' Educational Assistance', 'Reserve Educational Assistance Program', 'Post-Vietnam Era Veteran\'s Educational Assistance Program', 'TOTAL', ''])
self.assertEqual(reader.next(), ['ILLINOIS', 'IL', '17', '15,659', '2,491', '2,025', '1,770', '19', '21,964', ''])

def test_invalid_column(self):
    # Asking csvgrep to match in column "0" must be reported as a bad column identifier.
    sink = StringIO.StringIO()
    grepper = CSVGrep(['-c', '0', '-m', '1', 'examples/dummy.csv'], sink)

    self.assertRaises(ColumnIdentifierError, grepper.main)

def test_invalid_options(self):
    # Listing column names (-n) together with --no-header-row must be rejected.
    sink = StringIO.StringIO()
    grepper = CSVGrep(['-n', '--no-header-row', 'examples/dummy.csv'], sink)

    self.assertRaises(RequiredHeaderError, grepper.main)
14 changes: 14 additions & 0 deletions tests/test_utilities/test_csvlook.py
Expand Up @@ -22,3 +22,17 @@ def test_simple(self):
self.assertEqual(input_file.next(), '| 1 | 4 | 5 |\n')
self.assertEqual(input_file.next(), '|----+---+----|\n')

def test_no_header(self):
    # Render a header-less file and capture everything csvlook writes.
    captured = StringIO.StringIO()
    CSVLook(['--no-header-row', 'examples/no_header_row3.csv'], captured).main()

    rendered = StringIO.StringIO(captured.getvalue())

    # The first row is data, so no divider is expected directly beneath it;
    # only the top and bottom borders surround the two rows.
    expected_lines = (
        '|----+---+----|\n',
        '| 1 | 2 | 3 |\n',
        '| 4 | 5 | 6 |\n',
        '|----+---+----|\n',
    )
    for expected in expected_lines:
        self.assertEqual(rendered.next(), expected)

14 changes: 14 additions & 0 deletions tests/test_utilities/test_csvsort.py
Expand Up @@ -6,6 +6,7 @@

from csvkit import CSVKitReader
from csvkit.utilities.csvsort import CSVSort
from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError

class TestCSVSort(unittest.TestCase):
def test_sort_string_reverse(self):
Expand Down Expand Up @@ -38,3 +39,16 @@ def test_sort_date(self):

self.assertEqual(test_order, new_order)

def test_invalid_column(self):
    # Asking csvsort to sort on column "0" must be reported as a bad column identifier.
    sink = StringIO.StringIO()
    sorter = CSVSort(['-c', '0', 'examples/dummy.csv'], sink)

    self.assertRaises(ColumnIdentifierError, sorter.main)

def test_invalid_options(self):
    # Listing column names (-n) together with --no-header-row must be rejected.
    sink = StringIO.StringIO()
    sorter = CSVSort(['-n', '--no-header-row', 'examples/dummy.csv'], sink)

    self.assertRaises(RequiredHeaderError, sorter.main)
14 changes: 14 additions & 0 deletions tests/test_utilities/test_csvstack.py
Expand Up @@ -55,3 +55,17 @@ def test_no_grouping(self):
self.assertEqual(reader.next()[0], '1')
self.assertEqual(reader.next()[0], '1')

def test_no_header_row(self):
    # Stack two header-less CSV files into one in-memory output.
    sources = ['examples/no_header_row.csv', 'examples/no_header_row2.csv']
    stacked = StringIO.StringIO()
    CSVStack(['--no-header-row'] + sources, stacked).main()

    # Read the stacked output back: the first cell of each input's row
    # should appear in the order the files were given.
    reader = CSVKitReader(StringIO.StringIO(stacked.getvalue()))

    for expected_first_cell in ('1', '4'):
        self.assertEqual(reader.next()[0], expected_first_cell)