Skip to content

Commit

Permalink
Fix csvlook and csvstat properly. Closes #346. Closes #363.
Browse files: browse the repository at this point in the history
  • Loading branch information
onyxfish committed Nov 22, 2014
1 parent 63234a0 commit 4548bcd
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 35 deletions.
9 changes: 1 addition & 8 deletions csvkit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,7 @@ def __init__(self, args=None, output_file=None):
self._install_exception_handler()

if output_file is None:
# Use binary output when supported for PY3
# Note we can't always specify `.buffer`, because sys.stdout
# can be swapped out with `io.StringIO` (e.g. in tests)
# which does not support it.
if hasattr(sys.stdout, 'buffer'):
self.output_file = sys.stdout.buffer
else:
self.output_file = sys.stdout
self.output_file = sys.stdout
else:
self.output_file = output_file

Expand Down
8 changes: 3 additions & 5 deletions csvkit/utilities/csvlook.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,7 @@ def main(self):
# horizontal and vertical dividers.
divider = '|--' + '-+-'.join('-'* w for w in widths) + '--|'

write = lambda t: self.output_file.write(t.encode('utf-8'))

write('%s\n' % divider)
self.output_file.write('%s\n' % divider)

for i, row in enumerate(rows):
output = []
Expand All @@ -68,10 +66,10 @@ def main(self):
d = ''
output.append(' %s ' % six.text_type(d).ljust(widths[j]))

write('| %s |\n' % ('|'.join(output)))
self.output_file.write('| %s |\n' % ('|'.join(output)))

if (i == 0 or i == len(rows) - 1):
write('%s\n' % divider)
self.output_file.write('%s\n' % divider)

def launch_new_instance():
utility = CSVLook()
Expand Down
42 changes: 20 additions & 22 deletions csvkit/utilities/csvstat.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,13 @@ def main(self):
if operations and self.args.count_only:
self.argparser.error('You may not specify --count and a statistical argument at the same time.')

write = lambda t: self.output_file.write(t.encode('utf-8'))

if self.args.count_only:
count = len(list(CSVKitReader(self.input_file)))

if not self.args.no_header_row:
count -= 1

write('Row count: %i\n' % count)
self.output_file.write('Row count: %i\n' % count)

return

Expand Down Expand Up @@ -96,51 +94,51 @@ def main(self):
stat = '{ %s }' % stat

if len(tab) == 1:
write(six.text_type(stat))
self.output_file.write(six.text_type(stat))
else:
write('%3i. %s: %s\n' % (c.order + 1, c.name, stat))
self.output_file.write('%3i. %s: %s\n' % (c.order + 1, c.name, stat))
# Output all stats
else:
for op in OPERATIONS:
stats[op] = getattr(self, 'get_%s' % op)(c, values, stats)

write(('%3i. %s\n' % (c.order + 1, c.name)))
self.output_file.write(('%3i. %s\n' % (c.order + 1, c.name)))

if c.type == None:
write('\tEmpty column\n')
self.output_file.write('\tEmpty column\n')
continue

write('\t%s\n' % c.type)
write('\tNulls: %s\n' % stats['nulls'])
self.output_file.write('\t%s\n' % c.type)
self.output_file.write('\tNulls: %s\n' % stats['nulls'])

if len(stats['unique']) <= MAX_UNIQUE and c.type is not bool:
uniques = [six.text_type(u) for u in list(stats['unique'])]
data = u'\tValues: %s\n' % ', '.join(uniques)
write(data)
self.output_file.write(data)
else:
if c.type not in [six.text_type, bool]:
write('\tMin: %s\n' % stats['min'])
write('\tMax: %s\n' % stats['max'])
self.output_file.write('\tMin: %s\n' % stats['min'])
self.output_file.write('\tMax: %s\n' % stats['max'])

if c.type in [int, float]:
write('\tSum: %s\n' % stats['sum'])
write('\tMean: %s\n' % stats['mean'])
write('\tMedian: %s\n' % stats['median'])
write('\tStandard Deviation: %s\n' % stats['stdev'])
self.output_file.write('\tSum: %s\n' % stats['sum'])
self.output_file.write('\tMean: %s\n' % stats['mean'])
self.output_file.write('\tMedian: %s\n' % stats['median'])
self.output_file.write('\tStandard Deviation: %s\n' % stats['stdev'])

write('\tUnique values: %i\n' % len(stats['unique']))
self.output_file.write('\tUnique values: %i\n' % len(stats['unique']))

if len(stats['unique']) != len(values):
write('\t%i most frequent values:\n' % MAX_FREQ)
self.output_file.write('\t%i most frequent values:\n' % MAX_FREQ)
for value, count in stats['freq']:
write(('\t\t%s:\t%s\n' % (six.text_type(value), count)))
self.output_file.write(('\t\t%s:\t%s\n' % (six.text_type(value), count)))

if c.type == six.text_type:
write('\tMax length: %i\n' % stats['len'])
self.output_file.write('\tMax length: %i\n' % stats['len'])

if not operations:
write('\n')
write('Row count: %s\n' % tab.count_rows())
self.output_file.write('\n')
self.output_file.write('Row count: %s\n' % tab.count_rows())

def get_min(self, c, values, stats):
if c.type == NoneType:
Expand Down

0 comments on commit 4548bcd

Please sign in to comment.