Permalink
Browse files

DOC: added info on encoding parameter for csv i/o

  • Loading branch information...
1 parent db2114a commit e3df4e2ea341b9a631d40b7ec447113078180b12 @adamklein adamklein committed Jan 31, 2012
Showing with 62 additions and 17 deletions.
  1. +2 −0 RELEASE.rst
  2. +4 −0 doc/source/io.rst
  3. +4 −0 doc/source/whatsnew/v0.7.0.txt
  4. +7 −3 pandas/core/frame.py
  5. +41 −14 pandas/core/series.py
  6. +4 −0 pandas/io/parsers.py
View
@@ -1188,6 +1188,8 @@ Release notes
* `timeRule` argument in `shift` has been deprecated in favor of using the
`offset` argument for everything. So you can still pass a time rule string
to `offset`
+ * Added optional `encoding` argument to `read_csv`, `read_table`, `to_csv`,
+ `from_csv` to handle unicode in python 2.x
**Bug fixes**
View
@@ -93,6 +93,8 @@ data into a DataFrame object. They can take a number of arguments:
- ``skip_footer``: number of lines to skip at bottom of file (default 0)
- ``converters``: a dictionary of functions for converting values in certain
columns, where keys are either integers or column labels
+ - ``encoding``: a string representing the encoding to use if the contents are
+ non-ascii, for python versions prior to 3
.. ipython:: python
:suppress:
@@ -277,6 +279,8 @@ function takes a number of arguments. Only the first is required.
used. (A sequence should be given if the DataFrame uses MultiIndex).
- ``mode`` : Python write mode, default 'w'
- ``sep`` : Field delimiter for the output file (default ",")
+ - ``encoding``: a string representing the encoding to use if the contents are
+ non-ascii, for python versions prior to 3
Writing a formatted string
~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -94,6 +94,9 @@ New features
- ``value_range`` added as utility function to get min and max of a dataframe
(GH288_)
+- Added ``encoding`` argument to ``read_csv``, ``read_table``, ``to_csv`` and
+ ``from_csv`` for non-ascii text (GH717_)
+
API Changes to integer indexing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -299,6 +302,7 @@ similar operation to the above but using a Python function:
.. _GH595: https://github.com/wesm/pandas/issues/595
.. _GH647: https://github.com/wesm/pandas/issues/647
.. _GH699: https://github.com/wesm/pandas/issues/699
+.. _GH717: https://github.com/wesm/pandas/issues/717
.. _GH93: https://github.com/wesm/pandas/issues/93
.. _GH93: https://github.com/wesm/pandas/issues/93
.. _PR521: https://github.com/wesm/pandas/pull/521
View
@@ -744,7 +744,7 @@ def from_items(cls, items, columns=None, orient='columns'):
@classmethod
def from_csv(cls, path, header=0, sep=',', index_col=0,
- parse_dates=True):
+ parse_dates=True, encoding=None):
"""
Read delimited file into DataFrame
@@ -773,7 +773,8 @@ def from_csv(cls, path, header=0, sep=',', index_col=0,
"""
from pandas.io.parsers import read_table
return read_table(path, header=header, sep=sep,
- parse_dates=parse_dates, index_col=index_col)
+ parse_dates=parse_dates, index_col=index_col,
+ encoding=encoding)
def to_sparse(self, fill_value=None, kind='block'):
"""
@@ -834,7 +835,7 @@ def to_panel(self):
to_wide = deprecate('to_wide', to_panel)
def to_csv(self, path, sep=",", na_rep='', cols=None, header=True,
- index=True, index_label=None, mode='w', nanRep=None,
+ index=True, index_label=None, mode='w', nanRep=None,
encoding=None):
"""
Write DataFrame to a comma-separated values (csv) file
@@ -858,6 +859,9 @@ def to_csv(self, path, sep=",", na_rep='', cols=None, header=True,
mode : Python write mode, default 'w'
sep : character, default ","
Field delimiter for the output file.
+ encoding : string, optional
+ a string representing the encoding to use if the contents are
+ non-ascii, for python versions prior to 3
"""
f = open(path, mode)
csvout = csv.writer(f, lineterminator='\n', delimiter=sep)
View
@@ -1979,7 +1979,8 @@ def hist(self, ax=None, grid=True, **kwds):
return ax
@classmethod
- def from_csv(cls, path, sep=',', parse_dates=True):
+ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
+ index_col=0, encoding=None):
"""
Read delimited file into Series
@@ -1990,30 +1991,56 @@ def from_csv(cls, path, sep=',', parse_dates=True):
Field delimiter
parse_dates : boolean, default True
Parse dates. Different default from read_table
+ header : int or None, default None
+ Row to use as header (skip prior rows)
+ index_col : int or sequence, default 0
+ Column to use for index. If a sequence is given, a MultiIndex
+ is used. Different default from read_table
+ encoding : string, optional
+ a string representing the encoding to use if the contents are
+ non-ascii, for python versions prior to 3
Returns
-------
y : Series
"""
from pandas.core.frame import DataFrame
- df = DataFrame.from_csv(path, header=None, sep=sep, parse_dates=parse_dates)
- return df[df.columns[0]]
+ df = DataFrame.from_csv(path, header=header, index_col=index_col,
+ sep=sep, parse_dates=parse_dates,
+ encoding=encoding)
+ return df.ix[:, 0]
- def to_csv(self, path, index=True):
+ def to_csv(self, path, index=True, sep=",", na_rep='', header=False,
+ index_label=None, mode='w', nanRep=None, encoding=None):
"""
- Write the Series to a CSV file
+ Write Series to a comma-separated values (csv) file
Parameters
----------
- path : string or None
- Output filepath. If None, write to stdout
- index : bool, optional
- Include the index as row names or not
- """
- f = open(path, 'w')
- csvout = csv.writer(f, lineterminator='\n')
- csvout.writerows(self.iteritems(index))
- f.close()
+ path : string
+ File path
+ na_rep : string, default ''
+ Missing data representation
+ header : boolean, default False
+ Write out series name
+ index : boolean, default True
+ Write row names (index)
+ index_label : string or sequence, default None
+ Column label for index column(s) if desired. If None is given, and
+ `header` and `index` are True, then the index names are used. A
+ sequence should be given if the Series uses a MultiIndex.
+ mode : Python write mode, default 'w'
+ sep : character, default ","
+ Field delimiter for the output file.
+ encoding : string, optional
+ a string representing the encoding to use if the contents are
+ non-ascii, for python versions prior to 3
+ """
+ from pandas.core.frame import DataFrame
+ df = DataFrame(self)
+ df.to_csv(path, index=index, sep=sep, na_rep=na_rep, header=header,
+ index_label=index_label,mode=mode, nanRep=nanRep,
+ encoding=encoding)
def dropna(self):
"""
View
@@ -51,6 +51,8 @@
Indicate number of NA values placed in non-numeric columns
delimiter : string, default None
Alternative argument name for sep
+encoding : string, default None
+ Encoding to use for UTF when reading/writing (ex. 'utf-8')
Returns
-------
@@ -185,6 +187,8 @@ class TextParser(object):
Row numbers to skip
skip_footer : int
Number of lines at bottom of file to skip
+ encoding : string, default None
+ Encoding to use for UTF when reading/writing (ex. 'utf-8')
"""
# common NA values

0 comments on commit e3df4e2

Please sign in to comment.