|
|
@@ -12,14 +12,17 @@ |
|
|
from pandas.core.index import Index
|
|
|
from pandas.core.frame import DataFrame
|
|
|
|
|
|
-def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
|
|
|
+def read_csv(filepath_or_buffer, sep=None, header=0, skiprows=None, index_col=0,
|
|
|
na_values=None, date_parser=None, names=None):
|
|
|
"""
|
|
|
Read CSV file into DataFrame
|
|
|
|
|
|
Parameters
|
|
|
----------
|
|
|
filepath_or_buffer : string or file handle / StringIO
|
|
|
+ sep : string, default None
|
|
|
+ Delimiter to use. If None, the delimiter is sniffed from the first
|
|
|
+ line of the input
|
|
|
header : int, default 0
|
|
|
Row to use for the column labels of the parsed DataFrame
|
|
|
skiprows : list-like
|
|
|
@@ -50,7 +53,20 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0, |
|
|
except Exception: # pragma: no cover
|
|
|
f = open(filepath_or_buffer, 'r')
|
|
|
|
|
|
- reader = csv.reader(f, dialect='excel')
|
|
|
+ sniff_sep = True
|
|
|
+ # default dialect (NOTE: csv.excel is a shared class; setting .delimiter on it mutates it globally)
|
|
|
+ dia = csv.excel
|
|
|
+ if sep is not None:
|
|
|
+ sniff_sep = False
|
|
|
+ dia.delimiter = sep
|
|
|
+ # attempt to sniff the delimiter
|
|
|
+ if sniff_sep:
|
|
|
+ sample = f.readline()
|
|
|
+ sniffed = csv.Sniffer().sniff(sample)
|
|
|
+ dia.delimiter = sniffed.delimiter
|
|
|
+ f.seek(0)
|
|
|
+
|
|
|
+ reader = csv.reader(f, dialect=dia)
|
|
|
|
|
|
if skiprows is not None:
|
|
|
skiprows = set(skiprows)
|
|
|
@@ -63,8 +79,7 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0, |
|
|
date_parser=date_parser)
|
|
|
|
|
|
def read_table(filepath_or_buffer, sep='\t', header=0, skiprows=None,
|
|
|
- index_col=0, na_values=None, names=None,
|
|
|
- date_parser=None):
|
|
|
+ index_col=0, na_values=None, date_parser=None, names=None):
|
|
|
"""
|
|
|
Read delimited file into DataFrame
|
|
|
|
|
|
@@ -92,25 +107,8 @@ def read_table(filepath_or_buffer, sep='\t', header=0, skiprows=None, |
|
|
-------
|
|
|
parsed : DataFrame
|
|
|
"""
|
|
|
- if hasattr(filepath_or_buffer, 'read'):
|
|
|
- reader = filepath_or_buffer
|
|
|
- else:
|
|
|
- try:
|
|
|
- # universal newline mode
|
|
|
- reader = open(filepath_or_buffer, 'U')
|
|
|
- except Exception: # pragma: no cover
|
|
|
- reader = open(filepath_or_buffer, 'r')
|
|
|
-
|
|
|
- if skiprows is not None:
|
|
|
- skiprows = set(skiprows)
|
|
|
- lines = [l for i, l in enumerate(reader) if i not in skiprows]
|
|
|
- else:
|
|
|
- lines = [l for l in reader]
|
|
|
-
|
|
|
- lines = [re.split(sep, l.rstrip()) for l in lines]
|
|
|
- return _simple_parser(lines, header=header, indexCol=index_col,
|
|
|
- colNames=names, na_values=na_values,
|
|
|
- date_parser=date_parser)
|
|
|
+ return read_csv(filepath_or_buffer, sep, header, skiprows,
|
|
|
+ index_col, na_values, date_parser, names)
|
|
|
|
|
|
def _simple_parser(lines, colNames=None, header=0, indexCol=0,
|
|
|
na_values=None, date_parser=None, parse_dates=True):
|
|
|
|