Skip to content

Commit

Permalink
CLN: pandas/io/parsers.py (pandas-dev#36269)
Browse files: browse the repository at this point in the history
  • Loading branch information
ivanovmg authored and Kevin D Smith committed Nov 2, 2020
1 parent 44b196a commit 57be5b0
Showing 1 changed file with 19 additions and 34 deletions.
53 changes: 19 additions & 34 deletions pandas/io/parsers.py
Expand Up @@ -421,10 +421,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
kwds["encoding"] = encoding
compression = kwds.get("compression", "infer")

# TODO: get_filepath_or_buffer could return
# Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile]
# though mypy handling of conditional imports is difficult.
# See https://github.com/python/mypy/issues/1297
ioargs = get_filepath_or_buffer(
filepath_or_buffer, encoding, compression, storage_options=storage_options
)
Expand Down Expand Up @@ -914,7 +910,6 @@ def __init__(self, f, engine=None, **kwds):

# miscellanea
self.engine = engine
self._engine = None
self._currow = 0

options = self._get_options_with_defaults(engine)
Expand All @@ -923,14 +918,13 @@ def __init__(self, f, engine=None, **kwds):
self.nrows = options.pop("nrows", None)
self.squeeze = options.pop("squeeze", False)

# might mutate self.engine
self.engine = self._check_file_or_buffer(f, engine)
self._check_file_or_buffer(f, engine)
self.options, self.engine = self._clean_options(options, engine)

if "has_index_names" in kwds:
self.options["has_index_names"] = kwds["has_index_names"]

self._make_engine(self.engine)
self._engine = self._make_engine(self.engine)

def close(self):
    """Close the underlying parser engine held in ``self._engine``."""
    engine = self._engine
    engine.close()
Expand Down Expand Up @@ -987,24 +981,21 @@ def _check_file_or_buffer(self, f, engine):
msg = "The 'python' engine cannot iterate through this file buffer."
raise ValueError(msg)

return engine

def _clean_options(self, options, engine):
result = options.copy()

engine_specified = self._engine_specified
fallback_reason = None

sep = options["delimiter"]
delim_whitespace = options["delim_whitespace"]

# Options the C engine does not support yet force a fallback to the python engine
if engine == "c":
if options["skipfooter"] > 0:
fallback_reason = "the 'c' engine does not support skipfooter"
engine = "python"

encoding = sys.getfilesystemencoding() or "utf-8"
sep = options["delimiter"]
delim_whitespace = options["delim_whitespace"]

if sep is None and not delim_whitespace:
if engine == "c":
fallback_reason = (
Expand All @@ -1029,6 +1020,7 @@ def _clean_options(self, options, engine):
result["delimiter"] = r"\s+"
elif sep is not None:
encodeable = True
encoding = sys.getfilesystemencoding() or "utf-8"
try:
if len(sep.encode(encoding)) > 1:
encodeable = False
Expand Down Expand Up @@ -1161,29 +1153,26 @@ def __next__(self):
raise

def _make_engine(self, engine="c"):
if engine == "c":
self._engine = CParserWrapper(self.f, **self.options)
mapping = {
"c": CParserWrapper,
"python": PythonParser,
"python-fwf": FixedWidthFieldParser,
}
try:
klass = mapping[engine]
except KeyError:
raise ValueError(
f"Unknown engine: {engine} (valid options are {mapping.keys()})"
)
else:
if engine == "python":
klass = PythonParser
elif engine == "python-fwf":
klass = FixedWidthFieldParser
else:
raise ValueError(
f"Unknown engine: {engine} (valid options "
'are "c", "python", or "python-fwf")'
)
self._engine = klass(self.f, **self.options)
return klass(self.f, **self.options)

def _failover_to_python(self):
raise AbstractMethodError(self)

def read(self, nrows=None):
nrows = validate_integer("nrows", nrows)
ret = self._engine.read(nrows)

# May alter columns / col_dict
index, columns, col_dict = self._create_index(ret)
index, columns, col_dict = self._engine.read(nrows)

if index is None:
if col_dict:
Expand All @@ -1203,10 +1192,6 @@ def read(self, nrows=None):
return df[df.columns[0]].copy()
return df

def _create_index(self, ret):
index, columns, col_dict = ret
return index, columns, col_dict

def get_chunk(self, size=None):
if size is None:
size = self.chunksize
Expand Down

0 comments on commit 57be5b0

Please sign in to comment.