Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN: pandas/io/parsers.py #36269

Merged
merged 8 commits on Sep 12, 2020
53 changes: 19 additions & 34 deletions pandas/io/parsers.py
Expand Up @@ -421,10 +421,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
kwds["encoding"] = encoding
compression = kwds.get("compression", "infer")

# TODO: get_filepath_or_buffer could return
# Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile]
# though mypy handling of conditional imports is difficult.
# See https://github.com/python/mypy/issues/1297
ioargs = get_filepath_or_buffer(
filepath_or_buffer, encoding, compression, storage_options=storage_options
)
Expand Down Expand Up @@ -914,7 +910,6 @@ def __init__(self, f, engine=None, **kwds):

# miscellanea
self.engine = engine
self._engine = None
self._currow = 0

options = self._get_options_with_defaults(engine)
Expand All @@ -923,14 +918,13 @@ def __init__(self, f, engine=None, **kwds):
self.nrows = options.pop("nrows", None)
self.squeeze = options.pop("squeeze", False)

# might mutate self.engine
self.engine = self._check_file_or_buffer(f, engine)
self._check_file_or_buffer(f, engine)
self.options, self.engine = self._clean_options(options, engine)

if "has_index_names" in kwds:
self.options["has_index_names"] = kwds["has_index_names"]

self._make_engine(self.engine)
self._engine = self._make_engine(self.engine)
WillAyd marked this conversation as resolved.
Show resolved Hide resolved

def close(self):
self._engine.close()
Expand Down Expand Up @@ -987,24 +981,21 @@ def _check_file_or_buffer(self, f, engine):
msg = "The 'python' engine cannot iterate through this file buffer."
raise ValueError(msg)

return engine

def _clean_options(self, options, engine):
result = options.copy()

engine_specified = self._engine_specified
fallback_reason = None

sep = options["delimiter"]
delim_whitespace = options["delim_whitespace"]

# C engine not supported yet
if engine == "c":
if options["skipfooter"] > 0:
fallback_reason = "the 'c' engine does not support skipfooter"
engine = "python"

encoding = sys.getfilesystemencoding() or "utf-8"
sep = options["delimiter"]
delim_whitespace = options["delim_whitespace"]

if sep is None and not delim_whitespace:
if engine == "c":
fallback_reason = (
Expand All @@ -1029,6 +1020,7 @@ def _clean_options(self, options, engine):
result["delimiter"] = r"\s+"
elif sep is not None:
encodeable = True
encoding = sys.getfilesystemencoding() or "utf-8"
try:
if len(sep.encode(encoding)) > 1:
encodeable = False
Expand Down Expand Up @@ -1161,29 +1153,26 @@ def __next__(self):
raise

def _make_engine(self, engine="c"):
if engine == "c":
self._engine = CParserWrapper(self.f, **self.options)
mapping = {
"c": CParserWrapper,
"python": PythonParser,
"python-fwf": FixedWidthFieldParser,
}
try:
klass = mapping[engine]
except KeyError:
raise ValueError(
f"Unknown engine: {engine} (valid options are {mapping.keys()})"
)
else:
if engine == "python":
klass = PythonParser
elif engine == "python-fwf":
klass = FixedWidthFieldParser
else:
raise ValueError(
f"Unknown engine: {engine} (valid options "
'are "c", "python", or "python-fwf")'
)
self._engine = klass(self.f, **self.options)
return klass(self.f, **self.options)

def _failover_to_python(self):
raise AbstractMethodError(self)

def read(self, nrows=None):
nrows = validate_integer("nrows", nrows)
ret = self._engine.read(nrows)

# May alter columns / col_dict
index, columns, col_dict = self._create_index(ret)
index, columns, col_dict = self._engine.read(nrows)

if index is None:
if col_dict:
Expand All @@ -1203,10 +1192,6 @@ def read(self, nrows=None):
return df[df.columns[0]].copy()
return df

def _create_index(self, ret):
index, columns, col_dict = ret
return index, columns, col_dict

def get_chunk(self, size=None):
if size is None:
size = self.chunksize
Expand Down