diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index da3db1c18e534..811836d0e8a4d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -671,7 +671,7 @@ def _transform_item_by_item(self, obj, wrapper): except Exception: pass - if len(output) == 0: # pragma: no cover + if len(output) == 0: raise TypeError("Transform function invalid for data types") columns = obj.columns diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 15b94e59c065c..12b9cf25687cf 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1206,7 +1206,7 @@ def mean(self, *args, **kwargs): ) except GroupByError: raise - except Exception: # pragma: no cover + except Exception: with _group_selection_context(self): f = lambda x: x.mean(axis=self.axis, **kwargs) return self._python_agg_general(f) @@ -1232,7 +1232,7 @@ def median(self, **kwargs): ) except GroupByError: raise - except Exception: # pragma: no cover + except Exception: def f(x): if isinstance(x, np.ndarray): @@ -2470,7 +2470,7 @@ def groupby(obj, by, **kwds): from pandas.core.groupby.generic import DataFrameGroupBy klass = DataFrameGroupBy - else: # pragma: no cover + else: raise TypeError("invalid type: {}".format(obj)) return klass(obj, by, **kwds) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4a8ee8fa2c5f4..6d70fcfb62d52 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -760,7 +760,7 @@ def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): values[mask] = na_rep return values - # block actions #### + # block actions # def copy(self, deep=True): """ copy constructor """ values = self.values @@ -1538,16 +1538,14 @@ def quantile(self, qs, interpolation="linear", axis=0): ).reshape(len(values), len(qs)) else: # asarray needed for Sparse, see GH#24600 - # Note: we use self.values below instead of values because the - # `asi8` conversion above will behave differently under `isna` - mask = np.asarray(isna(self.values)) + mask = np.asarray(isna(values)) result = nanpercentile( values, np.array(qs) * 100, axis=axis, na_value=self.fill_value, mask=mask, - ndim=self.ndim, + ndim=values.ndim, interpolation=interpolation, ) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 344d41ed26943..8956821740bf3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -975,8 +975,6 @@ def iget(self, i): """ block = self.blocks[self._blknos[i]] values = block.iget(self._blklocs[i]) - if values.ndim != 1: - return values # shortcut for select a single-dim from a 2-dim BM return SingleBlockManager( diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 0006824f09fe7..d38221d784273 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -121,7 +121,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover return except TypeError: warnings.warn( - "to_clipboard in excel mode requires a single " "character separator." + "to_clipboard in excel mode requires a single character separator." ) elif sep is not None: warnings.warn("to_clipboard with excel=False ignores the sep argument") diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 7afc234446a71..154656fbb250b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -297,7 +297,7 @@ def read_excel( for arg in ("sheet", "sheetname", "parse_cols"): if arg in kwds: raise TypeError( - "read_excel() got an unexpected keyword argument " "`{}`".format(arg) + "read_excel() got an unexpected keyword argument `{}`".format(arg) ) if not isinstance(io, ExcelFile): @@ -353,7 +353,7 @@ def __init__(self, filepath_or_buffer): self.book = self.load_workbook(filepath_or_buffer) else: raise ValueError( - "Must explicitly set engine if not passing in" " buffer or path for io." + "Must explicitly set engine if not passing in buffer or path for io." ) @property @@ -713,9 +713,7 @@ def _get_sheet_name(self, sheet_name): if sheet_name is None: sheet_name = self.cur_sheet if sheet_name is None: # pragma: no cover - raise ValueError( - "Must pass explicit sheet_name or set " "cur_sheet property" - ) + raise ValueError("Must pass explicit sheet_name or set cur_sheet property") return sheet_name def _value_with_fmt(self, val): @@ -851,7 +849,7 @@ def parse( """ if "chunksize" in kwds: raise NotImplementedError( - "chunksize keyword of read_excel " "is not implemented" + "chunksize keyword of read_excel is not implemented" ) return self._reader.parse( diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 35a62b627823a..6fe22f14c2c5b 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -53,7 +53,7 @@ def to_feather(df, path): if df.index.name is not None: raise ValueError( - "feather does not serialize index meta-data on a " "default index" + "feather does not serialize index meta-data on a default index" ) # validate columns diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index d86bf432b83c4..60daf311397e8 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -96,9 +96,7 @@ def __init__( # validate mi options if self.has_mi_columns: if cols is not None: - raise TypeError( - "cannot specify cols with a MultiIndex on the " "columns" - ) + raise TypeError("cannot specify cols with a MultiIndex on the columns") if cols is not None: if isinstance(cols, ABCIndexClass): @@ -158,7 +156,7 @@ def save(self): """ # GH21227 internal compression is not used when file-like passed. if self.compression and hasattr(self.path_or_buf, "write"): - msg = "compression has no effect when passing file-like " "object as input." + msg = "compression has no effect when passing file-like object as input." warnings.warn(msg, RuntimeWarning, stacklevel=2) # when zip compression is called. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6e4894bdb0f56..980fc4888d625 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -2,9 +2,10 @@ Internal module for formatting output data in csv, html, and latex files. This module also applies to display formatting. """ - +import decimal from functools import partial from io import StringIO +import math import re from shutil import get_terminal_size from typing import ( @@ -862,7 +863,7 @@ def to_latex( with codecs.open(self.buf, "w", encoding=encoding) as f: latex_renderer.write_result(f) else: - raise TypeError("buf is not a file name and it has no write " "method") + raise TypeError("buf is not a file name and it has no write method") def _format_col(self, i: int) -> List[str]: frame = self.tr_frame @@ -907,7 +908,7 @@ def to_html( with open(self.buf, "w") as f: buffer_put_lines(f, html) else: - raise TypeError("buf is not a file name and it has no write " " method") + raise TypeError("buf is not a file name and it has no write method") def _get_formatted_column_labels(self, frame: "DataFrame") -> List[List[str]]: from pandas.core.index import _sparsify @@ -1782,9 +1783,6 @@ def __call__(self, num: Union[int, float]) -> str: @return: engineering formatted string """ - import decimal - import math - dnum = decimal.Decimal(str(num)) if decimal.Decimal.is_nan(dnum): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3e5b200c4643b..f4b00b0aac5f7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -687,7 +687,7 @@ def parser_f( read_csv = Appender( _doc_read_csv_and_table.format( func_name="read_csv", - summary=("Read a comma-separated values (csv) file " "into DataFrame."), + summary=("Read a comma-separated values (csv) file into DataFrame."), _default_sep="','", ) )(read_csv) @@ -770,7 +770,7 @@ def read_fwf( if colspecs is None and widths is None: raise ValueError("Must specify either colspecs or widths") elif colspecs not in (None, "infer") and widths is not None: - raise ValueError("You must specify only one of 'widths' and " "'colspecs'") + raise ValueError("You must specify only one of 'widths' and 'colspecs'") # Compute 'colspecs' from 'widths', if specified. if widths is not None: @@ -901,9 +901,7 @@ def _get_options_with_defaults(self, engine): # see gh-12935 if argname == "mangle_dupe_cols" and not value: - raise ValueError( - "Setting mangle_dupe_cols=False is " "not supported yet" - ) + raise ValueError("Setting mangle_dupe_cols=False is not supported yet") else: options[argname] = value @@ -942,7 +940,7 @@ def _check_file_or_buffer(self, f, engine): # needs to have that attribute ("next" for Python 2.x, "__next__" # for Python 3.x) if engine != "c" and not hasattr(f, next_attr): - msg = "The 'python' engine cannot iterate " "through this file buffer." + msg = "The 'python' engine cannot iterate through this file buffer." raise ValueError(msg) return engine @@ -959,7 +957,7 @@ def _clean_options(self, options, engine): # C engine not supported yet if engine == "c": if options["skipfooter"] > 0: - fallback_reason = "the 'c' engine does not support" " skipfooter" + fallback_reason = "the 'c' engine does not support skipfooter" engine = "python" encoding = sys.getfilesystemencoding() or "utf-8" @@ -1397,11 +1395,11 @@ def __init__(self, kwds): raise ValueError("header must be integer or list of integers") if kwds.get("usecols"): raise ValueError( - "cannot specify usecols when " "specifying a multi-index header" + "cannot specify usecols when specifying a multi-index header" ) if kwds.get("names"): raise ValueError( - "cannot specify names when " "specifying a multi-index header" + "cannot specify names when specifying a multi-index header" ) # validate index_col that only contains integers @@ -1611,7 +1609,7 @@ def _get_name(icol): if col_names is None: raise ValueError( - ("Must supply column order to use {icol!s} " "as index").format( + ("Must supply column order to use {icol!s} as index").format( icol=icol ) ) @@ -2379,7 +2377,7 @@ def _make_reader(self, f): if sep is None or len(sep) == 1: if self.lineterminator: raise ValueError( - "Custom line terminators not supported in " "python parser (yet)" + "Custom line terminators not supported in python parser (yet)" ) class MyDialect(csv.Dialect): @@ -2662,7 +2660,7 @@ def _infer_columns(self): "number of header fields in the file" ) if len(columns) > 1: - raise TypeError("Cannot pass names with multi-index " "columns") + raise TypeError("Cannot pass names with multi-index columns") if self.usecols is not None: # Set _use_cols. We don't store columns because they are @@ -2727,7 +2725,7 @@ def _handle_usecols(self, columns, usecols_key): elif any(isinstance(u, str) for u in self.usecols): if len(columns) > 1: raise ValueError( - "If using multiple headers, usecols must " "be integers." + "If using multiple headers, usecols must be integers." ) col_indices = [] diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index da9264557931d..415cb50472a4c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -366,7 +366,7 @@ def read_hdf(path_or_buf, key=None, mode="r", **kwargs): path_or_buf = _stringify_path(path_or_buf) if not isinstance(path_or_buf, str): raise NotImplementedError( - "Support for generic buffers has not " "been implemented." + "Support for generic buffers has not been implemented." ) try: exists = os.path.exists(path_or_buf) @@ -1047,7 +1047,7 @@ def append( """ if columns is not None: raise TypeError( - "columns is not a supported keyword in append, " "try data_columns" + "columns is not a supported keyword in append, try data_columns" ) if dropna is None: @@ -2161,7 +2161,7 @@ def set_atom( # which is an error raise TypeError( - "too many timezones in this block, create separate " "data columns" + "too many timezones in this block, create separate data columns" ) elif inferred_type == "unicode": raise TypeError("[unicode] is not implemented as a table column") @@ -2338,9 +2338,7 @@ def validate_attr(self, append): if append: existing_fields = getattr(self.attrs, self.kind_attr, None) if existing_fields is not None and existing_fields != list(self.values): - raise ValueError( - "appended items do not match existing items" " in table!" - ) + raise ValueError("appended items do not match existing items in table!") existing_dtype = getattr(self.attrs, self.dtype_attr, None) if existing_dtype is not None and existing_dtype != self.dtype: @@ -2834,7 +2832,7 @@ def write_multi_index(self, key, index): # write the level if is_extension_type(lev): raise NotImplementedError( - "Saving a MultiIndex with an " "extension dtype is not supported." + "Saving a MultiIndex with an extension dtype is not supported." ) level_key = "{key}_level{idx}".format(key=key, idx=i) conv_level = _convert_index( @@ -3079,7 +3077,7 @@ def validate_read(self, kwargs): kwargs = super().validate_read(kwargs) if "start" in kwargs or "stop" in kwargs: raise NotImplementedError( - "start and/or stop are not supported " "in fixed Sparse reading" + "start and/or stop are not supported in fixed Sparse reading" ) return kwargs @@ -3376,7 +3374,7 @@ def validate_multiindex(self, obj): return obj.reset_index(), levels except ValueError: raise ValueError( - "duplicate names/columns in the multi-index when " "storing as a table" + "duplicate names/columns in the multi-index when storing as a table" ) @property @@ -4081,7 +4079,7 @@ def read_column(self, column, where=None, start=None, stop=None): return False if where is not None: - raise TypeError("read_column does not currently accept a where " "clause") + raise TypeError("read_column does not currently accept a where clause") # find the axes for a in self.axes: @@ -4990,7 +4988,7 @@ def __init__(self, table, where=None, start=None, stop=None): self.stop is not None and (where >= self.stop).any() ): raise ValueError( - "where must have index locations >= start and " "< stop" + "where must have index locations >= start and < stop" ) self.coordinates = where diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 34b93d72d0e29..ea26a9b8efdbf 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -26,7 +26,7 @@ "000000000000000000000000000000 " ) _correct_header1 = ( - "HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!" "000000000000000001600000000" + "HEADER RECORD*******MEMBER HEADER RECORD!!!!!!!000000000000000001600000000" ) _correct_header2 = ( "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!" diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 6fe34e4e9705a..f1f52a9198d29 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -233,7 +233,7 @@ def read_sql_table( con = _engine_builder(con) if not _is_sqlalchemy_connectable(con): raise NotImplementedError( - "read_sql_table only supported for " "SQLAlchemy connectable." + "read_sql_table only supported for SQLAlchemy connectable." ) import sqlalchemy from sqlalchemy.schema import MetaData @@ -503,7 +503,7 @@ def to_sql( frame = frame.to_frame() elif not isinstance(frame, DataFrame): raise NotImplementedError( - "'frame' argument should be either a " "Series or a DataFrame" + "'frame' argument should be either a Series or a DataFrame" ) pandas_sql.to_sql( @@ -1756,7 +1756,7 @@ def has_table(self, name, schema=None): wld = "?" query = ( - "SELECT name FROM sqlite_master " "WHERE type='table' AND name={wld};" + "SELECT name FROM sqlite_master WHERE type='table' AND name={wld};" ).format(wld=wld) return len(self.execute(query, [name]).fetchall()) > 0 diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 32122a9daa1db..69bafc7749258 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -367,7 +367,7 @@ def convert_delta_safe(base, deltas, unit): conv_dates = convert_delta_safe(base, ms, "ms") elif fmt.startswith(("%tC", "tC")): - warnings.warn("Encountered %tC format. Leaving in Stata " "Internal Format.") + warnings.warn("Encountered %tC format. Leaving in Stata Internal Format.") conv_dates = Series(dates, dtype=np.object) if has_bad_values: conv_dates[bad_locs] = NaT @@ -856,7 +856,7 @@ def __init__(self, value): string = property( lambda self: self._str, - doc="The Stata representation of the missing value: " "'.', '.a'..'.z'", + doc="The Stata representation of the missing value: '.', '.a'..'.z'", ) value = property( lambda self: self._value, doc="The binary representation of the missing value." @@ -1959,7 +1959,7 @@ def _maybe_convert_to_int_keys(convert_dates, varlist): new_dict.update({varlist.index(key): convert_dates[key]}) else: if not isinstance(key, int): - raise ValueError("convert_dates key must be a " "column or an integer") + raise ValueError("convert_dates key must be a column or an integer") new_dict.update({key: convert_dates[key]}) return new_dict @@ -2533,9 +2533,7 @@ def _write_variable_labels(self): if col in self._variable_labels: label = self._variable_labels[col] if len(label) > 80: - raise ValueError( - "Variable labels must be 80 characters " "or fewer" - ) + raise ValueError("Variable labels must be 80 characters or fewer") is_latin1 = all(ord(c) < 256 for c in label) if not is_latin1: raise ValueError( @@ -3093,9 +3091,7 @@ def _write_variable_labels(self): if col in self._variable_labels: label = self._variable_labels[col] if len(label) > 80: - raise ValueError( - "Variable labels must be 80 characters " "or fewer" - ) + raise ValueError("Variable labels must be 80 characters or fewer") is_latin1 = all(ord(c) < 256 for c in label) if not is_latin1: raise ValueError(