CLN: Use generators instead of lists in built-in Python functions (#1…
mroeschke authored and jreback committed Nov 15, 2017
1 parent fa4ebd4 commit 9c799e2
Showing 20 changed files with 73 additions and 74 deletions.
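
The change is mechanical but worth spelling out: when sum, max, any, and all are handed a generator expression instead of a list comprehension, Python never materializes an intermediate list, and any/all can stop at the first decisive element. A minimal sketch of the pattern (illustrative values only, not taken from the diff below):

    # List comprehension: the full million-element list is built before
    # any() ever sees it.
    any([x > 1000 for x in range(10**6)])

    # Generator expression: any() pulls items lazily and returns as soon
    # as the first truthy value appears -- after ~1000 items here.
    any(x > 1000 for x in range(10**6))

    # The same parentheses-only rewrite applies to sum/max/min/all:
    total = sum(x * x for x in range(100))

A generator expression that is the sole argument to a call also needs no parentheses of its own, which is why `sum((i.nbytes for i in self.labels))` below loses its inner pair.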
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
@@ -1484,7 +1484,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True):
             if len(slices) == 1:
                 yield blkno, slice(slices[0][0], slices[0][1])
             else:
-                tot_len = sum([stop - start for start, stop in slices])
+                tot_len = sum(stop - start for start, stop in slices)
                 result = np.empty(tot_len, dtype=np.int64)
                 res_view = result

4 changes: 2 additions & 2 deletions pandas/compat/__init__.py
@@ -266,7 +266,7 @@ def east_asian_len(data, encoding=None, ambiguous_width=1):
         Calculate display width considering unicode East Asian Width
         """
         if isinstance(data, text_type):
-            return sum([_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data])
+            return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
         else:
             return len(data)

@@ -318,7 +318,7 @@ def east_asian_len(data, encoding=None, ambiguous_width=1):
                 data = data.decode(encoding)
             except UnicodeError:
                 pass
-            return sum([_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data])
+            return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
         else:
             return len(data)

8 changes: 4 additions & 4 deletions pandas/core/base.py
@@ -486,13 +486,13 @@ def _agg(arg, func):

         def is_any_series():
             # return a boolean if we have *any* nested series
-            return any([isinstance(r, ABCSeries)
-                        for r in compat.itervalues(result)])
+            return any(isinstance(r, ABCSeries)
+                       for r in compat.itervalues(result))

         def is_any_frame():
             # return a boolean if we have *any* nested series
-            return any([isinstance(r, ABCDataFrame)
-                        for r in compat.itervalues(result)])
+            return any(isinstance(r, ABCDataFrame)
+                       for r in compat.itervalues(result))

         if isinstance(result, list):
             return concat(result, keys=keys, axis=1), True

2 changes: 1 addition & 1 deletion pandas/core/config.py
@@ -800,7 +800,7 @@ def inner(x):
         from pandas.io.formats.printing import pprint_thing as pp
         if x not in legal_values:

-            if not any([c(x) for c in callables]):
+            if not any(c(x) for c in callables):
                 pp_values = pp("|".join(lmap(pp, legal_values)))
                 msg = "Value must be one of {pp_values}"
                 if len(callables):

6 changes: 3 additions & 3 deletions pandas/core/frame.py
@@ -601,7 +601,7 @@ def _repr_fits_horizontal_(self, ignore_width=False):

         d.to_string(buf=buf)
         value = buf.getvalue()
-        repr_width = max([len(l) for l in value.split('\n')])
+        repr_width = max(len(l) for l in value.split('\n'))

         return repr_width < width

@@ -1798,7 +1798,7 @@ def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None,
         def _verbose_repr():
             lines.append('Data columns (total %d columns):' %
                          len(self.columns))
-            space = max([len(pprint_thing(k)) for k in self.columns]) + 4
+            space = max(len(pprint_thing(k)) for k in self.columns) + 4
             counts = None

             tmpl = "%s%s"
@@ -6424,7 +6424,7 @@ def convert(arr):


 def _get_names_from_index(data):
-    has_some_name = any([getattr(s, 'name', None) is not None for s in data])
+    has_some_name = any(getattr(s, 'name', None) is not None for s in data)
     if not has_some_name:
         return _default_index(len(data))

12 changes: 6 additions & 6 deletions pandas/core/generic.py
@@ -1006,8 +1006,8 @@ def _set_axis_name(self, name, axis=0, inplace=False):
     # Comparisons

     def _indexed_same(self, other):
-        return all([self._get_axis(a).equals(other._get_axis(a))
-                    for a in self._AXIS_ORDERS])
+        return all(self._get_axis(a).equals(other._get_axis(a))
+                   for a in self._AXIS_ORDERS)

     def __neg__(self):
         values = _values_from_object(self)
@@ -2989,8 +2989,8 @@ def reindex(self, *args, **kwargs):

         # if all axes that are requested to reindex are equal, then only copy
         # if indicated must have index names equal here as well as values
-        if all([self._get_axis(axis).identical(ax)
-                for axis, ax in axes.items() if ax is not None]):
+        if all(self._get_axis(axis).identical(ax)
+               for axis, ax in axes.items() if ax is not None):
             if copy:
                 return self.copy()
             return self
@@ -5886,8 +5886,8 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None,

         # if we are NOT aligned, raise as we cannot where index
         if (axis is None and
-                not all([other._get_axis(i).equals(ax)
-                         for i, ax in enumerate(self.axes)])):
+                not all(other._get_axis(i).equals(ax)
+                        for i, ax in enumerate(self.axes))):
             raise InvalidIndexError

         # slice me out of the other

7 changes: 3 additions & 4 deletions pandas/core/groupby.py
@@ -3890,8 +3890,7 @@ def first_not_none(values):
            # values are not series or array-like but scalars
            else:
                # only coerce dates if we find at least 1 datetime
-                coerce = True if any([isinstance(x, Timestamp)
-                                      for x in values]) else False
+                coerce = any(isinstance(x, Timestamp) for x in values)
                # self._selection_name not passed through to Series as the
                # result should not take the name of original selection
                # of columns
@@ -4303,8 +4302,8 @@ def _reindex_output(self, result):
             return result
         elif len(groupings) == 1:
             return result
-        elif not any([isinstance(ping.grouper, (Categorical, CategoricalIndex))
-                      for ping in groupings]):
+        elif not any(isinstance(ping.grouper, (Categorical, CategoricalIndex))
+                     for ping in groupings):
             return result

         levels_list = [ping.group_index for ping in groupings]

4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
@@ -907,7 +907,7 @@ def _extend_line(s, line, value, display_width, next_line_prefix):

     def best_len(values):
         if values:
-            return max([adj.len(x) for x in values])
+            return max(adj.len(x) for x in values)
         else:
             return 0

@@ -4246,7 +4246,7 @@ def _trim_front(strings):
     Trims zeros and decimal points
     """
     trimmed = strings
-    while len(strings) > 0 and all([x[0] == ' ' for x in trimmed]):
+    while len(strings) > 0 and all(x[0] == ' ' for x in trimmed):
         trimmed = [x[1:] for x in trimmed]
     return trimmed

16 changes: 8 additions & 8 deletions pandas/core/indexes/multi.py
@@ -461,7 +461,7 @@ def _is_memory_usage_qualified(self):
         """ return a boolean if we need a qualified .info display """
         def f(l):
             return 'mixed' in l or 'string' in l or 'unicode' in l
-        return any([f(l) for l in self._inferred_type_levels])
+        return any(f(l) for l in self._inferred_type_levels)

     @Appender(Index.memory_usage.__doc__)
     def memory_usage(self, deep=False):
@@ -489,9 +489,9 @@ def _nbytes(self, deep=False):
         # for implementations with no useful getsizeof (PyPy)
         objsize = 24

-        level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels))
-        label_nbytes = sum((i.nbytes for i in self.labels))
-        names_nbytes = sum((getsizeof(i, objsize) for i in self.names))
+        level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
+        label_nbytes = sum(i.nbytes for i in self.labels)
+        names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
         result = level_nbytes + label_nbytes + names_nbytes

         # include our engine hashtable
@@ -2214,12 +2214,12 @@ def partial_selection(key, indexer=None):
                # here we have a completely specified key, but are
                # using some partial string matching here
                # GH4758
-                all_dates = [(l.is_all_dates and
+                all_dates = ((l.is_all_dates and
                              not isinstance(k, compat.string_types))
-                             for k, l in zip(key, self.levels)]
+                             for k, l in zip(key, self.levels))
                can_index_exactly = any(all_dates)
-                if (any([l.is_all_dates
-                         for k, l in zip(key, self.levels)]) and
+                if (any(l.is_all_dates
+                        for k, l in zip(key, self.levels)) and
                        not can_index_exactly):
                    indexer = self.get_loc(key)

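One caveat on the `all_dates` hunk above: unlike the list it replaces, a generator expression is exhausted after a single pass. That is safe here because `all_dates` is consumed exactly once, by `any(all_dates)`, and the following condition builds its own fresh generator rather than reusing it. A tiny illustration of the single-pass behavior (hypothetical values, not from the pandas code):

    gen = (x > 0 for x in [1, -2, 3])
    any(gen)   # True -- stops after the first element
    list(gen)  # [False, True] -- only the unconsumed remainder is left
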
8 changes: 4 additions & 4 deletions pandas/core/indexes/range.py
@@ -193,8 +193,8 @@ def nbytes(self):
         On implementations where this is undetermined (PyPy)
         assume 24 bytes for each value
         """
-        return sum([getsizeof(getattr(self, v), 24) for v in
-                    ['_start', '_stop', '_step']])
+        return sum(getsizeof(getattr(self, v), 24) for v in
+                   ['_start', '_stop', '_step'])

     def memory_usage(self, deep=False):
         """
@@ -613,8 +613,8 @@ def _evaluate_numeric_binop(self, other):
            # for compat with numpy / Int64Index
            # even if we can represent as a RangeIndex, return
            # as a Float64Index if we have float-like descriptors
-            if not all([is_integer(x) for x in
-                        [rstart, rstop, rstep]]):
+            if not all(is_integer(x) for x in
+                       [rstart, rstop, rstep]):
                result = result.astype('float64')

            return result

8 changes: 4 additions & 4 deletions pandas/core/indexing.py
@@ -217,8 +217,8 @@ def _should_validate_iterable(self, axis=None):
         return True

     def _is_nested_tuple_indexer(self, tup):
-        if any([isinstance(ax, MultiIndex) for ax in self.obj.axes]):
-            return any([is_nested_tuple(tup, ax) for ax in self.obj.axes])
+        if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
+            return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
         return False

     def _convert_tuple(self, key, is_setter=False):
@@ -342,7 +342,7 @@ def _setitem_with_indexer(self, indexer, value):
                        len(_ax) for _i, _ax in enumerate(self.obj.axes)
                        if _i != i
                    ]
-                    if any([not l for l in len_non_info_axes]):
+                    if any(not l for l in len_non_info_axes):
                        if not is_list_like_indexer(value):
                            raise ValueError("cannot set a frame with no "
                                             "defined index and a scalar")
@@ -690,7 +690,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False):
            # we have a frame, with multiple indexers on both axes; and a
            # series, so need to broadcast (see GH5206)
            if (sum_aligners == self.ndim and
-                    all([is_sequence(_) for _ in indexer])):
+                    all(is_sequence(_) for _ in indexer)):
                ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

            # single indexer

14 changes: 7 additions & 7 deletions pandas/core/internals.py
@@ -3539,13 +3539,13 @@ def is_mixed_type(self):
     def is_numeric_mixed_type(self):
         # Warning, consolidation needs to get checked upstairs
         self._consolidate_inplace()
-        return all([block.is_numeric for block in self.blocks])
+        return all(block.is_numeric for block in self.blocks)

     @property
     def is_datelike_mixed_type(self):
         # Warning, consolidation needs to get checked upstairs
         self._consolidate_inplace()
-        return any([block.is_datelike for block in self.blocks])
+        return any(block.is_datelike for block in self.blocks)

     @property
     def is_view(self):
@@ -4574,7 +4574,7 @@ def concat(self, to_concat, new_axis):
         if len(non_empties) > 0:
             blocks = [obj.blocks[0] for obj in non_empties]

-            if all([type(b) is type(blocks[0]) for b in blocks[1:]]):  # noqa
+            if all(type(b) is type(blocks[0]) for b in blocks[1:]):  # noqa
                 new_block = blocks[0].concat_same_type(blocks)
             else:
                 values = [x.values for x in blocks]
@@ -5211,13 +5211,13 @@ def is_uniform_join_units(join_units):
     """
     return (
         # all blocks need to have the same type
-        all([type(ju.block) is type(join_units[0].block) for ju in join_units]) and  # noqa
+        all(type(ju.block) is type(join_units[0].block) for ju in join_units) and  # noqa
         # no blocks that would get missing values (can lead to type upcasts)
-        all([not ju.is_na for ju in join_units]) and
+        all(not ju.is_na for ju in join_units) and
         # no blocks with indexers (as then the dimensions do not fit)
-        all([not ju.indexers for ju in join_units]) and
+        all(not ju.indexers for ju in join_units) and
         # disregard Panels
-        all([ju.block.ndim <= 2 for ju in join_units]) and
+        all(ju.block.ndim <= 2 for ju in join_units) and
         # only use this path when there is something to concatenate
         len(join_units) > 1)

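The `is_uniform_join_units` rewrite above stacks four generator-fed `all()` calls into one `and` chain, and both layers now short-circuit: the first check that fails ends the whole expression without touching the remaining join units. A small sketch of that behavior (hypothetical `probe` helper):

    def probe(label, value):
        print('checked', label)
        return value

    # Only 'a' is printed: all() stops at the first False, and the
    # failed all() makes `and` skip the second clause entirely.
    result = (all(probe('a', False) for _ in range(2)) and
              all(probe('b', True) for _ in range(2)))
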
4 changes: 2 additions & 2 deletions pandas/core/sparse/frame.py
@@ -347,8 +347,8 @@ def density(self):
         Ratio of non-sparse points to total (dense) data points
         represented in the frame
         """
-        tot_nonsparse = sum([ser.sp_index.npoints
-                             for _, ser in compat.iteritems(self)])
+        tot_nonsparse = sum(ser.sp_index.npoints
+                            for _, ser in compat.iteritems(self))
         tot = len(self.index) * len(self.columns)
         return tot_nonsparse / float(tot)

10 changes: 5 additions & 5 deletions pandas/io/formats/format.py
@@ -546,7 +546,7 @@ def _to_str_columns(self):
                                                minimum=header_colwidth,
                                                adj=self.adj)

-                max_len = max(np.max([self.adj.len(x) for x in fmt_values]),
+                max_len = max(max(self.adj.len(x) for x in fmt_values),
                               header_colwidth)
                 cheader = self.adj.justify(cheader, max_len, mode=self.justify)
                 stringified.append(cheader + fmt_values)
@@ -761,7 +761,7 @@ def _get_formatted_column_labels(self, frame):
            dtypes = self.frame.dtypes._values

            # if we have a Float level, they don't use leading space at all
-            restrict_formatting = any([l.is_floating for l in columns.levels])
+            restrict_formatting = any(l.is_floating for l in columns.levels)
            need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))

            def space_format(x, y):
@@ -2302,7 +2302,7 @@ def _make_fixed_width(strings, justify='right', minimum=None, adj=None):
     if adj is None:
         adj = _get_adjustment()

-    max_len = np.max([adj.len(x) for x in strings])
+    max_len = max(adj.len(x) for x in strings)

     if minimum is not None:
         max_len = max(minimum, max_len)
@@ -2330,8 +2330,8 @@ def _trim_zeros(str_floats, na_rep='NaN'):

     def _cond(values):
         non_na = [x for x in values if x != na_rep]
-        return (len(non_na) > 0 and all([x.endswith('0') for x in non_na]) and
-                not (any([('e' in x) or ('E' in x) for x in non_na])))
+        return (len(non_na) > 0 and all(x.endswith('0') for x in non_na) and
+                not (any(('e' in x) or ('E' in x) for x in non_na)))

     while _cond(trimmed):
         trimmed = [x[:-1] if x != na_rep else x for x in trimmed]

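Two hunks above also trade `np.max` for the built-in `max`, which is more than style: numpy's reductions expect an array-like and will not iterate an arbitrary generator, so the built-in is what lets the generator form work at all. A quick sketch of the difference (assumes only `import numpy as np`):

    import numpy as np

    values = ['a', 'bb', 'ccc']
    max(len(x) for x in values)       # 3 -- the built-in consumes any iterable
    np.max([len(x) for x in values])  # 3 -- numpy wants a materialized sequence
    # np.max(len(x) for x in values) would not iterate the generator: numpy
    # wraps it in a 0-d object array, so no meaningful maximum comes back.
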
4 changes: 2 additions & 2 deletions pandas/io/html.py
@@ -359,7 +359,7 @@ def _parse_raw_thead(self, table):
         trs = self._parse_tr(thead[0])
         for tr in trs:
             cols = lmap(self._text_getter, self._parse_td(tr))
-            if any([col != '' for col in cols]):
+            if any(col != '' for col in cols):
                 res.append(cols)
         return res

@@ -606,7 +606,7 @@ def _parse_raw_thead(self, table):
             for tr in trs:
                 cols = [_remove_whitespace(x.text_content()) for x in
                         self._parse_td(tr)]
-                if any([col != '' for col in cols]):
+                if any(col != '' for col in cols):
                     res.append(cols)
         return res

2 changes: 1 addition & 1 deletion pandas/io/json/normalize.py
@@ -189,7 +189,7 @@ def _pull_field(js, spec):
         data = [data]

     if record_path is None:
-        if any([isinstance(x, dict) for x in compat.itervalues(data[0])]):
+        if any(isinstance(x, dict) for x in compat.itervalues(data[0])):
             # naive normalization, this is idempotent for flat records
             # and potentially will inflate the data considerably for
             # deeply nested structures: