diff --git a/_unittests/ut_df/test_connex_split.py b/_unittests/ut_df/test_connex_split.py
index baa629c..c75fc01 100644
--- a/_unittests/ut_df/test_connex_split.py
+++ b/_unittests/ut_df/test_connex_split.py
@@ -110,7 +110,7 @@ def test_split_connex(self):
             s2 = set(test[col])
             if s1 & s2:
                 raise Exception(
-                    'Non empty intersection {0} & {1}\n{2}\n{3}'.format(s1, s2, train, test))
+                    f'Non empty intersection {s1} & {s2}\n{train}\n{test}')

         df['connex'] = 'ole'
         train, test = train_test_connex_split(  # pylint: disable=W0632
@@ -148,7 +148,7 @@ def test_split_connex2(self):
             if s1 & s2:
                 rows = []
                 for k, v in sorted(stats[0].items()):
-                    rows.append("{0}={1}".format(k, v))
+                    rows.append(f"{k}={v}")
                 raise Exception(
                     'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(
                         s1, s2, train, test, "\n".join(rows)))
@@ -180,7 +180,7 @@ def test_split_connex_missing(self):
             if s1 & s2:
                 rows = []
                 for k, v in sorted(stats[0].items()):
-                    rows.append("{0}={1}".format(k, v))
+                    rows.append(f"{k}={v}")
                 raise Exception(
                     'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(
                         s1, s2, train, test, "\n".join(rows)))
diff --git a/_unittests/ut_df/test_pandas_groupbynan.py b/_unittests/ut_df/test_pandas_groupbynan.py
index 63af8fc..4f32dab 100644
--- a/_unittests/ut_df/test_pandas_groupbynan.py
+++ b/_unittests/ut_df/test_pandas_groupbynan.py
@@ -31,7 +31,7 @@ def test_pandas_groupbynan(self):
                 try:
                     self.assertIsInstance(li[-1], float)
                 except AssertionError as e:
-                    raise AssertionError("Issue with {0}".format(ty)) from e
+                    raise AssertionError(f"Issue with {ty}") from e
                 try:
                     self.assertTrue(numpy.isnan(li[-1]))
                 except AssertionError as e:
diff --git a/pandas_streaming/data/dummy.py b/pandas_streaming/data/dummy.py
index c0fea66..0103d1f 100644
--- a/pandas_streaming/data/dummy.py
+++ b/pandas_streaming/data/dummy.py
@@ -20,10 +20,10 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
    """
    if asfloat:
        df = DataFrame(dict(cfloat=[_ + 0.1 for _ in range(0, n)], cstr=[
-           "s{0}".format(i) for i in range(0, n)]))
+           f"s{i}" for i in range(0, n)]))
    else:
        df = DataFrame(dict(cint=list(range(0, n)), cstr=[
-           "s{0}".format(i) for i in range(0, n)]))
+           f"s{i}" for i in range(0, n)]))
    for k, v in cols.items():
        df[k] = v
    return StreamingDataFrame.read_df(df, chunksize=chunksize)
diff --git a/pandas_streaming/df/connex_split.py b/pandas_streaming/df/connex_split.py
index e06cd69..e2b4aca 100644
--- a/pandas_streaming/df/connex_split.py
+++ b/pandas_streaming/df/connex_split.py
@@ -48,8 +48,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
    if weights is None:
        if test_size == 0 or train_size == 0:
            raise ValueError(
-               "test_size={0} or train_size={1} cannot be null (1)."
-               "".format(test_size, train_size))
+               f"test_size={test_size} or train_size={train_size} cannot be null (1).")
        return train_test_split(df, test_size=test_size,
                                train_size=train_size,
                                random_state=random_state)
@@ -69,8 +68,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
            test_size = 1 - p
    if p is None or min(test_size, p) <= 0:
        raise ValueError(
-           "test_size={0} or train_size={1} cannot be null (2)."
- "".format(test_size, train_size)) + f"test_size={test_size} or train_size={train_size} cannot be null (2).") ratio = test_size / p if random_state is None: @@ -330,11 +328,9 @@ def do_connex_components(dfrows, local_groups, kb, sib): grsum = dfids[[name, one]].groupby(name, as_index=False).sum() if fLOG: for g in groups: - fLOG("[train_test_connex_split] #nb in '{0}': {1}".format( - g, len(set(dfids[g])))) + fLOG(f"[train_test_connex_split] #nb in '{g}': {len(set(dfids[g]))}") fLOG( - "[train_test_connex_split] #connex {0}/{1}".format( - grsum.shape[0], dfids.shape[0])) + f"[train_test_connex_split] #connex {grsum.shape[0]}/{dfids.shape[0]}") if grsum.shape[0] <= 1: raise ValueError( # pragma: no cover "Every element is in the same connected components.") @@ -358,10 +354,9 @@ def do_connex_components(dfrows, local_groups, kb, sib): "={2}/{3}".format(k, v, cum, len(elements))) # Most important component - fLOG('[train_test_connex_split] first row of the biggest component ' - '{0}'.format(maxc)) + fLOG(f'[train_test_connex_split] first row of the biggest component {maxc}') tdf = dfids[dfids[name] == maxc[0]] - fLOG('[train_test_connex_split] \n{0}'.format(tdf.head(n=10))) + fLOG(f'[train_test_connex_split] \n{tdf.head(n=10)}') # Splits. train, test = train_test_split_weights( @@ -457,8 +452,7 @@ def train_test_apart_stratify(df, group, test_size=0.25, train_size=None, test_size = 1 - p if p is None or min(test_size, p) <= 0: raise ValueError( # pragma: no cover - "test_size={0} or train_size={1} cannot be null".format( - test_size, train_size)) + f"test_size={test_size} or train_size={train_size} cannot be null") couples = df[[group, stratify]].itertuples(name=None, index=False) hist = Counter(df[stratify]) diff --git a/pandas_streaming/df/dataframe.py b/pandas_streaming/df/dataframe.py index 8294f95..1a19a7d 100644 --- a/pandas_streaming/df/dataframe.py +++ b/pandas_streaming/df/dataframe.py @@ -355,14 +355,13 @@ def read_df(df, chunksize=None, check_schema=True) -> 'StreamingDataFrame': chunksize = df.shape[0] else: raise NotImplementedError( - "Cannot retrieve size to infer chunksize for type={0}" - ".".format(type(df))) + f"Cannot retrieve size to infer chunksize for type={type(df)}.") if hasattr(df, 'shape'): size = df.shape[0] else: raise NotImplementedError( # pragma: no cover - "Cannot retrieve size for type={0}.".format(type(df))) + f"Cannot retrieve size for type={type(df)}.") def local_iterator(): "local iterator" @@ -696,10 +695,10 @@ def iterator_concat(this, lothers): if check: if list(columns) != list(df.columns): raise ValueError( - "Frame others[{0}] do not have the same column names or the same order.".format(i)) + f"Frame others[{i}] do not have the same column names or the same order.") if list(dtypes) != list(df.dtypes): raise ValueError( - "Frame others[{0}] do not have the same column types.".format(i)) + f"Frame others[{i}] do not have the same column types.") check = False yield df @@ -896,7 +895,7 @@ def iterate_streaming(): return StreamingDataFrame(lambda: iterate_streaming(), **self.get_kwargs()) raise ValueError( # pragma: no cover - "Unknown strategy '{0}'".format(strategy)) + f"Unknown strategy '{strategy}'") def ensure_dtype(self, df, dtypes): """ @@ -951,7 +950,7 @@ def __setitem__(self, index, value): """ if not isinstance(index, str): raise ValueError( - "Only column affected are supported but index=%r." 
+               f"Only column affected are supported but index={index!r}.")
        if isinstance(value, (int, float, numpy.number, str)):
            # Is is equivalent to add_column.
            iter_creation = self.iter_creation
@@ -1145,7 +1144,7 @@ def sort_values(self, by, axis=0, ascending=True, kind='quicksort',
        """
        if not isinstance(by, str):
            raise NotImplementedError(
-               "Only one column can be used to sort not %r." % by)
+               f"Only one column can be used to sort not {by!r}.")
        keys = {}
        nans = []
        indices = []
@@ -1224,7 +1223,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
            self, iter_creation, check_schema=check_schema, stable=stable)
        if len(self.columns) != 1:
            raise RuntimeError(
-               "A series can contain only one column not %r." % len(self.columns))
+               f"A series can contain only one column not {len(self.columns)!r}.")

    def apply(self, *args, **kwargs) -> 'StreamingDataFrame':
        """
diff --git a/pandas_streaming/df/dataframe_helpers.py b/pandas_streaming/df/dataframe_helpers.py
index 7591fdf..7fa8399 100644
--- a/pandas_streaming/df/dataframe_helpers.py
+++ b/pandas_streaming/df/dataframe_helpers.py
@@ -49,7 +49,7 @@ def hash_str(c, hash_length):
    if isinstance(c, float):
        if numpy.isnan(c):
            return c
-       raise ValueError("numpy.nan expected, not {0}".format(c))
+       raise ValueError(f"numpy.nan expected, not {c}")
    m = hashlib.sha256()
    m.update(c.encode("utf-8"))
    r = m.hexdigest()
@@ -70,7 +70,7 @@ def hash_int(c, hash_length):
        if numpy.isnan(c):
            return c
        else:
-           raise ValueError("numpy.nan expected, not {0}".format(c))
+           raise ValueError(f"numpy.nan expected, not {c}")
    else:
        b = struct.pack("i", c)
        m = hashlib.sha256()
@@ -167,7 +167,7 @@ def hash_floatl(c):
            df[c] = df[c].apply(hash_strl)
        else:
            raise NotImplementedError(
-               "Conversion of type {0} in column '{1}' is not implemented".format(t, c))
+               f"Conversion of type {t} in column '{c}' is not implemented")
    return df


@@ -413,7 +413,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
                df.columns, df.dtypes)}  # pylint: disable=R1721
            if typ[by[0]] != do:
                warnings.warn(  # pragma: no cover
-                   "[pandas_groupby_nan] NaN value: {0}".format(rep))
+                   f"[pandas_groupby_nan] NaN value: {rep}")
            return res
        for b in by:
            fnan = rep[b]
@@ -468,7 +468,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
                for b in by:
                    if typ[b] != do:
                        warnings.warn(  # pragma: no cover
-                           "[pandas_groupby_nan] NaN values: {0}".format(rep))
+                           f"[pandas_groupby_nan] NaN values: {rep}")
                        break
            return res
    raise NotImplementedError(
diff --git a/pandas_streaming/df/dataframe_io.py b/pandas_streaming/df/dataframe_io.py
index 2138178..30d0fb8 100644
--- a/pandas_streaming/df/dataframe_io.py
+++ b/pandas_streaming/df/dataframe_io.py
@@ -79,14 +79,14 @@ def to_zip(df, zipfilename, zname="df.csv", **kwargs):
        numpy.save(stb, df, **kwargs)
    else:
        raise TypeError(  # pragma: no cover
-           "Type not handled {0}".format(type(df)))
+           f"Type not handled {type(df)}")
    text = stb.getvalue()

    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != '.zip':
            raise NotImplementedError(  # pragma: no cover
-               "Only zip file are implemented not '{0}'.".format(ext))
+               f"Only zip file are implemented not '{ext}'.")
        zf = zipfile.ZipFile(zipfilename, 'w')  # pylint: disable=R1732
        close = True
    elif isinstance(zipfilename, zipfile.ZipFile):
@@ -94,7 +94,7 @@ def to_zip(df, zipfilename, zname="df.csv", **kwargs):
        close = False
    else:
        raise TypeError(  # pragma: no cover
-           "No implementation for type '{0}'".format(type(zipfilename)))
+           f"No implementation for type '{type(zipfilename)}'")

    zf.writestr(zname, text)
    if close:
@@ -115,7 +115,7 @@ def read_zip(zipfilename, zname=None, **kwargs):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != '.zip':
            raise NotImplementedError(  # pragma: no cover
-               "Only zip files are supported not '{0}'.".format(ext))
+               f"Only zip files are supported not '{ext}'.")
        zf = zipfile.ZipFile(zipfilename, 'r')  # pylint: disable=R1732
        close = True
    elif isinstance(zipfilename, zipfile.ZipFile):
@@ -123,7 +123,7 @@ def read_zip(zipfilename, zname=None, **kwargs):
        close = False
    else:
        raise TypeError(  # pragma: no cover
-           "No implementation for type '{0}'".format(type(zipfilename)))
+           f"No implementation for type '{type(zipfilename)}'")

    if zname is None:
        zname = zf.namelist()[0]
diff --git a/pandas_streaming/df/dataframe_io_helpers.py b/pandas_streaming/df/dataframe_io_helpers.py
index b778571..2bd761a 100644
--- a/pandas_streaming/df/dataframe_io_helpers.py
+++ b/pandas_streaming/df/dataframe_io_helpers.py
@@ -131,11 +131,11 @@ def _flatten(obj, key):
                if not isinstance(k, str):
                    raise TypeError(
                        "All keys must a string.")  # pragma: no cover
-               k2 = k if key is None else "{0}{1}{2}".format(key, sep, k)
+               k2 = k if key is None else f"{key}{sep}{k}"
                _flatten(v, k2)
        elif isinstance(obj, (list, set)):
            for index, item in enumerate(obj):
-               k2 = k if key is None else "{0}{1}{2}".format(key, sep, index)
+               k2 = k if key is None else f"{key}{sep}{index}"
                _flatten(item, k2)
        else:
            flattened_dict[key] = obj
@@ -266,15 +266,14 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
    for i, (_, event, value) in enumerate(parser):
        if i % 1000000 == 0 and fLOG is not None:
            fLOG(  # pragma: no cover
-               "[enumerate_json_items] i={0} yielded={1}"
-               "".format(i, nbyield))
+               f"[enumerate_json_items] i={i} yielded={nbyield}")
        if event == "start_array":
            if curkey is None:
                current = []
            else:
                if not isinstance(current, dict):
                    raise RuntimeError(  # pragma: no cover
-                       "Type issue {0}".format(type(current)))
+                       f"Type issue {type(current)}")
                c = []
                current[curkey] = c  # pylint: disable=E1137
                current = c
@@ -324,8 +323,7 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
                current[curkey] = None  # pylint: disable=E1137
            curkey = None
        else:
-           raise ValueError("Unknown event '{0}'".format(
-               event))  # pragma: no cover
+           raise ValueError(f"Unknown event '{event}'")  # pragma: no cover


class JsonIterator2Stream:
diff --git a/pandas_streaming/df/dataframe_split.py b/pandas_streaming/df/dataframe_split.py
index 2ed9b9a..8d2057f 100644
--- a/pandas_streaming/df/dataframe_split.py
+++ b/pandas_streaming/df/dataframe_split.py
@@ -255,7 +255,7 @@ def iterator_internal(part_requested):
                part = cache.get(h)
                if part is None:
                    raise ValueError(  # pragma: no cover
-                       "Second iteration. A row was never met in the first one\n{0}".format(obs))
+                       f"Second iteration. A row was never met in the first one\n{obs}")
                if part == part_requested:
                    accumul.append(obs)
                    if len(accumul) >= static_schema[2]:
diff --git a/pandas_streaming/exc/exc_streaming.py b/pandas_streaming/exc/exc_streaming.py
index dfe0bba..c7094e5 100644
--- a/pandas_streaming/exc/exc_streaming.py
+++ b/pandas_streaming/exc/exc_streaming.py
@@ -19,4 +19,4 @@ def __init__(self, meth):
        :param meth: inefficient method
        """
        Exception.__init__(
-           self, "{0} should not be done in streaming mode.".format(meth))
+           self, f"{meth} should not be done in streaming mode.")
diff --git a/setup.py b/setup.py
index 9952779..e8a706a 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
#########

project_var_name = "pandas_streaming"
-versionPython = "%s.%s" % (sys.version_info.major, sys.version_info.minor)
+versionPython = f"{sys.version_info.major}.{sys.version_info.minor}"
path = "Lib/site-packages/" + project_var_name
readme = 'README.rst'
history = "HISTORY.rst"