Uses f strings (#29)
sdpython committed Jul 19, 2022
1 parent 5cd722d commit 738a2e5
Showing 11 changed files with 39 additions and 48 deletions.
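The change is the same throughout the commit: each str.format call is replaced by an equivalent f-string. A minimal sketch of the pattern, with made-up values:

    # Before: positional placeholders filled by str.format.
    s1, s2 = {1, 2}, {2, 3}
    old = 'Non empty intersection {0} & {1}'.format(s1, s2)

    # After: the same message as an f-string, evaluated in place.
    new = f'Non empty intersection {s1} & {s2}'

    assert old == new  # both render identically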
6 changes: 3 additions & 3 deletions _unittests/ut_df/test_connex_split.py
@@ -110,7 +110,7 @@ def test_split_connex(self):
s2 = set(test[col])
if s1 & s2:
raise Exception(
'Non empty intersection {0} & {1}\n{2}\n{3}'.format(s1, s2, train, test))
f'Non empty intersection {s1} & {s2}\n{train}\n{test}')

df['connex'] = 'ole'
train, test = train_test_connex_split( # pylint: disable=W0632
@@ -148,7 +148,7 @@ def test_split_connex2(self):
if s1 & s2:
rows = []
for k, v in sorted(stats[0].items()):
rows.append("{0}={1}".format(k, v))
rows.append(f"{k}={v}")
raise Exception(
'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(s1, s2, train, test, "\n".join(rows)))

@@ -180,7 +180,7 @@ def test_split_connex_missing(self):
if s1 & s2:
rows = []
for k, v in sorted(stats[0].items()):
rows.append("{0}={1}".format(k, v))
rows.append(f"{k}={v}")
raise Exception(
'Non empty intersection {0} & {1}\n{2}\n{3}\n{4}'.format(s1, s2, train, test, "\n".join(rows)))

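The tests in this file all perform the same check: the group values seen in the train and test splits must not overlap. A standalone sketch of that check with made-up sets (the real tests compare columns produced by the split function above):

    train_groups, test_groups = {"a", "b"}, {"c", "d"}
    inter = train_groups & test_groups
    if inter:
        # Same failure message as in the tests, now rendered with an f-string.
        raise Exception(f"Non empty intersection {train_groups} & {test_groups}\n{inter}")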
2 changes: 1 addition & 1 deletion _unittests/ut_df/test_pandas_groupbynan.py
@@ -31,7 +31,7 @@ def test_pandas_groupbynan(self):
try:
self.assertIsInstance(li[-1], float)
except AssertionError as e:
raise AssertionError("Issue with {0}".format(ty)) from e
raise AssertionError(f"Issue with {ty}") from e
try:
self.assertTrue(numpy.isnan(li[-1]))
except AssertionError as e:
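The `raise ... from e` form kept in this test chains the original assertion as the cause of the new error, so both tracebacks are reported. A small sketch of the idiom, independent of the test data:

    def check_last_is_float(li):
        # Mirrors the test: re-raise with context, keeping the original cause.
        try:
            assert isinstance(li[-1], float)
        except AssertionError as e:
            raise AssertionError(f"Issue with {type(li[-1])}") from e

    check_last_is_float([1.0, 2.5])    # passes silently
    # check_last_is_float([1.0, "x"])  # raises: AssertionError: Issue with <class 'str'>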
4 changes: 2 additions & 2 deletions pandas_streaming/data/dummy.py
@@ -20,10 +20,10 @@ def dummy_streaming_dataframe(n, chunksize=10, asfloat=False, **cols):
"""
if asfloat:
df = DataFrame(dict(cfloat=[_ + 0.1 for _ in range(0, n)], cstr=[
"s{0}".format(i) for i in range(0, n)]))
f"s{i}" for i in range(0, n)]))
else:
df = DataFrame(dict(cint=list(range(0, n)), cstr=[
"s{0}".format(i) for i in range(0, n)]))
f"s{i}" for i in range(0, n)]))
for k, v in cols.items():
df[k] = v
return StreamingDataFrame.read_df(df, chunksize=chunksize)
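dummy_streaming_dataframe builds a small DataFrame whose string column is now generated with an f-string, then wraps it in a streaming frame. A sketch of the same construction; the import path for StreamingDataFrame is assumed and shown commented out:

    import pandas

    n = 10
    df = pandas.DataFrame(dict(cint=list(range(0, n)),
                               cstr=[f"s{i}" for i in range(0, n)]))

    # Wrapping with the streaming frame (import path assumed to be pandas_streaming.df):
    # from pandas_streaming.df import StreamingDataFrame
    # sdf = StreamingDataFrame.read_df(df, chunksize=5)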
20 changes: 7 additions & 13 deletions pandas_streaming/df/connex_split.py
@@ -48,8 +47,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
if weights is None:
if test_size == 0 or train_size == 0:
raise ValueError(
"test_size={0} or train_size={1} cannot be null (1)."
"".format(test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null (1).")
return train_test_split(df, test_size=test_size,
train_size=train_size,
random_state=random_state)
@@ -69,8 +68,7 @@ def train_test_split_weights(df, weights=None, test_size=0.25, train_size=None,
test_size = 1 - p
if p is None or min(test_size, p) <= 0:
raise ValueError(
"test_size={0} or train_size={1} cannot be null (2)."
"".format(test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null (2).")
ratio = test_size / p

if random_state is None:
@@ -330,11 +328,9 @@ def do_connex_components(dfrows, local_groups, kb, sib):
grsum = dfids[[name, one]].groupby(name, as_index=False).sum()
if fLOG:
for g in groups:
fLOG("[train_test_connex_split] #nb in '{0}': {1}".format(
g, len(set(dfids[g]))))
fLOG(f"[train_test_connex_split] #nb in '{g}': {len(set(dfids[g]))}")
fLOG(
"[train_test_connex_split] #connex {0}/{1}".format(
grsum.shape[0], dfids.shape[0]))
f"[train_test_connex_split] #connex {grsum.shape[0]}/{dfids.shape[0]}")
if grsum.shape[0] <= 1:
raise ValueError( # pragma: no cover
"Every element is in the same connected components.")
@@ -358,10 +354,9 @@ def do_connex_components(dfrows, local_groups, kb, sib):
"={2}/{3}".format(k, v, cum, len(elements)))

# Most important component
fLOG('[train_test_connex_split] first row of the biggest component '
'{0}'.format(maxc))
fLOG(f'[train_test_connex_split] first row of the biggest component {maxc}')
tdf = dfids[dfids[name] == maxc[0]]
fLOG('[train_test_connex_split] \n{0}'.format(tdf.head(n=10)))
fLOG(f'[train_test_connex_split] \n{tdf.head(n=10)}')

# Splits.
train, test = train_test_split_weights(
@@ -457,8 +452,7 @@ def train_test_apart_stratify(df, group, test_size=0.25, train_size=None,
test_size = 1 - p
if p is None or min(test_size, p) <= 0:
raise ValueError( # pragma: no cover
"test_size={0} or train_size={1} cannot be null".format(
test_size, train_size))
f"test_size={test_size} or train_size={train_size} cannot be null")

couples = df[[group, stratify]].itertuples(name=None, index=False)
hist = Counter(df[stratify])
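Several of the converted log messages above embed whole expressions, not just variable names; an f-string evaluates anything inside the braces. A quick sketch with made-up data:

    import pandas

    dfids = pandas.DataFrame({"connex": [0, 0, 1], "weight": [1, 1, 1]})
    g = "connex"
    # Any expression is allowed inside the braces, as in the logging calls above.
    print(f"[train_test_connex_split] #nb in '{g}': {len(set(dfids[g]))}")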
17 changes: 8 additions & 9 deletions pandas_streaming/df/dataframe.py
@@ -355,14 +355,13 @@ def read_df(df, chunksize=None, check_schema=True) -> 'StreamingDataFrame':
chunksize = df.shape[0]
else:
raise NotImplementedError(
"Cannot retrieve size to infer chunksize for type={0}"
".".format(type(df)))
f"Cannot retrieve size to infer chunksize for type={type(df)}.")

if hasattr(df, 'shape'):
size = df.shape[0]
else:
raise NotImplementedError( # pragma: no cover
"Cannot retrieve size for type={0}.".format(type(df)))
f"Cannot retrieve size for type={type(df)}.")

def local_iterator():
"local iterator"
@@ -696,10 +695,10 @@ def iterator_concat(this, lothers):
if check:
if list(columns) != list(df.columns):
raise ValueError(
"Frame others[{0}] do not have the same column names or the same order.".format(i))
f"Frame others[{i}] do not have the same column names or the same order.")
if list(dtypes) != list(df.dtypes):
raise ValueError(
"Frame others[{0}] do not have the same column types.".format(i))
f"Frame others[{i}] do not have the same column types.")
check = False
yield df

@@ -896,7 +895,7 @@ def iterate_streaming():
return StreamingDataFrame(lambda: iterate_streaming(), **self.get_kwargs())

raise ValueError( # pragma: no cover
"Unknown strategy '{0}'".format(strategy))
f"Unknown strategy '{strategy}'")

def ensure_dtype(self, df, dtypes):
"""
@@ -951,7 +950,7 @@ def __setitem__(self, index, value):
"""
if not isinstance(index, str):
raise ValueError(
"Only column affected are supported but index=%r." % index)
f"Only column affected are supported but index={index!r}.")
if isinstance(value, (int, float, numpy.number, str)):
# Is is equivalent to add_column.
iter_creation = self.iter_creation
@@ -1145,7 +1144,7 @@ def sort_values(self, by, axis=0, ascending=True, kind='quicksort',
"""
if not isinstance(by, str):
raise NotImplementedError(
"Only one column can be used to sort not %r." % by)
f"Only one column can be used to sort not {by!r}.")
keys = {}
nans = []
indices = []
@@ -1224,7 +1223,7 @@ def __init__(self, iter_creation, check_schema=True, stable=True):
self, iter_creation, check_schema=check_schema, stable=stable)
if len(self.columns) != 1:
raise RuntimeError(
"A series can contain only one column not %r." % len(self.columns))
f"A series can contain only one column not {len(self.columns)!r}.")

def apply(self, *args, **kwargs) -> 'StreamingDataFrame':
"""
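Three of the messages in this file move from %r interpolation to the f-string !r conversion, which inserts repr() of the value rather than str(). For example:

    name = "cint"
    print(f"name={name}")    # -> name=cint     (str, the default)
    print(f"name={name!r}")  # -> name='cint'   (repr, equivalent to the old %r)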
10 changes: 5 additions & 5 deletions pandas_streaming/df/dataframe_helpers.py
@@ -49,7 +49,7 @@ def hash_str(c, hash_length):
if isinstance(c, float):
if numpy.isnan(c):
return c
raise ValueError("numpy.nan expected, not {0}".format(c))
raise ValueError(f"numpy.nan expected, not {c}")
m = hashlib.sha256()
m.update(c.encode("utf-8"))
r = m.hexdigest()
@@ -70,7 +70,7 @@ def hash_int(c, hash_length):
if numpy.isnan(c):
return c
else:
raise ValueError("numpy.nan expected, not {0}".format(c))
raise ValueError(f"numpy.nan expected, not {c}")
else:
b = struct.pack("i", c)
m = hashlib.sha256()
@@ -167,7 +167,7 @@ def hash_floatl(c):
df[c] = df[c].apply(hash_strl)
else:
raise NotImplementedError(
"Conversion of type {0} in column '{1}' is not implemented".format(t, c))
f"Conversion of type {t} in column '{c}' is not implemented")

return df

@@ -413,7 +413,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
df.columns, df.dtypes)} # pylint: disable=R1721
if typ[by[0]] != do:
warnings.warn( # pragma: no cover
"[pandas_groupby_nan] NaN value: {0}".format(rep))
f"[pandas_groupby_nan] NaN value: {rep}")
return res
for b in by:
fnan = rep[b]
@@ -468,7 +468,7 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
for b in by:
if typ[b] != do:
warnings.warn( # pragma: no cover
"[pandas_groupby_nan] NaN values: {0}".format(rep))
f"[pandas_groupby_nan] NaN values: {rep}")
break
return res
raise NotImplementedError(
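hash_str above hashes a string with SHA-256 and keeps a prefix of the hex digest, passing NaN floats through unchanged. A self-contained sketch under those assumptions (the truncation to hash_length is inferred from the signature, not shown in the diff):

    import hashlib
    import numpy

    def hash_str_sketch(c, hash_length=10):
        # NaN floats pass through unchanged, as in the guard shown above.
        if isinstance(c, float):
            if numpy.isnan(c):
                return c
            raise ValueError(f"numpy.nan expected, not {c}")
        m = hashlib.sha256()
        m.update(c.encode("utf-8"))
        # Keeping only the first hash_length hex digits is assumed here.
        return m.hexdigest()[:hash_length]

    print(hash_str_sketch("abc"))  # first 10 hex digits of sha256("abc")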
10 changes: 5 additions & 5 deletions pandas_streaming/df/dataframe_io.py
@@ -79,22 +79,22 @@ def to_zip(df, zipfilename, zname="df.csv", **kwargs):
numpy.save(stb, df, **kwargs)
else:
raise TypeError( # pragma: no cover
"Type not handled {0}".format(type(df)))
f"Type not handled {type(df)}")
text = stb.getvalue()

if isinstance(zipfilename, str):
ext = os.path.splitext(zipfilename)[-1]
if ext != '.zip':
raise NotImplementedError( # pragma: no cover
"Only zip file are implemented not '{0}'.".format(ext))
f"Only zip file are implemented not '{ext}'.")
zf = zipfile.ZipFile(zipfilename, 'w') # pylint: disable=R1732
close = True
elif isinstance(zipfilename, zipfile.ZipFile):
zf = zipfilename
close = False
else:
raise TypeError( # pragma: no cover
"No implementation for type '{0}'".format(type(zipfilename)))
f"No implementation for type '{type(zipfilename)}'")

zf.writestr(zname, text)
if close:
@@ -115,15 +115,15 @@ def read_zip(zipfilename, zname=None, **kwargs):
ext = os.path.splitext(zipfilename)[-1]
if ext != '.zip':
raise NotImplementedError( # pragma: no cover
"Only zip files are supported not '{0}'.".format(ext))
f"Only zip files are supported not '{ext}'.")
zf = zipfile.ZipFile(zipfilename, 'r') # pylint: disable=R1732
close = True
elif isinstance(zipfilename, zipfile.ZipFile):
zf = zipfilename
close = False
else:
raise TypeError( # pragma: no cover
"No implementation for type '{0}'".format(type(zipfilename)))
f"No implementation for type '{type(zipfilename)}'")

if zname is None:
zname = zf.namelist()[0]
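to_zip serialises the object to text in memory and stores it in the archive with ZipFile.writestr; read_zip reverses the operation. A minimal sketch of the write path for the DataFrame case, with the buffer handling simplified:

    import io
    import zipfile
    import pandas

    df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]})

    # Serialise to text in memory, then store it under a name inside the archive.
    stb = io.StringIO()
    df.to_csv(stb, index=False)
    with zipfile.ZipFile("df.zip", "w") as zf:
        zf.writestr("df.csv", stb.getvalue())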
12 changes: 5 additions & 7 deletions pandas_streaming/df/dataframe_io_helpers.py
@@ -131,11 +131,11 @@ def _flatten(obj, key):
if not isinstance(k, str):
raise TypeError(
"All keys must a string.") # pragma: no cover
k2 = k if key is None else "{0}{1}{2}".format(key, sep, k)
k2 = k if key is None else f"{key}{sep}{k}"
_flatten(v, k2)
elif isinstance(obj, (list, set)):
for index, item in enumerate(obj):
k2 = k if key is None else "{0}{1}{2}".format(key, sep, index)
k2 = k if key is None else f"{key}{sep}{index}"
_flatten(item, k2)
else:
flattened_dict[key] = obj
@@ -266,15 +266,14 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
for i, (_, event, value) in enumerate(parser):
if i % 1000000 == 0 and fLOG is not None:
fLOG( # pragma: no cover
"[enumerate_json_items] i={0} yielded={1}"
"".format(i, nbyield))
f"[enumerate_json_items] i={i} yielded={nbyield}")
if event == "start_array":
if curkey is None:
current = []
else:
if not isinstance(current, dict):
raise RuntimeError( # pragma: no cover
"Type issue {0}".format(type(current)))
f"Type issue {type(current)}")
c = []
current[curkey] = c # pylint: disable=E1137
current = c
@@ -324,8 +323,7 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
current[curkey] = None # pylint: disable=E1137
curkey = None
else:
raise ValueError("Unknown event '{0}'".format(
event)) # pragma: no cover
raise ValueError(f"Unknown event '{event}'") # pragma: no cover


class JsonIterator2Stream:
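_flatten walks a nested JSON object and builds flat keys joined by sep, using the converted f-strings to compose each key. A standalone sketch of the same idea (function name and defaults are illustrative):

    def flatten(obj, sep="_"):
        # Standalone sketch of the _flatten walk above.
        flattened = {}

        def _walk(value, key):
            if isinstance(value, dict):
                for k, v in value.items():
                    _walk(v, k if key is None else f"{key}{sep}{k}")
            elif isinstance(value, (list, set)):
                for index, item in enumerate(value):
                    _walk(item, str(index) if key is None else f"{key}{sep}{index}")
            else:
                flattened[key] = value

        _walk(obj, None)
        return flattened

    print(flatten({"a": {"b": 1, "c": [2, 3]}}))
    # {'a_b': 1, 'a_c_0': 2, 'a_c_1': 3}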
2 changes: 1 addition & 1 deletion pandas_streaming/df/dataframe_split.py
@@ -255,7 +255,7 @@ def iterator_internal(part_requested):
part = cache.get(h)
if part is None:
raise ValueError( # pragma: no cover
"Second iteration. A row was never met in the first one\n{0}".format(obs))
f"Second iteration. A row was never met in the first one\n{obs}")
if part == part_requested:
accumul.append(obs)
if len(accumul) >= static_schema[2]:
2 changes: 1 addition & 1 deletion pandas_streaming/exc/exc_streaming.py
@@ -19,4 +19,4 @@ def __init__(self, meth):
:param meth: inefficient method
"""
Exception.__init__(
self, "{0} should not be done in streaming mode.".format(meth))
self, f"{meth} should not be done in streaming mode.")
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@
#########

project_var_name = "pandas_streaming"
versionPython = "%s.%s" % (sys.version_info.major, sys.version_info.minor)
versionPython = f"{sys.version_info.major}.{sys.version_info.minor}"
path = "Lib/site-packages/" + project_var_name
readme = 'README.rst'
history = "HISTORY.rst"
