Rename sample_weight to w (code quality) (#1457)
* rename sample_weight to w in cluster module files

* rename sample_weight to w in ensemble module files

* rename sample_weight to w in facto module files

* rename sample_weight to w in forest and tree module files

* rename sample_weight to w in metrics module files

* rename sample_weight to w in tree splitter module files

* add note to unreleased.md file

---------

Signed-off-by: mariliatd <mariliatd@gmail.com>
mariliatd committed Nov 23, 2023
1 parent 3758eb1 commit 7b2f1b8
Showing 45 changed files with 272 additions and 245 deletions.
27 changes: 27 additions & 0 deletions docs/releases/unreleased.md
@@ -0,0 +1,27 @@
+# Unreleased
+
+## cluster
+
+- Renamed the `sample_weight` parameter of `learn_one` and `predict_one` to `w`.
+
+## ensemble
+
+- Renamed the `sample_weight` parameter of `learn_one` to `w`.
+
+## facto
+
+- Renamed the `sample_weight` parameter of `learn_one` to `w`.
+
+## forest
+
+- Renamed `sample_weight` to `w`.
+
+## tree
+
+- Renamed `sample_weight` to `w`.
+
+## metrics
+
+- Renamed the `sample_weight` parameter of `update` and `revert` to `w`.

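For downstream code, the rename is a plain keyword change. A minimal before/after sketch, assuming a metric such as `metrics.Accuracy`, whose `update` accepts a weight via the renamed parameter:

```python
from river import metrics

metric = metrics.Accuracy()

# Before this commit, the weight keyword was `sample_weight`:
#   metric.update(y_true=True, y_pred=True, sample_weight=2.0)
# After this commit, the same call uses `w`:
metric.update(y_true=True, y_pred=True, w=2.0)
```
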
4 changes: 2 additions & 2 deletions river/cluster/dbstream.py
@@ -389,7 +389,7 @@ def _recluster(self):

        self.clustering_is_up_to_date = True

-    def learn_one(self, x, sample_weight=None):
+    def learn_one(self, x, w=None):
        self._update(x)

        if self._time_stamp % self.cleanup_interval == 0:
@@ -399,7 +399,7 @@ def learn_one(self, x, sample_weight=None):

        return self

-    def predict_one(self, x, sample_weight=None):
+    def predict_one(self, x, w=None):
        self._recluster()

        min_distance = math.inf

4 changes: 2 additions & 2 deletions river/cluster/denstream.py
@@ -313,7 +313,7 @@ def _initial_dbscan(self):
            else:
                item.covered = False

-    def learn_one(self, x, sample_weight=None):
+    def learn_one(self, x, w=None):
        self._n_samples_seen += 1
        # control the stream speed
        if self._n_samples_seen % self.stream_speed == 0:
@@ -352,7 +352,7 @@ def learn_one(self, x, sample_weight=None):
                    self.o_micro_clusters.pop(j)
        return self

-    def predict_one(self, x, sample_weight=None):
+    def predict_one(self, x, w=None):
        # This function handles the case when a clustering request arrives.
        # implementation of the DBSCAN algorithm proposed by Ester et al.
        if not self.initialized:

4 changes: 2 additions & 2 deletions river/cluster/streamkmeans.py
@@ -84,7 +84,7 @@ def __init__(self, chunk_size=10, n_clusters=2, **kwargs):
        self._temp_chunk = {}
        self.centers = {}

-    def learn_one(self, x, sample_weight=None):
+    def learn_one(self, x, w=None):
        self.time_stamp += 1

        index = self.time_stamp % self.chunk_size
@@ -107,7 +107,7 @@ def learn_one(self, x, sample_weight=None):

        return self

-    def predict_one(self, x, sample_weight=None):
+    def predict_one(self, x, w=None):
        def get_distance(c):
            return utils.math.minkowski_distance(self.centers[c], x, 2)

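`predict_one` above assigns the incoming point to the closest stored center (`minkowski_distance` with `p=2`, i.e. Euclidean). A self-contained sketch of that assignment rule, independent of river's helpers:

```python
import math


def nearest_center(x: dict, centers: dict):
    """Return the key of the center closest to x in Euclidean distance."""

    def distance(center: dict) -> float:
        keys = set(x) | set(center)
        return math.sqrt(sum((x.get(k, 0.0) - center.get(k, 0.0)) ** 2 for k in keys))

    return min(centers, key=lambda c: distance(centers[c]))


centers = {0: {"x1": 0.0, "x2": 0.0}, 1: {"x1": 5.0, "x2": 5.0}}
print(nearest_center({"x1": 4.0, "x2": 4.5}, centers))  # -> 1
```
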
4 changes: 2 additions & 2 deletions river/cluster/textclust.py
@@ -153,7 +153,7 @@ def __init__(
        self.micro_distance = self.distances(self.micro_distance)
        self.macro_distance = self.distances(self.macro_distance)

-    def learn_one(self, x, t=None, sample_weight=None):
+    def learn_one(self, x, t=None, w=None):
        localdict = {}
        for key in x.keys():
            new_key = key
@@ -213,7 +213,7 @@ def learn_one(self, x, t=None, sample_weight=None):

    ## predicts the cluster number. The type specifies whether this should happen on micro-cluster
    ## or macro-cluster level
-    def predict_one(self, x, sample_weight=None, type="micro"):
+    def predict_one(self, x, w=None, type="micro"):
        localdict = {}
        for key in x.keys():
            new_key = key

18 changes: 9 additions & 9 deletions river/ensemble/streaming_random_patches.py
@@ -109,7 +109,7 @@ def learn_one(self, x: dict, y: base.typing.Target, **kwargs):
            k = poisson(rate=self.lam, rng=self._rng)
            if k == 0:
                continue
-            model.learn_one(x=x, y=y, sample_weight=k, n_samples_seen=self._n_samples_seen)
+            model.learn_one(x=x, y=y, w=k, n_samples_seen=self._n_samples_seen)

        return self

@@ -532,7 +532,7 @@ def learn_one(
        x: dict,
        y: base.typing.ClfTarget,
        *,
-        sample_weight: int,
+        w: int,
        n_samples_seen: int,
        **kwargs,
    ):
@@ -543,16 +543,16 @@ def learn_one(
            # Use all features
            x_subset = x

-        # TODO Find a way to verify if the model natively supports sample_weight
-        for _ in range(int(sample_weight)):
+        # TODO Find a way to verify if the model natively supports sample_weight (w)
+        for _ in range(int(w)):
            self.model.learn_one(x=x_subset, y=y, **kwargs)

        if self._background_learner:
            # Train the background learner
            # Note: Pass the original instance x so features are correctly
            # selected based on the corresponding subspace
            self._background_learner.learn_one(
-                x=x, y=y, sample_weight=sample_weight, n_samples_seen=n_samples_seen  # type: ignore
+                x=x, y=y, w=w, n_samples_seen=n_samples_seen  # type: ignore
            )

        if not self.disable_drift_detector and not self.is_background_learner:
@@ -830,7 +830,7 @@ def learn_one(
        x: dict,
        y: base.typing.RegTarget,
        *,
-        sample_weight: int,
+        w: int,
        n_samples_seen: int,
        **kwargs,
    ):
@@ -842,8 +842,8 @@ def learn_one(
            # Use all features
            x_subset = x

-        # TODO Find a way to verify if the model natively supports sample_weight
-        for _ in range(int(sample_weight)):
+        # TODO Find a way to verify if the model natively supports sample_weight (w)
+        for _ in range(int(w)):
            self.model.learn_one(x=x_subset, y=y, **kwargs)

        # Drift detection input
@@ -860,7 +860,7 @@ def learn_one(
            # Note: Pass the original instance x so features are correctly
            # selected based on the corresponding subspace
            self._background_learner.learn_one(
-                x=x, y=y, sample_weight=sample_weight, n_samples_seen=n_samples_seen  # type: ignore
+                x=x, y=y, w=w, n_samples_seen=n_samples_seen  # type: ignore
            )

        if not self.disable_drift_detector and not self.is_background_learner:

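The TODO in both hunks explains the surrounding loop: when the wrapped model has no native weight parameter, an integer weight `w` is emulated by feeding it the same sample `w` times. A minimal sketch of that idea with a toy classifier; `MajorityClass` and `learn_weighted` are illustrative, not part of river:

```python
from collections import Counter


class MajorityClass:
    """A toy incremental classifier with no native weight support."""

    def __init__(self):
        self.counts = Counter()

    def learn_one(self, x, y):
        self.counts[y] += 1
        return self

    def predict_one(self, x):
        return self.counts.most_common(1)[0][0] if self.counts else None


def learn_weighted(model, x, y, w: int):
    # Emulate an integer sample weight by repeating the update,
    # exactly as the `for _ in range(int(w))` loop above does.
    for _ in range(int(w)):
        model.learn_one(x, y)
    return model


model = learn_weighted(MajorityClass(), {"x1": 1.0}, True, w=3)
print(model.predict_one({"x1": 0.0}))  # True
```
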
8 changes: 4 additions & 4 deletions river/facto/base.py
@@ -65,28 +65,28 @@ def __init__(
    def _init_latents(self) -> collections.defaultdict:
        """Initializes latent weights dict."""

-    def learn_one(self, x, y, sample_weight=1.0):
+    def learn_one(self, x, y, w=1.0):
        x = self._ohe_cat_features(x)

        if self.sample_normalization:
            x_l2_norm = sum(xj**2 for xj in x.values()) ** 0.5
            x = {j: xj / x_l2_norm for j, xj in x.items()}

-        return self._learn_one(x, y, sample_weight=sample_weight)
+        return self._learn_one(x, y, w=w)

    def _ohe_cat_features(self, x):
        """One hot encodes string features considering them as categorical."""
        return dict((f"{j}_{xj}", 1) if isinstance(xj, str) else (j, xj) for j, xj in x.items())

-    def _learn_one(self, x, y, sample_weight=1.0):
+    def _learn_one(self, x, y, w=1.0):
        # Calculate the gradient of the loss with respect to the raw output
        g_loss = self.loss.gradient(y_true=y, y_pred=self._raw_dot(x))

        # Clamp the gradient to avoid numerical instability
        g_loss = utils.math.clamp(g_loss, minimum=-self.clip_gradient, maximum=self.clip_gradient)

        # Apply the sample weight
-        g_loss *= sample_weight
+        g_loss *= w

        # Update the intercept
        intercept_lr = self.intercept_lr.get(self.weight_optimizer.n_iterations)

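In `facto`, the weight enters the update by scaling the loss gradient (`g_loss *= w`), which for SGD amounts to seeing the sample `w` times with the same step size. A minimal sketch of such a weighted step under an assumed squared loss; the names `weights` and `lr` are illustrative, not river's internals:

```python
def weighted_sgd_step(weights: dict, x: dict, y: float, w: float = 1.0, lr: float = 0.01) -> dict:
    # Raw model output: a dot product over the present features.
    y_pred = sum(weights.get(j, 0.0) * xj for j, xj in x.items())
    # Gradient of squared loss w.r.t. the raw output.
    g_loss = 2 * (y_pred - y)
    # The sample weight scales the whole gradient, as in `_learn_one`.
    g_loss *= w
    for j, xj in x.items():
        weights[j] = weights.get(j, 0.0) - lr * g_loss * xj
    return weights
```
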
4 changes: 2 additions & 2 deletions river/forest/adaptive_random_forest.py
@@ -169,9 +169,9 @@ def learn_one(self, x: dict, y: base.typing.Target, **kwargs):
            k = poisson(rate=self.lambda_value, rng=self._rng)
            if k > 0:
                if not self._warning_detection_disabled and self._background[i] is not None:
-                    self._background[i].learn_one(x=x, y=y, sample_weight=k)  # type: ignore
+                    self._background[i].learn_one(x=x, y=y, w=k)  # type: ignore

-                model.learn_one(x=x, y=y, sample_weight=k)
+                model.learn_one(x=x, y=y, w=k)

                drift_input = None
                if not self._warning_detection_disabled:

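The `k` passed as `w` here implements online bagging (Oza & Russell): drawing `k ~ Poisson(lambda)` per tree approximates how often the sample would appear in that tree's bootstrap replica. A self-contained sketch of the scheme; `models` stands for any iterable of river-style learners whose `learn_one` accepts a weight:

```python
import math
import random


def poisson(rate: float, rng: random.Random) -> int:
    """Sample k ~ Poisson(rate) with Knuth's algorithm (assumes rate > 0)."""
    threshold = math.exp(-rate)
    k, p = 0, 1.0
    while p > threshold:
        k += 1
        p *= rng.random()
    return k - 1


def online_bagging_step(models, x, y, lam: float = 6.0, rng: random.Random = random.Random(42)):
    # Each member trains on the sample with a Poisson-distributed weight,
    # skipping it entirely when k == 0, just like the loop above.
    for model in models:
        k = poisson(lam, rng)
        if k > 0:
            model.learn_one(x, y, w=k)
```
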
4 changes: 2 additions & 2 deletions river/forest/online_extra_trees.py
@@ -314,10 +314,10 @@ def learn_one(self, x, y):
            if w == 0:  # Skip model update if w is zero
                continue

-            model.learn_one(x, y, sample_weight=w)
+            model.learn_one(x, y, w=w)

            if i in self._background_trees:
-                self._background_trees[i].learn_one(x, y, sample_weight=w)
+                self._background_trees[i].learn_one(x, y, w=w)

            trained.append(i)

38 changes: 19 additions & 19 deletions river/metrics/base.py
@@ -84,19 +84,19 @@ def __init__(self, cm: confusion.ConfusionMatrix | None = None):
            cm = confusion.ConfusionMatrix()
        self.cm = cm

-    def update(self, y_true, y_pred, sample_weight=1.0):
+    def update(self, y_true, y_pred, w=1.0):
        self.cm.update(
            y_true,
            y_pred,
-            sample_weight=sample_weight,
+            w=w,
        )
        return self

-    def revert(self, y_true, y_pred, sample_weight=1.0):
+    def revert(self, y_true, y_pred, w=1.0):
        self.cm.revert(
            y_true,
            y_pred,
-            sample_weight=sample_weight,
+            w=w,
        )
        return self

@@ -148,21 +148,21 @@ def update(
        self,
        y_true: bool,
        y_pred: bool | float | dict[bool, float],
-        sample_weight=1.0,
+        w=1.0,
    ) -> BinaryMetric:
        if self.requires_labels:
            y_pred = y_pred == self.pos_val
-        return super().update(y_true == self.pos_val, y_pred, sample_weight)
+        return super().update(y_true == self.pos_val, y_pred, w)

    def revert(
        self,
        y_true: bool,
        y_pred: bool | float | dict[bool, float],
-        sample_weight=1.0,
+        w=1.0,
    ) -> BinaryMetric:
        if self.requires_labels:
            y_pred = y_pred == self.pos_val
-        return super().revert(y_true == self.pos_val, y_pred, sample_weight)
+        return super().revert(y_true == self.pos_val, y_pred, w)


class MultiClassMetric(ClassificationMetric):
@@ -224,7 +224,7 @@ def __init__(self, metrics, str_sep=", "):
        super().__init__(metrics)
        self.str_sep = str_sep

-    def update(self, y_true, y_pred, sample_weight=1.0):
+    def update(self, y_true, y_pred, w=1.0):
        # If the metrics are classification metrics, then we have to handle the case where some
        # of the metrics require labels, whilst others need to be fed probabilities
        if hasattr(self, "requires_labels") and not self.requires_labels:
@@ -239,19 +239,19 @@ def update(self, y_true, y_pred, sample_weight=1.0):
            m.update(y_true, y_pred)
        return self

-    def revert(self, y_true, y_pred, sample_weight=1.0):
+    def revert(self, y_true, y_pred, w=1.0):
        # If the metrics are classification metrics, then we have to handle the case where some
        # of the metrics require labels, whilst others need to be fed probabilities
        if hasattr(self, "requires_labels") and not self.requires_labels:
            for m in self:
                if m.requires_labels:
-                    m.revert(y_true, max(y_pred, key=y_pred.get), sample_weight)
+                    m.revert(y_true, max(y_pred, key=y_pred.get), w)
                else:
-                    m.revert(y_true, y_pred, sample_weight)
+                    m.revert(y_true, y_pred, w)
            return self

        for m in self:
-            m.revert(y_true, y_pred, sample_weight)
+            m.revert(y_true, y_pred, w)
        return self

    def get(self):
@@ -333,12 +333,12 @@ def __init__(self):
    def _eval(self, y_true, y_pred):
        pass

-    def update(self, y_true, y_pred, sample_weight=1.0):
-        self._mean.update(x=self._eval(y_true, y_pred), w=sample_weight)
+    def update(self, y_true, y_pred, w=1.0):
+        self._mean.update(x=self._eval(y_true, y_pred), w=w)
        return self

-    def revert(self, y_true, y_pred, sample_weight=1.0):
-        self._mean.revert(x=self._eval(y_true, y_pred), w=sample_weight)
+    def revert(self, y_true, y_pred, w=1.0):
+        self._mean.revert(x=self._eval(y_true, y_pred), w=w)
        return self

    def get(self):
@@ -354,11 +354,11 @@ class ClusteringMetric(base.Base, abc.ABC):
    _fmt = ",.6f" # Use commas to separate big numbers and show 6 decimals

    @abc.abstractmethod
-    def update(self, x, y_pred, centers, sample_weight=1.0) -> ClusteringMetric:
+    def update(self, x, y_pred, centers, w=1.0) -> ClusteringMetric:
        """Update the metric."""

    @abc.abstractmethod
-    def revert(self, x, y_pred, centers, sample_weight=1.0) -> ClusteringMetric:
+    def revert(self, x, y_pred, centers, w=1.0) -> ClusteringMetric:
        """Revert the metric."""

    @abc.abstractmethod

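The `_mean` seen above absorbs `w` as a weighted running mean, which is what makes `revert` exact rather than approximate. A minimal sketch of such a mean, assuming only the update/revert contract visible in this hunk:

```python
class WeightedMean:
    """A running mean where each sample carries a weight w."""

    def __init__(self):
        self.total_weight = 0.0
        self.mean = 0.0

    def update(self, x: float, w: float = 1.0):
        self.total_weight += w
        self.mean += (w / self.total_weight) * (x - self.mean)
        return self

    def revert(self, x: float, w: float = 1.0):
        # Undo an earlier update(x, w); exact because the mean is linear
        # in the weighted contributions.
        self.total_weight -= w
        if self.total_weight > 0:
            self.mean -= (w / self.total_weight) * (x - self.mean)
        else:
            self.mean = 0.0
        return self
```
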
20 changes: 10 additions & 10 deletions river/metrics/confusion.py
@@ -62,22 +62,22 @@ def __getitem__(self, key):
        """Syntactic sugar for accessing the counts directly."""
        return self.data[key]

-    def update(self, y_true, y_pred, sample_weight=1.0):
+    def update(self, y_true, y_pred, w=1.0):
        self.n_samples += 1
-        self._update(y_true, y_pred, sample_weight)
+        self._update(y_true, y_pred, w)
        return self

-    def revert(self, y_true, y_pred, sample_weight=1.0):
+    def revert(self, y_true, y_pred, w=1.0):
        self.n_samples -= 1
-        # Revert is equal to subtracting so we pass the negative sample_weight
-        self._update(y_true, y_pred, -sample_weight)
+        # Revert is equal to subtracting so we pass the negative sample_weight (w)
+        self._update(y_true, y_pred, -w)
        return self

-    def _update(self, y_true, y_pred, sample_weight):
-        self.data[y_true][y_pred] += sample_weight
-        self.total_weight += sample_weight
-        self.sum_row[y_true] += sample_weight
-        self.sum_col[y_pred] += sample_weight
+    def _update(self, y_true, y_pred, w):
+        self.data[y_true][y_pred] += w
+        self.total_weight += w
+        self.sum_row[y_true] += w
+        self.sum_col[y_pred] += w

    @property
    def classes(self):

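Since `revert` is just `_update` with a negated weight, a matched update/revert pair leaves every counter unchanged. A quick usage sketch, assuming the post-rename API:

```python
from river import metrics

cm = metrics.ConfusionMatrix()
cm.update(y_true="cat", y_pred="dog", w=2.0)
cm.revert(y_true="cat", y_pred="dog", w=2.0)

# The weighted counts cancel out exactly.
assert cm.total_weight == 0.0
```
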
4 changes: 2 additions & 2 deletions river/metrics/mse.py
@@ -81,5 +81,5 @@ class RMSLE(RMSE):
    """

-    def update(self, y_true, y_pred, sample_weight=1.0):
-        return super().update(math.log(y_true + 1), math.log(y_pred + 1), sample_weight)
+    def update(self, y_true, y_pred, w=1.0):
+        return super().update(math.log(y_true + 1), math.log(y_pred + 1), w)

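As the hunk shows, `RMSLE` is `RMSE` computed in log space, i.e. on `log(1 + y)`. A quick standalone check of that definition in plain Python:

```python
import math

pairs = [(3.0, 2.5), (0.5, 0.5), (2.0, 2.1)]  # (y_true, y_pred)
rmsle = math.sqrt(
    sum((math.log(yt + 1) - math.log(yp + 1)) ** 2 for yt, yp in pairs) / len(pairs)
)
print(f"{rmsle:.4f}")
```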