Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions mostlyai/qa/_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,18 +250,16 @@ def calculate_numeric_uni_kdes(df: pd.DataFrame, trn_kdes: dict[str, pd.Series]

# estimate gaussian kernels
series_vals = series.dropna().to_numpy("float")
if len(series_vals) > 1:
try:
series_kde = scipy.stats.gaussian_kde(series_vals)
val_y = series_kde(val_x.to_numpy("float"))
val_y = (val_y / (val_y.sum() + 1e-30)).round(5)
except np.linalg.LinAlgError:
# handle `singular matrix` error that can occur for constants
val_y = [1] * len(val_x)
elif len(series_vals) == 1:
# avoid singular matrix error by adding some noise
noise = np.abs(minimum * 1e-3 if (minimum := np.min(series_vals)) != 0 else 1e-18)
series_vals += np.random.normal(loc=0, scale=noise, size=series_vals.shape)
try:
series_kde = scipy.stats.gaussian_kde(series_vals)
val_y = series_kde(val_x.to_numpy("float"))
val_y = (val_y / (val_y.sum() + 1e-30)).round(5)
except Exception as e:
_LOG.warning(f"gaussian_kde failed, using ones instead: {e}")
val_y = [1] * len(val_x)
else:
val_y = [np.nan] * len(val_x)
col_kdes[col] = pd.Series(val_y, index=val_x, name=col)

if trn_kdes is not None:
Expand Down
3 changes: 3 additions & 0 deletions mostlyai/qa/_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ def make_contour_and_centroid_traces(

# estimate gaussian kernels
data = data.T
# avoid singular matrix error by adding some noise
noise = np.abs(minimum * 1e-3 if (minimum := np.min(data)) != 0 else 1e-18)
data += np.random.normal(loc=0, scale=noise, size=data.shape)
try:
Z = scipy.stats.gaussian_kde(data)(np.vstack([X.ravel(), Y.ravel()])).reshape(X.shape)
except Exception as e:
Expand Down
Loading