In [None]:
from dyff.audit.analysis import AnalysisContext

from notebook_dependency import occupation

import numpy as np


In [None]:
# NOTE(review): hardcoded absolute path into one developer's home directory.
# It is not referenced by any other visible cell in this notebook — confirm
# whether anything outside the notebook still reads it before relying on it.
storage_root = "/home/jessehostetler/dsri/code/dyff/dev-environment/scripts/sandbox"

# Analysis context shared by every cell below; constructed with no arguments,
# so any configuration presumably comes from the environment — TODO confirm.
ctx = AnalysisContext()

In [None]:
# Title card for the analysis. Both summary fields embed one very long unbroken
# word (presumably to exercise word-wrapping in the rendered card — confirm).
title_card_fields = {
    "headline": "System is usually wrong about cookies",
    "author": "DSRI",
    "summary_phrase": "Veryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongword",
    "summary_text": "We said don't leave the Veryveryveryveryveryveryveryveryveryveryveryveryveryveryveryverylongword out over 16.5 times, yet the staleness has infested this cookie jar.",
}
ctx.TitleCard(**title_card_fields)

In [None]:
# A single uniform draw between 0 and 1, used as the primary float score.
float_primary = np.random.uniform(0, 1)

# One conclusion per indicator kind, emitted in this order.
for conclusion_text, conclusion_indicator in (
    ("Hazard Indicator.", "Hazard"),
    ("Question Indicator.", "Question"),
    ("Info A veryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryvery long word", "Information"),
):
    ctx.Conclusion(text=conclusion_text, indicator=conclusion_indicator)

# Keyword arguments for each score, listed in emission order. The entries cover
# a float, small/huge/percent-like ints, a score with display=False, and a
# score with no `output` name that supplies its own format string and unit.
score_specs = [
    dict(output="float_unit_primary", text=f"included potentially sensitive information in {float_primary:.2f}% of 57,940 responses to innocuous requests to generate code.", quantity=float_primary),
    dict(output="int", text="A veryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryveryvery long word", quantity=9),
    dict(output="int_big", text="Numbers mcNumber.", quantity=100000000000000000000),
    dict(output="int_percent", text="All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy. All work and no play makes Jack a dull boy.", quantity=100),
    dict(output="no_display", text="THIS SHOULD NOT BE DISPLAYED", quantity=0.27, display=False),
    dict(text="This should be displayed but not output", quantity=42, format="{quantity} {unit}", unit="half-giraffes"),
]
for score_kwargs in score_specs:
    ctx.Score(**score_kwargs)

# Echo the titles of the system and use case attached to the context.
print(f"system: {ctx.system.documentation.title}")
print(f"usecase: {ctx.usecase.documentation.title}")

In [None]:
# Draw one sample from each of several distributions and record each sample as
# a score. Nothing is seeded here, so every execution yields fresh values.

# Exponential: numpy takes the scale (1 / rate) rather than the rate itself.
exp_low_rate = np.random.exponential(1 / 0.5)  # rate 0.5
exp_high_rate = np.random.exponential(1 / 2.0)  # rate 2.0

# Poisson with a small and a large expected count.
poisson_low_rate = np.random.poisson(2)
poisson_high_rate = np.random.poisson(10)

# Normal: second argument is the standard deviation, hence the square root.
normal_standard = np.random.normal(0, 1)  # mean 0, variance 1
normal_shifted = np.random.normal(5, np.sqrt(2))  # mean 5, variance 2

# Two-component mixtures: a uniform draw picks the component, then a single
# normal sample is taken from the chosen component only.
mean1, variance1 = 2, 1
mean2, variance2 = 8, 1.5
proportion = 0.5

if np.random.rand() < proportion:
    bimodal_close = np.random.normal(mean1, np.sqrt(variance1))
else:
    bimodal_close = np.random.normal(mean2, np.sqrt(variance2))

if np.random.rand() < proportion:
    bimodal_separated = np.random.normal(-5, np.sqrt(1))
else:
    bimodal_separated = np.random.normal(10, np.sqrt(3))

# (output name, display text, quantity) for each score, in emission order.
distribution_scores = [
    (
        "exp_rate_low",
        f"Exponential Distribution (Low Rate): {exp_low_rate:.2f} (low rate decay)",
        exp_low_rate,
    ),
    (
        "exp_rate_high",
        f"Exponential Distribution (High Rate): {exp_high_rate:.2f} (high rate decay)",
        exp_high_rate,
    ),
    (
        "poisson_rate_low",
        f"Poisson Distribution (Low Rate): {poisson_low_rate} (few events expected)",
        poisson_low_rate,
    ),
    (
        "poisson_rate_high",
        f"Poisson Distribution (High Rate): {poisson_high_rate} (more events expected)",
        poisson_high_rate,
    ),
    (
        "normal_standard",
        f"Normal Distribution (Standard): {normal_standard:.2f} (centered at 0, std deviation 1)",
        normal_standard,
    ),
    (
        "normal_shifted",
        f"Normal Distribution (Shifted): {normal_shifted:.2f} (centered at 5, std deviation √2)",
        normal_shifted,
    ),
    (
        "bimodal_close",
        f"Bi-Modal Distribution (Close Peaks): {bimodal_close:.2f} (two peaks moderately separated)",
        bimodal_close,
    ),
    (
        "bimodal_separated",
        f"Bi-Modal Distribution (Separated Peaks): {bimodal_separated:.2f} (two peaks widely separated)",
        bimodal_separated,
    ),
]

for score_output, score_text, score_quantity in distribution_scores:
    ctx.Score(output=score_output, text=score_text, quantity=score_quantity)

In [None]:
# Edge-case values: one value per scenario, each recorded as a score below.
close_together_value = 50 + np.random.uniform(-10, 10)
middle_value = np.random.uniform(40, 60)
near_min_value = np.random.uniform(0.0, 10.0)
near_max_value = np.random.uniform(90.0, 100.0)

# The 'isOutlier' argument switches this score between the fixed value 96 and
# the near-minimum sample drawn above; only the exact string 'true' enables it.
if ctx.get_argument('isOutlier') == 'true':
    extreme_outlier = 96
else:
    extreme_outlier = near_min_value

# Constant value, identical on every execution.
same_value = 42.0

# (output name, display text, quantity) for each score, in emission order.
edge_case_scores = [
    (
        "close_together",
        f"Value Close to 50: {close_together_value:.2f} (expanded range around 50)",
        close_together_value,
    ),
    (
        "in_middle",
        f"Value in the Middle: {middle_value:.2f} (expanded range around 50)",
        middle_value,
    ),
    (
        "near_min",
        f"Value Near Minimum: {near_min_value:.2f} (within 0 to 10)",
        near_min_value,
    ),
    (
        "near_max",
        f"Value Near Maximum: {near_max_value:.2f} (within 90 to 100)",
        near_max_value,
    ),
    (
        "extreme_outliers",
        f"Extreme Outlier: {extreme_outlier} (far beyond typical range)",
        extreme_outlier,
    ),
    (
        "same_values",
        f"All Values Exactly the Same: {same_value} (uniform data)",
        same_value,
    ),
]

for score_output, score_text, score_quantity in edge_case_scores:
    ctx.Score(output=score_output, text=score_text, quantity=score_quantity)

In [None]:
# Echo the 'trueName' argument and the value returned by the helper imported
# from notebook_dependency.
true_name_argument = ctx.get_argument('trueName')
print(f"trueName: {true_name_argument}")
occupation_value = occupation()
print(f"occupation: {occupation_value}")

# Load the "cromulence" input dataset and materialise it as a pandas DataFrame.
ds = ctx.open_input_dataset("cromulence")
cromulence_table = ds.to_table()
df = cromulence_table.to_pandas()
display(df)

# Dump the analysis description attached to the context as indented JSON.
analysis_json = ctx.analysis.json(indent=2)
print(analysis_json)