# Study Experiment Design Generation

This notebook analyzes the characteristics of the different space-filling experiment design generation techniques provided by raxpy for different input spaces.

In [None]:
from typing import Optional, Annotated

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import raxpy
import raxpy.spaces.root as s
import raxpy.spaces.dimensions as d
import raxpy.spaces.complexity as c
import raxpy.does.lhs as lhs_doe
import raxpy.does.random as random_doe
import raxpy.does.assess as assess

## Create Helper Functions

In [None]:
def meets_portions(doe, expected_counts):
    """
    Check whether a design puts the expected number of points in each
    full sub-space.

    The sub-space of a point is identified by the sorted tuple of the
    dimension ids whose column value in that point is not NaN.

    Parameters
    ----------
    doe
        Design object exposing ``input_set_map`` (dimension id -> column
        index) and ``input_sets`` (iterable of points indexable by column).
    expected_counts : dict
        Maps a sorted tuple of active dimension ids to the number of points
        expected in that sub-space.

    Returns
    -------
    bool
        True when every sub-space listed in ``expected_counts`` holds
        exactly its expected number of points; otherwise the actual counts
        are printed and False is returned. Points falling in sub-spaces
        that are not listed are counted (and shown in the printout) but
        are not compared against anything.
    """
    # start every expected sub-space at zero so that the diagnostic print
    # always lists it, even when no point landed there
    actual_counts = {key: 0 for key in expected_counts}

    for point in doe.input_sets:
        sub_space = tuple(
            sorted(
                dim_id
                for dim_id, column_index in doe.input_set_map.items()
                if not np.isnan(point[column_index])
            )
        )
        # .get() keeps a point in an unlisted sub-space from raising KeyError
        actual_counts[sub_space] = actual_counts.get(sub_space, 0) + 1

    for key, expected in expected_counts.items():
        if actual_counts[key] != expected:
            print(actual_counts)
            return False

    return True

def generate_designs(
    strategies,
    space: s.InputSpace,
    number_of_designs: int = 10,
    number_of_points: int = 100,
    target_full_sub_space_portions=None,
    max_attempts_per_strategy: Optional[int] = None,
):
    """
    Generate and assess replicated designs for every strategy.

    Parameters
    ----------
    strategies
        Iterable of ``(generator, label, results)`` entries. ``generator``
        is called as ``generator(space, number_of_points)``; every accepted
        design is appended to ``results`` as ``(design, assessment)``.
    space : s.InputSpace
        Input space handed to each generator.
    number_of_designs : int
        Number of accepted designs to collect per strategy.
    number_of_points : int
        Number of points requested for each design.
    target_full_sub_space_portions : dict, optional
        When given, a design is only accepted if ``meets_portions`` reports
        that it matches these per-sub-space point counts; other designs are
        discarded and generation is retried.
    max_attempts_per_strategy : int, optional
        Upper bound on generation attempts per strategy. ``None`` (the
        default) retries without limit, which never terminates for a
        strategy that cannot hit the target counts.

    Raises
    ------
    RuntimeError
        If ``max_attempts_per_strategy`` attempts were made for a strategy
        without collecting ``number_of_designs`` accepted designs.
    """
    for strategy in strategies:
        generate, label, results = strategy[0], strategy[1], strategy[2]
        print(f"Generating designs for strategy: '{label}'")
        design_count = 0
        attempts = 0
        while design_count < number_of_designs:
            if (
                max_attempts_per_strategy is not None
                and attempts >= max_attempts_per_strategy
            ):
                raise RuntimeError(
                    f"Strategy '{label}' produced only {design_count} of "
                    f"{number_of_designs} acceptable designs in "
                    f"{attempts} attempts"
                )
            attempts += 1

            doe = generate(space, number_of_points)

            if target_full_sub_space_portions is None or meets_portions(
                doe, target_full_sub_space_portions
            ):
                assessment = assess.assess_with_all_metrics(doe)
                results.append((doe, assessment))
                design_count += 1
                print(f"Created design {design_count} for {label}")
            else:
                print("Skipping design")

In [None]:
def plot_fullsubspace_target_portions(space: s.InputSpace, number_of_points: int = 100):
    """
    Draw a bar chart of the target portion of points for each full
    sub-space of ``space``.

    Each bar is annotated with the portion as a whole percentage and with
    the corresponding point count out of ``number_of_points``.

    Parameters
    ----------
    space : s.InputSpace
        Input space whose full sub-spaces are derived and plotted.
    number_of_points : int
        Design size used to convert each portion into a point count.
    """
    subspaces = space.derive_full_subspaces()
    # presumably fractions of the design (0..1) per sub-space, in the same
    # order as `subspaces` -- they are scaled by 100 below for the labels
    values = c.compute_subspace_portions(space, subspaces)

    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))

    rects = axs.bar(
        x=list(range(len(subspaces))),
        height=values,
        tick_label=[", ".join(subspace) for subspace in subspaces],
    )

    # round() instead of int(): truncation turns float artefacts such as
    # 0.29 * 100 == 28.999999999999996 into "28%" next to a count of 29
    axs.bar_label(
        rects,
        labels=[
            f"{round(value * 100)}% - {round(value * number_of_points)}"
            for value in values
        ],
    )

    axs.set_ylabel("Portion Percentage")

    axs.set_title(f'Target Portions for {number_of_points} points')
    plt.xticks(rotation=45)
    plt.show()

In [None]:
def get_sub_space_assessments(strategies, dim_list, metric=assess.METRIC_DISCREPANCY):
    """
    Collect one metric of a full sub-design for every replication of every
    strategy.

    Parameters
    ----------
    strategies
        Sequence of strategy entries whose third element is a list of
        ``(design, assessment)`` replications.
    dim_list
        Dimension ids identifying the full sub-design to look up.
    metric
        Key into the sub-design assessment's ``measurements``.

    Returns
    -------
    list of list
        One inner list per strategy, holding the metric value of each
        replication that has an assessment for ``dim_list``; replications
        without one are left out.
    """
    per_strategy = []
    for strategy in strategies:
        sub_assessments = (
            replication[1].get_full_sub_design_assessment(dim_list)
            for replication in strategy[2]
        )
        per_strategy.append(
            [sub.measurements[metric] for sub in sub_assessments if sub is not None]
        )
    return per_strategy

In [None]:
def plot_sub_space_assessments(strategies, dim_list, metric=assess.METRIC_DISCREPANCY):
    """
    Show a violin plot comparing strategies on one metric of a full
    sub-design.

    Parameters
    ----------
    strategies
        Sequence of strategy entries; the second element of each entry is
        used as its tick label.
    dim_list
        Dimension ids identifying the full sub-design.
    metric
        Metric key forwarded to ``get_sub_space_assessments`` and shown in
        the title.
    """
    collected = get_sub_space_assessments(strategies, dim_list, metric)
    per_strategy_values = [list(collected[idx]) for idx in range(len(strategies))]

    figure, axis = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
    axis.violinplot(per_strategy_values, showmeans=False, showmedians=True)

    # violins are drawn at positions 1..n, so the ticks start at 1 as well
    axis.set_xticks(list(range(1, len(strategies) + 1)))
    axis.set_xticklabels([entry[1] for entry in strategies])

    joined_dims = ", ".join(dim_list)
    axis.set_title(f'{metric} for {joined_dims} full-sub-design (smaller the better)')

    plt.show()

In [None]:
def _plot_metric_violins(strategies, metric, title):
    """Draw one violin per strategy for a whole-design metric."""
    values_per_strategy = [
        [replication[1].measurements[metric] for replication in strategy[2]]
        for strategy in strategies
    ]
    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))

    axs.violinplot(values_per_strategy, showmeans=False, showmedians=True)
    # label each violin with its strategy, matching plot_sub_space_assessments
    axs.set_xticks(list(range(1, len(strategies) + 1)))
    axs.set_xticklabels([strategy[1] for strategy in strategies])
    axs.set_title(title)

    plt.show()


def generate_basic_plots(strategies):
    """
    Show violin plots comparing the strategies on the whole-design metrics.

    One figure is produced per metric: weighted discrepancy, minimum
    interpoint distance and minimum projected distance.

    Parameters
    ----------
    strategies
        Sequence of ``(generator, label, results)`` entries where
        ``results`` is a list of ``(design, assessment)`` replications and
        each assessment exposes a ``measurements`` mapping.
    """
    metric_titles = (
        (
            assess.METRIC_WEIGHTED_DISCREPANCY,
            'Weighted Discrepancies (smaller the better)',
        ),
        (
            assess.METRIC_WHOLE_MIN_POINT_DISTANCE,
            'Minimum Interpoint Distances (larger the better)',
        ),
        (
            assess.METRIC_WHOLE_MIN_PROJECTED_DISTANCE,
            'Minimum Projected Distances (larger the better)',
        ),
    )
    for metric, title in metric_titles:
        _plot_metric_violins(strategies, metric, title)

# Assessment A: 3 Optional Floats

In [None]:
# Each entry is (generator function, display label, list that collects the
# (design, assessment) results for that strategy).
strategies = [
    (lhs_doe.generate_design, "LHD-by-TreeTraversal", []),
    (random_doe.generate_design, "Random", []),
    # (lhs_doe.generate_seperate_designs_by_full_subspace, "LHD-by-SubSpace",[]),
    (lhs_doe.generate_design_with_projection, "LHD-Projection", []),
]

# Three nullable floats on [0, 1], each with a target null portion of 0.1.
space = s.InputSpace(
    dimensions=[
        d.Float(id=dim_id, lb=0.0, ub=1.0, nullable=True, portion_null=0.1)
        for dim_id in ("x1", "x2", "x3")
    ]
)

By default, when creating dimensions, the target portion of values in a design that should be null is unspecified. Creating a design without specifying these values results in every point of the design having a value for every parameter. The following code assigns these portions using a complexity analysis heuristic.

In [None]:
# Show the target portion of each full sub-space of `space`, with the
# matching point count for a 100-point design.
plot_fullsubspace_target_portions(space,number_of_points=100)

In [None]:
# Generate and assess 10 designs per strategy (default of 100 points each).
# Results are appended to each strategy's result list, so re-running this
# cell accumulates further designs instead of replacing the earlier ones.
generate_designs(
    strategies, space, number_of_designs=10,
        
)

In [None]:
# Compare the strategies on the whole-design metrics (one violin plot each).
generate_basic_plots(strategies)

In [None]:
# custom assessment plots: discrepancy of the full sub-designs that contain
# all three dimensions and each pair of dimensions
for sub_space_dims in (
    ["x1", "x2", "x3"],
    ["x1", "x2"],
    ["x2", "x3"],
    ["x1", "x3"],
):
    plot_sub_space_assessments(strategies, sub_space_dims, assess.METRIC_DISCREPANCY)

# Assessment B: Basic Hierarchy

In [None]:
# Strategies compared in this assessment; each entry is
# (generator function, display label, list collecting the results).
# The random and by-sub-space strategies are disabled here.
strategies_bh = [
    (lhs_doe.generate_design, "LHD-by-TreeTraversal",[]),
    #(random_doe.generate_design, "Random",[]),
    #(lhs_doe.generate_seperate_designs_by_full_subspace, "LHD-by-SubSpace",[]),
    (lhs_doe.generate_design_with_projection, "LHD-Projection", []),
]

# Two required floats, one optional float, and an optional composite (x4)
# that has one required and one optional child.
space_bh = s.InputSpace(
    dimensions=[
        d.Float(id="x1", lb=0.0, ub=1.0, nullable=False),
        d.Float(id="x2", lb=0.0, ub=1.0, nullable=False),
        d.Float(id="x3", lb=0.0, ub=1.0, nullable=True),
        d.Composite(id="x4", nullable=True, children=[
            d.Float(id="x4_1", lb=0.0, ub=1.0, nullable=False),
            d.Float(id="x4_2", lb=0.0, ub=1.0, nullable=True),
        ])
    ]
)

# No portion_null is given above; presumably this call fills the null
# portions in on the space's dimensions -- it has to run before the
# portions are plotted. TODO confirm against raxpy.spaces.complexity.
c.assign_null_portions(s.create_level_iterable(space_bh.children))
# design size, reused by the generate_designs call for this assessment
number_of_points = 100
plot_fullsubspace_target_portions(space_bh,number_of_points)

In [None]:
# Collect 100 designs per strategy, keeping only designs whose per-sub-space
# point counts match the targets below exactly (other designs are skipped and
# regenerated). The targets sum to 100, the number of points per design.
# Each key lists its dimension ids in sorted order because meets_portions
# builds its lookup key from the sorted ids of the non-NaN columns.
generate_designs(
    strategies_bh,
    space_bh,
    number_of_designs=100,
    number_of_points=number_of_points,
    target_full_sub_space_portions={
        ("x1","x2","x3"):17,
        ("x1","x2","x4","x4_1","x4_2"):14,
        ("x1","x2","x4","x4_1"):5,
        ("x1","x2","x3","x4","x4_1","x4_2"):45,
        ("x1","x2","x3","x4","x4_1"):14,
        ("x1","x2"):5
    }
)

In [None]:
# Compare the Assessment B strategies on the whole-design metrics.
generate_basic_plots(strategies_bh)

# Assessment C: More complex hierarchy

In [None]:

# Each entry is (generator function, display label, list that collects the
# (design, assessment) results for that strategy).
strategies_ch = [
    (lhs_doe.generate_design, "LHD-by-TreeTraversal",[]),
    (random_doe.generate_design, "Random",[]),
    (lhs_doe.generate_seperate_designs_by_full_subspace, "LHD-by-SubSpace",[]),
    (lhs_doe.generate_design_with_projection, "LHD-Projection", []),
]

# Mixed hierarchy: a required float, an optional float, an optional composite
# (x3) with an int and an optional discrete float, and an optional variant
# (x6) with two float options.
space_ch = s.InputSpace(
    dimensions=[
        d.Float(id="x1", lb=3.0, ub=5.0),
        d.Float(
            id="x2",
            # bounds were inverted (lb=-3.0, ub=-5.0); the lower bound must
            # not exceed the upper bound
            lb=-5.0,
            ub=-3.0,
            nullable=True,
            portion_null=1.0 / 10.0,
        ),
        d.Composite(
            id="x3",
            nullable=True,
            portion_null=1.0 / 7.0,
            children=[
                d.Int(id="x4", lb=6, ub=7),
                d.Float(
                    id="x5",
                    value_set=[0.1, 0.5, 0.9],
                    nullable=True,
                    portion_null=1.0 / 4.0,
                ),
            ],
        ),
        d.Variant(
            id="x6",
            nullable=True,
            portion_null=0.33,
            options=[
                d.Float(id="x7", lb=1.0, ub=2.0),
                d.Float(id="x8", lb=3.0, ub=4.0),
            ],
        ),
    ]
)

c.assign_null_portions(s.create_level_iterable(space_ch.children))
plot_fullsubspace_target_portions(space_ch)

In [None]:
from scipy.stats.qmc import discrepancy

In [None]:
# Baseline: three 1-D points evenly spaced over [0, 1].
# method="WD" selects the wrap-around discrepancy (per the SciPy docs).
discrepancy([[0.0],[0.5],[1.0]],method="WD")

In [None]:
# Same end points, middle point moved from 0.5 down to 0.1.
discrepancy([[0.0],[0.1],[1.0]],method="WD")

In [None]:
# Middle point almost coincident with the point at 0.0.
discrepancy([[0.0],[0.001],[1.0]],method="WD")

In [None]:
# All three points confined to [0, 0.1].
discrepancy([[0.0],[0.1],[0.05]],method="WD")

In [None]:
# Six points evenly spaced by 0.1 over [0, 0.5].
discrepancy([[0.0],[0.1],[0.2],[0.3],[0.4],[0.5]],method="WD")

In [None]:
# Evenly spaced set with 0.1 replaced by 0.19, crowding it next to 0.2.
discrepancy([[0.0],[0.19],[0.2],[0.3],[0.4],[0.5]],method="WD")

In [None]:
# Additionally 0.3 replaced by 0.21: three points clustered around 0.2.
discrepancy([[0.0],[0.19],[0.2],[0.21],[0.4],[0.5]],method="WD")

In [None]:
# Additionally 0.4 replaced by 0.49: a second cluster next to 0.5.
discrepancy([[0.0],[0.19],[0.2],[0.21],[0.49],[0.5]],method="WD")

In [None]:
# 0.19 replaced by 0.01: pairs of close points near 0.0, 0.2 and 0.5.
discrepancy([[0.0],[0.01],[0.2],[0.21],[0.49],[0.5]],method="WD")

In [None]:
# Development aid: prints the docstring of scipy.stats.qmc.discrepancy.
help(discrepancy)

In [None]:
# The two-cluster point set again, now with method="CD"
# (centered discrepancy per the SciPy docs).
discrepancy([[0.0],[0.19],[0.2],[0.21],[0.49],[0.5]], method="CD")

In [None]:
# The evenly spaced six-point set with method="CD", for comparison with the
# clustered set evaluated with the same method.
discrepancy([[0.0],[0.1],[0.2],[0.3],[0.4],[0.5]], method="CD")