In [None]:
# Load the autoreload extension and use mode 2, so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Turn off Jedi for the IPython tab completer.
%config Completer.use_jedi = False

This notebook is expected to be run from the `ScientificValueAgent/figures` directory, since the paths below are relative to it.

In [None]:
import sys
# Add the parent directory to the import search path; presumably this is what
# lets the `sva` imports below resolve when running from this directory.
sys.path.append("..")

In [None]:
from itertools import product

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import cm
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.spatial import distance_matrix

In [None]:
from sva import utils
from sva.truth.uv import truth_uv
from sva.value import default_asymmetric_value_function
from sva.experiments import UVData, UVExperiment

Set some plotting defaults.

In [None]:
# Apply the project-wide plotting defaults provided by `sva.utils`.
utils.set_defaults()

# UV-vis experiment results

In [None]:
# Read the UV-vis table and keep a single row per unique (NCit, pH, HA) condition.
df = pd.read_csv("../sva/truth/uv_data.csv").drop_duplicates(
    subset=["NCit", "pH", "HA"]
)

# Inputs are the three experimental coordinates; outputs are every column
# from positional index 4 onward.
X = df.loc[:, ["NCit", "pH", "HA"]].to_numpy()
Y = df.iloc[:, 4:].to_numpy()

# The headers of the output columns are parsed as floats; this array is used
# as the x-axis when the rows of Y are plotted.
grid = np.array(list(map(float, df.columns[4:])))

## Pure SVF analysis

Analyze the raw data using SVF only. First, we plot all available data in three dimensions, coloring the points by the calculated scientific value using only known measurements.

In [None]:
# Score every measured point, then min-max rescale the scores onto [0, 1].
U = default_asymmetric_value_function(X, Y)
value_floor, value_ceiling = U.min(), U.max()
U = (U - value_floor) / (value_ceiling - value_floor)

Optional, interactive visualization.

In [None]:
# Interactive 3D scatter of every measured point, colored by the normalized value U.
marker_style = dict(color=U, size=5, colorscale="viridis")
scat = go.Scatter3d(
    x=X[:, 0],
    y=X[:, 1],
    z=X[:, 2],
    mode="markers",
    marker=marker_style,
)

fig = go.Figure(data=[scat])

# Fixed-size square canvas with no margins; axes are named after the input columns.
axis_titles = dict(
    xaxis_title="NCit",
    yaxis_title='"pH"',
    zaxis_title="HA",
)
fig.update_layout(
    autosize=False,
    width=500,
    height=500,
    margin=dict(l=0, r=0, b=0, t=0),
    scene=axis_titles,
)

fig.show()
# To export a static copy instead: fig.write_image("test.pdf")

Next, we identify groups of points that form low-value clusters and high-value clusters. For the low-value points, we choose points from some of the edge regions of the plot. For the high-value points, we choose points near the more "central" high-value cluster (the points closest to the highest-value point).

In [None]:
# Hand-picked groups of low-value points. Each row is one point, given in the
# same column order as X: (NCit, pH, HA).
low_value_cluster_1 = np.array([
    [2, -16, 2],
    [1, -16, 2],
])
low_value_cluster_2 = np.array([
    [6, -16, 16],
    [7, -16, 14],
    [5.25, -16, 14],
])
low_value_cluster_3 = np.array([
    [12, 16, 11],
    [11, 16, 11],
])
low_value_cluster_4 = np.array([
    [1, 16, 2],
    [2, 16, 2],
])

In [None]:
# Take the point with the largest value, then order all points by their
# distance from it (nearest first; the point itself comes out at distance 0).
highest_value_point = X[np.argmax(U)]
distances_to_highest_value_point = np.squeeze(
    distance_matrix(np.atleast_2d(highest_value_point), X)
)
argsorted = distances_to_highest_value_point.argsort()

In [None]:
# The high-value cluster is the set of points nearest to the highest-value
# point (which is itself included, as it sorts first).
n_closest_points = 10
high_value_cluster = X[argsorted[:n_closest_points]]

Plot the UV-Vis spectra from these clusters.

In [None]:
def get_indexes_by_cluster(cluster, X=X):
    """Return the row index in ``X`` of every point in ``cluster``.

    Parameters
    ----------
    cluster : iterable of array-like
        Points to look up. Each point is compared element-wise (exact
        equality) against every row of ``X``.
    X : np.ndarray
        Two-dimensional array whose rows are searched. NOTE: the default is
        bound when this cell is executed, so if the global ``X`` is reassigned
        later this cell must be re-run (or ``X`` passed explicitly).

    Returns
    -------
    np.ndarray
        Integer array with one row index per point, in the order given. An
        empty ``cluster`` yields an empty integer array, so the result can
        always be used directly as an index.

    Raises
    ------
    ValueError
        If a point matches no row, or more than one row, of ``X``.
    """
    indexes = []
    for point in cluster:
        # Rows of X that equal the point in every column.
        matches = np.flatnonzero(np.all(X == point, axis=1))
        if matches.size != 1:
            raise ValueError(
                f"Expected exactly one row of X equal to {point}, "
                f"found {matches.size}"
            )
        indexes.append(matches[0])
    # Explicit integer dtype: without it an empty list becomes a float array,
    # which cannot be used for indexing.
    return np.array(indexes, dtype=np.intp)

In [None]:
# Rows of Y belonging to each low-value cluster, in cluster order 1-4.
low_value_spectra = [
    Y[get_indexes_by_cluster(cluster), :]
    for cluster in (
        low_value_cluster_1,
        low_value_cluster_2,
        low_value_cluster_3,
        low_value_cluster_4,
    )
]

In [None]:
# Normalized values of the points in each low-value cluster, in cluster order 1-4.
low_value_value = [
    U[get_indexes_by_cluster(cluster)]
    for cluster in (
        low_value_cluster_1,
        low_value_cluster_2,
        low_value_cluster_3,
        low_value_cluster_4,
    )
]

In [None]:
# Look the high-value points up once and reuse the indexes for both arrays.
high_value_indexes = get_indexes_by_cluster(high_value_cluster)
high_value_spectra = Y[high_value_indexes, :]
high_value_value = U[high_value_indexes]

In [None]:
# Colors of the active matplotlib property cycle, used to color-code the clusters.
prop_cycle = plt.rcParams["axes.prop_cycle"]
colors = prop_cycle.by_key()["color"]

In [None]:
def _plot_spectra_group(ax, spectra, values, color):
    """Draw one group of spectra on ``ax`` and print the group's value statistics.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    spectra : iterable of array-like
        Spectra to plot; each one is drawn against the global ``grid``.
    values : np.ndarray
        Values of the points in the group; their mean and standard deviation
        are printed as ``$mean \\pm std$``.
    color
        Line color passed to ``ax.plot``.
    """
    for spectrum in spectra:
        ax.plot(grid, spectrum, color=color)
    # Raw f-string: "\p" is not a valid escape sequence in a regular string
    # literal and triggers a SyntaxWarning on recent Python versions.
    print(rf"${values.mean():.02f} \pm {values.std():.02f}$")


fig, axs = plt.subplots(1, 4, figsize=(6, 1), sharey=True, sharex=True)

# Very low value: clusters 1 and 4 are drawn together on the first axis.
ax = axs[0]
for ii in (0, 3):
    _plot_spectra_group(ax, low_value_spectra[ii], low_value_value[ii], colors[ii])
ax.text(0.05, 0.5, "(a)", ha="left", va="top", transform=ax.transAxes)  # For the other svg
ax.text(0.9, 0.9, "(b)", ha="right", va="top", transform=ax.transAxes)

# Low value (cluster 3) on the second axis, medium value (cluster 2) on the third.
for ax, ii, label in zip(axs[1:3], (2, 1), ("(c)", "(d)")):
    _plot_spectra_group(ax, low_value_spectra[ii], low_value_value[ii], colors[ii])
    ax.text(0.9, 0.9, label, ha="right", va="top", transform=ax.transAxes)

# High value
ax = axs[3]
_plot_spectra_group(ax, high_value_spectra, high_value_value, "black")
ax.text(0.9, 0.9, "(e)", ha="right", va="top", transform=ax.transAxes)

for ax in axs:
    utils.set_grids(ax)
    # ax.set_ylim(top=3.5)
    ax.set_yticklabels([])
    ax.set_xticks([500, 700])

# Frameless axis spanning the whole figure; it only carries the shared axis labels.
ax = fig.add_subplot(111, frameon=False)
# Booleans are required here: the string 'off' is truthy and would turn the ticks on.
ax.tick_params(labelcolor="none", top=False, bottom=False, left=False, right=False)
ax.set_xticks([])
ax.set_yticks([])
ax.set_ylabel("Abs.~[a.u.]")
ax.set_xlabel(r"$\lambda$~[nm]", labelpad=15)

plt.subplots_adjust(hspace=0.1)

# plt.show()
plt.savefig("uv_subfigure_b.svg", dpi=300, bbox_inches="tight")

Matplotlib version of the 3d scatter plot.

In [None]:
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(projection="3d")

# Highlight the hand-picked low-value clusters with larger markers; cluster k
# uses the k-th color of the property cycle.
low_value_clusters = (
    low_value_cluster_1,
    low_value_cluster_2,
    low_value_cluster_3,
    low_value_cluster_4,
)
for idx, cluster in enumerate(low_value_clusters):
    ax.scatter(*cluster.T, color=colors[idx], alpha=1, marker="o", s=50)
# ax.scatter(*high_value_cluster.T, color="black", alpha=1, marker="o", s=50)

# All measured points, colored by the normalized value U.
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=U, alpha=0.9, marker="o", s=20)

# Scale the box so each axis length is proportional to the data range along it.
ax.set_box_aspect((
    np.ptp(X[:, 0]),
    np.ptp(X[:, 1]),
    np.ptp(X[:, 2]),
))
ax.view_init(40, 225)

# Raw strings: "\m" is not a valid escape sequence in a regular string literal
# and triggers a SyntaxWarning on recent Python versions.
ax.set_xlabel(r"Volume NaCit [$\mu$L]")
ax.set_ylabel(r"Volume OH$^-$ [$\mu$L]", labelpad=20)
ax.set_zlabel(r"Volume HAuCl$_4$ [$\mu$L]")

ax.set_zticks([2, 6, 10, 14])
ax.set_xticks([2, 6, 10, 14])

# plt.show()
plt.savefig("uv_subfigure_a_2.svg", dpi=300, bbox_inches="tight")