In [1]:
import sys
from typing import Literal

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import tensorflow as tf
from plotly.colors import n_colors
from tensorflow.keras.layers import GRU, Dense, Embedding, SimpleRNN, StringLookup
from tensorflow.keras.models import Model

%load_ext autoreload
%autoreload 2

# Add the parent directory to the import path; presumably that is where the
# local `equation_discover` module/package lives (TODO confirm).
sys.path.append("../")
# NOTE(review): this star import is presumably what supplies Node, BASE_TOKENS
# and Expression used in the cells below — importing those names explicitly
# would make the notebook's dependencies visible.
from equation_discover import *

2023-12-21 21:44:25.371670: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-21 21:44:25.413396: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-21 21:44:25.616732: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-21 21:44:25.616802: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-21 21:44:25.650908: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
from scipy.optimize import basinhopping, minimize
from tensorflow.keras.losses import MSE

In [3]:
# Token names for the target expression. Read in prefix order they presumably
# spell sin(const * var_x + const), which matches the y generated below
# (TODO confirm the ordering convention of Node.build_tree).
token_names = ["sin", "+", "*", "const", "var_x", "const"]
token_ids = list(map(BASE_TOKENS.symbols.index, token_names))
tree = Node.build_tree(token_ids)

# Sample grid over [-2*pi, 2*pi] (np.linspace default number of points) and the
# ground-truth signal sin(2 * x + 1) evaluated on it.
x_grid = np.linspace(-2 * np.pi, 2 * np.pi)
X = pd.DataFrame({"var_x": x_grid})
y = np.sin((2 * X + 1).squeeze())

In [4]:
# Wrap the token tree in an Expression; later cells read its `constants`,
# evaluate it with `eval`, and fit it with `fit`.
expression = Expression(tree)

In [None]:
# Fit the expression with the built-in fitting routine, then score the fitted
# result against the target signal (displayed as the cell output).
fitted = expression.fit(X, y, T=1e-1, step_size=1)
MSE(y, fitted.eval(X))

In [5]:
# Repeat basin-hopping 50 times from the same starting constants to estimate
# how often the optimiser recovers the true constants at a fixed temperature
# and step size.
T = 1e-1
step_size = 1

# Collect the raw optimiser output under its own name so `results` only ever
# refers to the final DataFrame (re-running the cell stays idempotent).
runs = {}
for n in range(50):
    res = basinhopping(
        lambda constants: MSE(y, expression.eval(X, constants)),
        expression.constants,
        T=T,
        stepsize=step_size,
        niter=1000,
        # basinhopping takes random steps; seeding each repetition with its
        # index keeps the repetitions distinct while making the whole cell
        # reproducible under Restart & Run All.
        seed=n,
    )
    runs[(T, step_size, n)] = res.x

# One row per repetition: the hyper-parameters, the fitted constants (`res`)
# and the resulting mean squared error (`mse`).
results = (
    pd.Series(runs)
    .rename_axis(["T", "step_size", "n"])
    .to_frame("res")
    .reset_index()
)
results["mse"] = results.apply(
    lambda x: MSE(y, expression.eval(X, x.res)).numpy(), axis=1
)

In [6]:
# Number of repetitions whose final MSE fell below 1e-3.
results["mse"].lt(1e-3).sum()

48

In [None]:
# Grid search over basin-hopping hyper-parameters: 10 step sizes x 10
# temperatures (both log-spaced), 30 repetitions each, always starting from
# the same constants.
# Collect the raw optimiser output under its own name so `results` only ever
# refers to the final DataFrame (re-running the cell stays idempotent).
runs = {}
for step_size in np.logspace(-1, 1, 10):
    for T in np.logspace(-3, 3, 10):
        for n in range(30):
            res = basinhopping(
                lambda constants: MSE(y, expression.eval(X, constants)),
                expression.constants,
                T=T,
                stepsize=step_size,
                niter=500,
                # basinhopping takes random steps; seeding with the repetition
                # index makes the sweep reproducible and gives every
                # (T, step_size) pair the same set of random streams, so grid
                # cells are compared on equal footing.
                seed=n,
            )
            runs[(T, step_size, n)] = res.x

# One row per run: the hyper-parameters, the fitted constants (`res`) and the
# resulting mean squared error (`mse`).
results = (
    pd.Series(runs)
    .rename_axis(["T", "step_size", "n"])
    .to_frame("res")
    .reset_index()
)
results["mse"] = results.apply(
    lambda x: MSE(y, expression.eval(X, x.res)).numpy(), axis=1
)

In [None]:
# Heatmap of how many runs per (T, step_size) pair ended with MSE below 1e-1;
# rows are T, columns are step_size, both drawn on log axes.
success_counts = (
    results["mse"]
    .lt(1e-1)
    .groupby([results["T"], results["step_size"]])
    .sum()
    .unstack()
)
heatmap = px.imshow(success_counts, aspect="auto")
heatmap.update_layout(xaxis_type="log", yaxis_type="log")

In [None]:
# Ridge-style plot: one horizontal half-violin per temperature showing the
# distribution of final MSE values across all step sizes and repetitions.
fig = go.Figure()

# One colour per distinct temperature, interpolated between the two endpoints.
colors = n_colors(
    "rgb(5, 200, 200)", "rgb(200, 10, 10)", results["T"].nunique(), colortype="rgb"
)
# `results` only has the columns T, step_size, n, res and mse, so the error
# metric is read from `mse` (there is no `distance` column to plot). The loop
# variable is named `temperature` so it does not overwrite the global `T`.
for (temperature, group), color in zip(results.groupby("T"), colors):
    fig.add_violin(x=group["mse"], name=f"{temperature:.03f}", line_color=color)
fig.update_traces(orientation="h", side="positive", points=False)
fig.update_layout(height=600, xaxis_title="MSE", yaxis_title="T")