In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Some installs we need
# !pip install cvxpy

In [3]:
import embedders

from hyperdt.product_space_svm import mix_curv_svm
from hyperdt.product_space_perceptron import mix_curv_perceptron

# Tabaghi code

In [4]:
# Quentin's code for signature conversion, slightly rewritten for the new class
from sklearn.model_selection import train_test_split


def get_signature_str(pm):
    return ",".join([f"{M.type.lower()}{M.dim}" for M in pm.P])


def get_embed_data(pm, X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return {
        "X_train": X_train.detach().cpu().numpy(),
        "X_test": X_test.detach().cpu().numpy(),
        "y_train": y_train.detach().cpu().numpy(),
        "y_test": y_test.detach().cpu().numpy(),
        "max_norm": [M.manifold.inner(x, x).max().item() for M, x in zip(pm.P, pm.factorize(X))],
        "curv_value": [abs(M.curvature) for M in pm.P],
    }

In [5]:
# Embedders part: generate data

pm = embedders.manifolds.ProductManifold([(-1, 2), (0, 2), (1, 2)])
X, y = embedders.gaussian_mixture.gaussian_mixture(pm)
y_relabeled = y * 2 - 1

In [37]:
mix_component = get_signature_str(pm)
embed_data = get_embed_data(pm, X, y_relabeled)
# Convert to (1, -1) labels:

ps_perceptron = mix_curv_perceptron(
    mix_component=mix_component,
    embed_data=embed_data,
    multiclass=False,  # for now, just do binary
    max_round=10_000,
    max_update=10_000,
)
y_pred = ps_perceptron.process_data()
print(y_pred)

100%|██████████| 800/800 [00:00<00:00, 1350.90it/s]
100%|██████████| 800/800 [00:00<00:00, 1329.90it/s]
100%|██████████| 800/800 [00:00<00:00, 3786.86it/s]
100%|██████████| 800/800 [00:00<00:00, 4975.43it/s]
100%|██████████| 800/800 [00:00<00:00, 4964.47it/s]
100%|██████████| 800/800 [00:00<00:00, 6268.80it/s]
100%|██████████| 800/800 [00:00<00:00, 6444.50it/s]
100%|██████████| 800/800 [00:00<00:00, 9156.04it/s]
100%|██████████| 800/800 [00:00<00:00, 14060.22it/s]
100%|██████████| 800/800 [00:00<00:00, 9772.66it/s]
100%|██████████| 800/800 [00:00<00:00, 10330.03it/s]
100%|██████████| 800/800 [00:00<00:00, 9588.05it/s]
100%|██████████| 800/800 [00:00<00:00, 9091.81it/s]
100%|██████████| 800/800 [00:00<00:00, 13448.78it/s]
100%|██████████| 800/800 [00:00<00:00, 11560.53it/s]
100%|██████████| 800/800 [00:00<00:00, 12355.81it/s]
100%|██████████| 800/800 [00:00<00:00, 9422.09it/s]
100%|██████████| 800/800 [00:00<00:00, 10732.34it/s]
100%|██████████| 800/800 [00:00<00:00, 8133.88it/s]
100%|█

[ 1  1  1  1  1  1 -1  1 -1  1  1  1  1  1  1 -1  1  1  1  1  1 -1  1 -1
  1  1  1 -1  1  1 -1 -1  1  1  1  1  1 -1 -1  1  1  1 -1  1  1  1  1  1
 -1  1  1  1  1  1 -1  1 -1  1  1  1  1 -1  1  1  1  1 -1  1  1 -1 -1  1
 -1  1  1 -1 -1 -1  1  1 -1  1 -1  1 -1  1  1 -1 -1  1  1 -1 -1 -1  1  1
  1 -1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1 -1 -1  1 -1  1
 -1  1  1 -1 -1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1
  1  1  1  1  1  1  1  1  1  1 -1  1  1 -1 -1 -1  1  1 -1  1 -1  1  1  1
  1  1  1  1  1  1  1  1 -1  1  1 -1  1 -1  1  1  1  1 -1  1 -1 -1  1  1
  1  1  1  1  1  1  1 -1]





In [38]:
# What about multiclass?

# Embedders part: generate data

pm = embedders.manifolds.ProductManifold([(-1, 2), (0, 2), (1, 2)])
X, y = embedders.gaussian_mixture.gaussian_mixture(pm, num_classes=2)

mix_component = get_signature_str(pm)
embed_data = get_embed_data(pm, X, y)
# Convert to (1, -1) labels:

ps_perceptron = mix_curv_perceptron(
    mix_component=mix_component, embed_data=embed_data, multiclass=True, max_round=10_000, max_update=10_000
)
y_pred = ps_perceptron.process_data()
print(y_pred)

[0 1 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 0 1 1 0 0 1
 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0
 1 1 0 0 1 1 0 0 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 0 1
 1 1 1 0 0 1 0 0 0 1 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 0
 1 0 1 0 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 0 0 0 0 0 1 1 0 0 0 0 1 1 1 0 0 1 1
 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0]


In [39]:
embed_data["y_test"]

array([0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,
       1, 1])

In [40]:
# Ok, I'm not happy with how their code doesn't return predictions. Let's rewrite this a bit
import numpy as np
from hyperdt.platt import SigmoidTrain, SigmoidPredict


def perceptron_predict(perceptron):
    tmp_error_record = {0: 1}
    test_probability = np.zeros((perceptron.n_test_samples, perceptron.n_class), dtype=float)
    for class_val in perceptron.class_labels:
        y_bin_train = np.array([1 if val == class_val else -1 for val in perceptron.y_train])

        # Train part
        decision_vals = [0] * perceptron.n_train_samples
        for idx in range(perceptron.n_train_samples):
            decision_vals[idx] = perceptron.mix_classifier_train(idx, tmp_error_record, y_bin_train)
            tmp_ab = SigmoidTrain(deci=decision_vals, label=y_bin_train, prior1=None, prior0=None)

        # Test part
        for idx in range(perceptron.n_test_samples):
            yn = perceptron.mix_classifier_test(idx, tmp_error_record, y_bin_train)
            test_probability[idx, perceptron.class_labels.index(class_val)] = SigmoidPredict(deci=yn, AB=tmp_ab)

    # Get predictions
    return test_probability


perceptron_predict(ps_perceptron).argmax(axis=1)

# Now we start to see the issue: all predictions are 1

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1])

In [41]:
ps_perceptron_more_epochs = mix_curv_perceptron(
    mix_component=mix_component,
    embed_data=embed_data,
    multiclass=False,  # for now, just do binary
    max_round=1,
    max_update=100,
)
score = ps_perceptron.process_data()
print(score)

[0 1 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 0 1 1 0 1 0 0 1 1 0 0 1
 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0
 1 1 0 0 1 1 0 0 0 1 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 0 1
 1 1 1 0 0 1 0 0 0 1 1 1 1 0 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 0
 1 0 1 0 0 1 0 0 1 1 0 1 0 0 1 0 1 1 1 0 0 0 0 0 1 1 0 0 0 0 1 1 1 0 0 1 1
 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0]


In [42]:
perceptron_predict(ps_perceptron)

array([[0.64592858, 0.69015549],
       [0.59545233, 0.64249456],
       [0.20436469, 0.23874382],
       [0.33006785, 0.37561037],
       [0.3794162 , 0.42742353],
       [0.75062549, 0.78610532],
       [0.7002529 , 0.74042111],
       [0.4437521 , 0.49342738],
       [0.41373555, 0.46284661],
       [0.62736935, 0.67273995],
       [0.44408791, 0.49376741],
       [0.63791642, 0.68265242],
       [0.43520183, 0.48475409],
       [0.31560773, 0.36022789],
       [0.3059968 , 0.34995297],
       [0.30616528, 0.35013345],
       [0.84869997, 0.87259459],
       [0.39698415, 0.44561819],
       [0.62015168, 0.66593313],
       [0.61513658, 0.66119225],
       [0.46922468, 0.51909045],
       [0.59065267, 0.63791363],
       [0.80847747, 0.83750818],
       [0.41215829, 0.46122943],
       [0.51230572, 0.56190298],
       [0.42210602, 0.47141173],
       [0.57250984, 0.6205197 ],
       [0.44227191, 0.49192803],
       [0.34905478, 0.39566982],
       [0.60352511, 0.65018009],
       [0.

In [43]:
# Can we force the perceptron to predict 0s?
import torch

X_0_bias = torch.cat([X[y == 0]] * 10 + [X], dim=0)
y_0_bias = torch.cat([torch.zeros_like(y[y == 0])] * 10 + [y], dim=0)

embed_data_0_bias = get_embed_data(pm, X_0_bias, y_0_bias)

ps_perceptron_0_bias = mix_curv_perceptron(
    mix_component=mix_component,
    embed_data=embed_data_0_bias,
    multiclass=False,  # for now, just do binary
    max_round=1000,
    max_update=100,
)
score = ps_perceptron_0_bias.process_data()
print(score)

  2%|▏         | 106/4568 [00:00<00:01, 2556.86it/s]
100%|██████████| 1142/1142 [00:00<00:00, 1390.99it/s]

[ 1 -1  1 ...  1 -1  1]





In [44]:
perceptron_predict(ps_perceptron_0_bias).argmax(axis=1)

KeyboardInterrupt: 

# My version

In [2]:
# Embedders part: generate data
import embedders

pm = embedders.manifolds.ProductManifold([(-1, 2), (0, 2), (1, 2)])
X, y = embedders.gaussian_mixture.gaussian_mixture(pm)
y_relabeled = y * 2 - 1

In [34]:
import torch
from sklearn.base import BaseEstimator, ClassifierMixin


class ProductSpacePerceptron(BaseEstimator, ClassifierMixin):
    def __init__(self, pm, max_epochs=1000, patience=5):
        self.pm = pm  # ProductManifold instance
        self.max_epochs = max_epochs
        self.patience = patience  # Number of consecutive epochs without improvement to consider convergence
        self.classes_ = None
        self.classifiers_ = {}  # Dictionary to store classifiers for one-vs-rest approach
        self.R = []  # To store maximum radius for each hyperbolic manifold

    def fit(self, X, y):
        # Identify unique classes for multiclass classification
        self.classes_ = torch.unique(y).tolist()

        # Compute maximum hyperbolic radii for each hyperbolic manifold
        self.R = [0] * len(self.pm.P)
        for i, (M, x) in enumerate(zip(self.pm.P, self.pm.factorize(X))):
            if M.type == "H":
                self.R[i] = torch.sqrt(
                    torch.abs(M.manifold.inner(x, x).max())
                ).item()  # Use absolute value for Minkowski norm

        # Relabel y to -1 and 1 for binary classification per class
        for class_label in self.classes_:
            # Binary classification shortcut
            if len(self.classes_) == 2 and class_label == self.classes_[1]:
                self.classifiers_[class_label] = -1 * self.classifiers_[self.classes_[0]]

            else:
                print(f"Training perceptron for class {class_label} vs. rest")
                binary_y = torch.where(y == class_label, 1, -1)  # One-vs-rest relabeling

                # Initialize decision function g for this binary classifier
                g = torch.zeros(X.shape[1], dtype=X.dtype, device=X.device)

                n_epochs = 0
                epochs_without_improvement = 0  # Track consecutive epochs without improvement
                best_error_count = float("inf")  # Best error count seen so far

                while n_epochs < self.max_epochs:
                    errors = 0
                    for n in range(X.shape[0]):
                        # Compute the decision function value for the current point
                        decision_value = g @ X[n]

                        # Check if the point is misclassified
                        if torch.sign(decision_value) != binary_y[n]:
                            # Calculate the kernel K(x, x_n) for the current point x_n
                            K = torch.ones(X.shape[0], dtype=X.dtype, device=X.device)  # Start with the bias term

                            for i, (M, x) in enumerate(zip(self.pm.P, self.pm.factorize(X))):
                                # Compute kernel matrix between x[n:n+1] and all training points
                                if M.type == "E":
                                    K += M.scale * M.manifold.inner(x[n : n + 1], x)  # Kernel matrix for Euclidean
                                elif M.type == "S":
                                    K += M.scale * torch.asin(
                                        torch.clamp(M.manifold.inner(x[n : n + 1], x), -1, 1)
                                    )  # Kernel matrix for Spherical
                                elif M.type == "H":
                                    K += M.scale * torch.asin(
                                        torch.clamp((self.R[i] ** -2) * M.manifold.inner(x[n : n + 1], x), -1, 1)
                                    )  # Kernel matrix for Hyperbolic

                            # Update decision function using the computed kernel
                            g += binary_y[n] * X[n]  # Update with current point only
                            errors += 1  # Track the number of errors in this epoch

                    # Convergence check based on error improvement
                    if errors < best_error_count:
                        best_error_count = errors
                        epochs_without_improvement = 0  # Reset the counter if we have an improvement
                    else:
                        epochs_without_improvement += 1

                    if epochs_without_improvement >= self.patience:
                        print(f"Converged for class {class_label} after {n_epochs} epochs (no improvement).")
                        break

                    n_epochs += 1

                # Store the classifier (decision function) for the current class
                self.classifiers_[class_label] = g

        return self

    def predict(self, X):
        # Initialize matrix to store decision values for each class
        decision_values = torch.zeros((X.shape[0], len(self.classes_)), dtype=X.dtype, device=X.device)

        # Compute decision values for each classifier
        for idx, class_label in enumerate(self.classes_):
            g = self.classifiers_[class_label]
            decision_values[:, idx] = X @ g

        # Return the class with the highest decision value
        print(decision_values)
        argmax_idx = torch.argmax(decision_values, dim=1)
        return torch.tensor([self.classes_[i] for i in argmax_idx])


# Example usage:
ps_perc_pac = ProductSpacePerceptron(pm, max_epochs=100, patience=5)
ps_perc_pac.fit(X, y)
predictions = ps_perc_pac.predict(X)
predictions

# Check the accuracy
print((predictions == y).float().mean())

Training perceptron for class 0 vs. rest
Converged for class 0 after 7 epochs (no improvement).
tensor([[ 514.1910, -514.1910],
        [ 150.3096, -150.3096],
        [  95.0430,  -95.0430],
        ...,
        [ 229.6605, -229.6605],
        [  31.0748,  -31.0748],
        [  20.6654,  -20.6654]], grad_fn=<CopySlices>)
tensor(0.9810)


In [45]:
_x = pm.factorize(X)[0]
pm.P[0].manifold._log_scale = torch.nn.Parameter(torch.tensor(10.0))
pm.P[0].manifold.inner(_x, _x)

tensor([-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
        -1.0000, -1.0000, -1.0000, -1.00

# New code: kernel check

In [198]:
import embedders

pm = embedders.manifolds.ProductManifold([(-1, 2), (0, 2), (1, 2)])
X, y = embedders.gaussian_mixture.gaussian_mixture(pm)

[x.isnan().any() for x in product_kernel(pm, X, X)[0]]



[tensor(False), tensor(False), tensor(False)]

In [199]:
product_kernel(pm, X, X)[0][2]

tensor([[ 1.5703, -0.1795,  1.0183,  ..., -1.0199,  0.2129, -0.2404],
        [-0.1795,  1.5708,  0.2632,  ...,  0.7079, -0.8280, -0.9777],
        [ 1.0183,  0.2632,  1.5705,  ..., -0.4905,  0.0947, -0.7749],
        ...,
        [-1.0199,  0.7079, -0.4905,  ...,  1.5708, -0.4669, -0.3033],
        [ 0.2129, -0.8280,  0.0947,  ..., -0.4669,  1.5703,  0.3170],
        [-0.2404, -0.9777, -0.7749,  ..., -0.3033,  0.3170,  1.5705]],
       grad_fn=<AsinBackward0>)

In [200]:
product_kernel(embedders.manifolds.ProductManifold(signature=[(-4, 2), (0, 2), (4, 2)]), X, X)[1]

[tensor(-4), tensor(1.), tensor(2.)]

In [201]:
import torch

_x = pm.factorize(X)[-1]
pm.P[-1].inner(_x, _x) * pm.P[-1].curvature * pm.P[-1].scale

tensor([[ 1.0000, -0.1786,  0.8512,  ..., -0.8521,  0.2113, -0.2381],
        [-0.1786,  1.0000,  0.2602,  ...,  0.6502, -0.7366, -0.8292],
        [ 0.8512,  0.2602,  1.0000,  ..., -0.4711,  0.0946, -0.6997],
        ...,
        [-0.8521,  0.6502, -0.4711,  ...,  1.0000, -0.4501, -0.2986],
        [ 0.2113, -0.7366,  0.0946,  ..., -0.4501,  1.0000,  0.3117],
        [-0.2381, -0.8292, -0.6997,  ..., -0.2986,  0.3117,  1.0000]],
       grad_fn=<MulBackward0>)

In [73]:
# pm.P[-1].inner(_x, _x)pm.P[-1].curvature * pm.P[-1].scale
import embedders.manifolds


embedders.manifolds.Manifold(curvature=2, dim=2).inner(_x, _x) / embedders.manifolds.Manifold(curvature=1, dim=2).inner(
    _x, _x
)

tensor([[0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.5000],
        ...,
        [0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.5000],
        [0.5000, 0.5000, 0.5000,  ..., 0.5000, 0.5000, 0.5000]],
       grad_fn=<DivBackward0>)

In [56]:
torch.asin

1.0

In [42]:
pm.P[0].manifold.inner(_x[:, None], _x[None, :]).shape

torch.Size([1, 1000])

In [226]:
ptron = embedders.perceptron.ProductSpacePerceptron(pm)

ptron.fit(X, y)
(ptron.predict(X) == y).float().mean()

tensor(1.)

In [257]:
# Do the same thing with train-test split

from sklearn.model_selection import train_test_split

X, y = embedders.gaussian_mixture.gaussian_mixture(pm)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

ptron = embedders.perceptron.ProductSpacePerceptron(pm)
ptron.fit(X_train, y_train)
(ptron.predict(X_test) == y_test).float().mean()

tensor(0.7450)

In [168]:
Ks, norms = embedders.kernel.product_kernel(pm, X, X)

In [169]:
K = torch.ones(X.shape[0], X.shape[0])
for mat in Ks:
    K += mat
K

tensor([[ 38.8659, -16.0567,  23.4093,  ...,   4.2112, -14.9139,  -2.0029],
        [-16.0567,   7.4579, -11.1123,  ...,  -1.8085,   0.5661,  -0.4191],
        [ 23.4093, -11.1123,  18.4601,  ...,   2.8568, -12.1712,  -0.5609],
        ...,
        [  4.2112,  -1.8085,   2.8568,  ...,   2.1052,  -5.3676,   0.5950],
        [-14.9139,   0.5661, -12.1712,  ...,  -5.3676,   4.3270,  -3.2937],
        [ -2.0029,  -0.4191,  -0.5609,  ...,   0.5950,  -3.2937,   1.2123]],
       grad_fn=<AddBackward0>)

In [86]:
g = torch.rand(X.shape[1])
g

tensor([0.1202, 0.7269, 0.7182, 0.8251, 0.9668, 0.4298, 0.5931, 0.9310])

In [90]:
err = torch.sign(X @ g) != (y * 2 - 1)

In [139]:
(((y * 2 - 1)[err, None] * K[err, :]).sum(dim=1)[:, None] * X[err, :]).sum(dim=0)

tensor([ 1.8569e+10,  5.5664e+09,  1.7664e+10,  1.0065e+06, -3.8445e+05,
         2.2273e+05,  9.1254e+04, -1.8895e+05], grad_fn=<SumBackward1>)

In [138]:
((y * 2 - 1)[err, None] * K[err, :]).sum(dim=1) @ X[err]

tensor([ 1.8569e+10,  5.5664e+09,  1.7664e+10,  1.0065e+06, -3.8445e+05,
         2.2273e+05,  9.1254e+04, -1.8895e+05], grad_fn=<SqueezeBackward4>)

In [157]:
((y * 2 - 1)[err, None]) * X[err] @ K[err]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (582x8 and 582x1000)

In [156]:
K[:, err] @ X[err]

torch.Size([1000, 8])

In [129]:
((y * 2 - 1)[err, None] * K[err, :]).sum(dim=1).shape

torch.Size([582])

In [175]:
(y[0] * K[0]) @ X[0]  # Should be positive

RuntimeError: inconsistent tensor size, expected tensor [1000] and src [8] to have the same number of elements, but got 1000 and 8 elements respectively

In [203]:
(-1 * K[0] @ X) @ X[0], y[0]  # Should be negative

(tensor(-143785.7812, grad_fn=<DotBackward0>), tensor(0))

In [205]:
(-1 * K[1] @ X) @ X[1], y[1]  # Should be negative

(tensor(19303.4414, grad_fn=<DotBackward0>), tensor(0))

In [211]:
(-1 * K[6] @ X) @ X[6], y[6]  # Should be positive

(tensor(-21385.4707, grad_fn=<DotBackward0>), tensor(1))

# SVM 

In [5]:
from embedders.manifolds import ProductManifold
from embedders.gaussian_mixture import gaussian_mixture
from embedders.kernel import product_kernel
import torch
import cvxpy

# Get pm and sample
pm = ProductManifold(signature=[(-1, 2), (0, 2), (1, 2)])
X, y = gaussian_mixture(pm)

# Make y in {-1, 1}
# TODO: Do we need this?
# y = 2 * y - 1

# Get kernel
n_samples = X.shape[0]
Ks, norms = product_kernel(pm, X, X)
K = torch.zeros(n_samples, n_samples)
# K = torch.ones((n_samples, n_samples))
for K_component in Ks:
    K += K_component

# Make numpy arrays
X = X.detach().cpu().numpy()
Y = torch.diagflat(y).detach().cpu().numpy()
K = K.detach().cpu().numpy()

# Make variables
zeta = cvxpy.Variable(X.shape[0])
beta = cvxpy.Variable(X.shape[0])
epsilon = cvxpy.Variable(1)

# Get constraints
constraints = [
    epsilon >= 0,
    zeta >= 0,
    Y @ (K @ beta + cvxpy.sum(beta)) >= epsilon - zeta,
    # Y @ (K @ beta) >= epsilon - zeta,  # Replaced with sum-less form
]
for M, K_component, norm in zip(pm.P, Ks, norms):
    K_component = K_component.detach().cpu().numpy()
    norm = norm.item()
    if M.type == "E":
        alpha_E = 1.0  # TODO: make this flexible
        # constraints.append(cvxpy.quad_form(beta, K_component) <= alpha_E**2)
    elif M.type == "S":
        pass
        # constraints.append(cvxpy.quad_form(beta, K_component) <= torch.pi / 2)
    elif M.type == "H":
        pass  # No constraints currently
        # K_component_pos = K_component.clip(0, None)
        # K_component_neg = K_component.clip(None, 0)
        # constraints.append(cvxpy.quad_form(beta, K_component_neg) <= 1e-5)
        # constraints.append(cvxpy.quad_form(beta, K_component_pos) <= 1e-5 + norm)

# CVXPY solver
cvxpy.Problem(
    objective=cvxpy.Minimize(-epsilon + cvxpy.sum(zeta)),
    constraints=constraints,
).solve()
print(zeta.value, beta.value, epsilon.value)



[ 6.29910954e-12  6.29910954e-12 -8.43519079e-12  6.29910954e-12
  6.29910954e-12  6.29910954e-12  6.29910954e-12 -8.43515007e-12
  6.29910954e-12  6.29910954e-12  6.29910954e-12  6.29910954e-12
 -8.43523387e-12 -8.43557690e-12 -8.43522458e-12 -8.43522142e-12
  6.29910954e-12  6.29910954e-12  6.29910954e-12 -8.43549300e-12
  6.29910954e-12 -8.43525357e-12  6.29910954e-12  6.29910954e-12
 -8.43517822e-12  6.29910954e-12 -8.43525074e-12  6.29910954e-12
  6.29910954e-12 -8.43431589e-12  6.29910954e-12  6.29910954e-12
 -8.43515676e-12 -8.43507610e-12  6.29910954e-12  6.29910954e-12
  6.29910954e-12 -8.43566267e-12 -8.43529861e-12  6.29910954e-12
 -8.43524562e-12  6.29910954e-12  6.29910954e-12  6.29910954e-12
  6.29910954e-12  6.29910954e-12  6.29910954e-12  6.29910954e-12
 -8.43497067e-12  6.29910954e-12  6.29910954e-12  6.29910954e-12
  6.29910954e-12 -8.43650385e-12  6.29910954e-12  6.29910954e-12
  6.29910954e-12  6.29910954e-12 -8.43521011e-12  6.29910954e-12
  6.29910954e-12 -8.43523

In [3]:
from embedders.manifolds import ProductManifold
from embedders.gaussian_mixture import gaussian_mixture
from embedders.svm import ProductSpaceSVM

# Get pm and sample
pm = ProductManifold(signature=[(-1, 2), (0, 2), (1, 2)])
X, y = gaussian_mixture(pm)

# Get SVM and fit
_h, _s, _e = True, True, True
# _h, _s, _e = False, False, False
ps_svm = ProductSpaceSVM(pm=pm, h_constraints=_h, s_constraints=_s, e_constraints=_e, epsilon=1e-10)
ps_svm.fit(X, y)



DCPError: Problem does not follow DCP rules. Specifically:
The following constraints are not DCP:
QuadForm(var96, [[-0.00 -0.00 ... -0.00 -0.00]
 [-0.00 -0.00 ... -0.00 -0.00]
 ...
 [-0.00 -0.00 ... -0.00 -0.00]
 [-0.00 -0.00 ... -0.00 -0.00]]) <= 1e-10 , because the following subexpressions are not:
|--  QuadForm(var96, [[-0.00 -0.00 ... -0.00 -0.00]
 [-0.00 -0.00 ... -0.00 -0.00]
 ...
 [-0.00 -0.00 ... -0.00 -0.00]
 [-0.00 -0.00 ... -0.00 -0.00]])
QuadForm(var96, [[0.17 -0.58 ... -0.25 0.17]
 [-0.58 2.01 ... 1.19 -0.15]
 ...
 [-0.25 1.19 ... 4.34 4.99]
 [0.17 -0.15 ... 4.99 7.11]]) <= 1.0 , because the following subexpressions are not:
|--  QuadForm(var96, [[0.17 -0.58 ... -0.25 0.17]
 [-0.58 2.01 ... 1.19 -0.15]
 ...
 [-0.25 1.19 ... 4.34 4.99]
 [0.17 -0.15 ... 4.99 7.11]])

In [4]:
# Which versions of constraints pass?

for _h in [True, False]:
    for _s in [True, False]:
        for _e in [True, False]:
            ps_svm = ProductSpaceSVM(
                pm=pm, h_constraints=_h, s_constraints=_s, e_constraints=_e
            )
            try:
                ps_svm.fit(X, y)
                print(f"H: {_h}, S: {_s}, E: {_e}")
            except:
                pass


H: False, S: True, E: False
H: False, S: False, E: False
