In [None]:
#|default_exp experiments.pullback_comparison
## Standard libraries
import os
import math
import numpy as np
import time
from fastcore.all import *
from nbdev.showdoc import *

# Configure environment
os.environ['XLA_PYTHON_CLIENT_PREALLOCATE']='false' # Tells Jax not to hog all of the memory to this process.

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import set_matplotlib_formats
# set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()

## Progress bar
from tqdm.auto import tqdm, trange

import torch

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 1 Comparing Pullback Types
> Encoder? Decoder? Something in between?

Through different pullbacks, one obtains different metrics, with different theoretical connections. Which are empirically best supported? Let's find out.

**Hypothesis**: The encoder pullback will prove incapable of constructing curvature. The decoder pullback will work well in low dimensions for manifolds homeomorphic to $\mathbb{R}^2$, but much less well for topologically more complex manifolds or for those in higher dimensions.

# Machinery

Here we implement a 'self-evaluating dataset', intended to be used as follows:
1. Each iteration yields a point cloud, on which you can train a custom model (or several) with your own training machinery.
2. After training, store the results in the dataset by calling `.update`.
3. This repeats for every dataset included in the battery.
4. Finally, call the `.table` and `.plot` methods to compile the results.

In [None]:
#|export
from autometric.self_evaluating_datasets import SelfEvaluatingDataset, metric
from autometric.datasets import Torus, Ellipsoid, Saddle
import torch

class PullbackComparisonDataset(SelfEvaluatingDataset):
    def __init__(self, num_points = 3000):
        datalist = [
            Torus(num_points = num_points),
            Ellipsoid(num_points = num_points),
            Saddle(num_points = num_points),
        ]
        names = ["Torus", "Ellipsoid", "Saddle"]
        result_names = ["Curvature", "Metric Determinant"]
        super().__init__(datalist, names, result_names)
        self.MSE = torch.nn.MSELoss()
    
    def get_item(self,idx):
        X = self.DS[idx].obj.X
        return X
    
    def get_truth(self, result_name, idx):
        DS = self.DS[idx].obj
        match result_name:
            case "Curvature":
                return DS.ks.detach().numpy()
            case "Metric Determinant":
                return DS.manifold.metric_det(DS.intrinsic_coords).detach().numpy()
            case _:
                raise NotImplementedError(f"No such result {result_name}")
            
    def compute(self, metric, result_name, method_name, filter = None):
        # Overwrite this class with your logic. It implements the computation of a single metric for a single method
        d = {}
        for i, dsname in enumerate(self.names):
            d[dsname] = metric(self.labels[result_name][method_name][i], self.labels[result_name]['ground truth'][i])
        if filter is None: # average dataset values
            return np.mean([d[dsname] for dsname in self.names])
        elif filter == "Everything":
            return d
        elif filter in self.names:
            return d[filter]
        else:
            raise NotImplementedError("Invalid filter")
 
    @metric
    def dataset_mse(self, a, b):
        return np.sum(np.square(a - b))
    

# Results

In [None]:
# from autometric.metrics import PullbackMetric
# from autometric.connections import LeviCivitaConnection
# from autometric.manifolds import RiemannianManifold
# PB = PullbackComparisonDataset()

# for p in PB:
#     # train your model here

#     # then get the curvature and metric determinant like this
#     # encoder pullback
#     pbm = PullbackMetric(2, model.encoder) 
#     lcc = LeviCivitaConnection(2, pbm)
#     manifold = RiemannianManifold(2, (1, 1), metric=pbm, connection=lcc)
#     ks = manifold.scalar_curvature(p) # just give p, without encoding, for encoder pullback
#     PB.update(method_name = "encoder pullback", result_name = "Curvature", result = ks)
#     metric_det = manifold.metric_det(p)
#     PB.update(method_name = "encoder pullback", result_name = "Metric Determinant", result = metric_det)

#     pbm = PullbackMetric(2, model.decoder) # switch for encoder pullback
#     lcc = LeviCivitaConnection(2, pbm)
#     manifold = RiemannianManifold(2, (1, 1), metric=pbm, connection=lcc)
#     ks = manifold.scalar_curvature(model.encode(p)) # just give p, without encoding, for encoder pullback
#     PB.update(method_name = "decoder pullback", result_name = "Curvature", result = ks)
#     metric_det = manifold.metric_det(model.encode(p))
#     PB.update(method_name = "decoder pullback", result_name = "Metric Determinant", result = metric_det)


In [None]:
# Walk through the battery and record results for each pullback type.
# NOTE(review): every value passed to PB.update below is a random placeholder
# (torch.rand), not a quantity computed from the pullback metric.
PB = PullbackComparisonDataset()
from autometric.metrics import PullbackMetric
from autometric.connections import LeviCivitaConnection
from autometric.manifolds import RiemannianManifold

for p in PB:
    # train your model here
    # NOTE(review): `model` is not defined in this cell or in any visible earlier cell,
    # so the lines below raise NameError on a fresh kernel until training code is added.

    # then get the curvature and metric determinant like this
    # encoder pullback: evaluated directly on p, without encoding it first
    pbm = PullbackMetric(2, model.encoder) 
    lcc = LeviCivitaConnection(2, pbm)
    manifold = RiemannianManifold(2, (1, 1), metric=pbm, connection=lcc)
    ks = manifold.scalar_curvature(p) # computed but never stored; random values are recorded instead
    PB.update(method_name = "encoder pullback", result_name = "Curvature", result = torch.rand(len(p)).numpy())
    PB.update(method_name = "encoder pullback", result_name = "Metric Determinant", result = torch.rand(len(p)).numpy())

    # decoder pullback: built from model.decoder and evaluated on the encoded points
    # (for the encoder pullback, use model.encoder and pass p itself, as above)
    pbm = PullbackMetric(2, model.decoder)
    lcc = LeviCivitaConnection(2, pbm)
    manifold = RiemannianManifold(2, (1, 1), metric=pbm, connection=lcc)
    ks = manifold.scalar_curvature(model.encode(p)) # computed but never stored
    # NOTE(review): stored under "random man", not "decoder pullback", and only for
    # "Metric Determinant"; this cell records no "Curvature" result for that method.
    PB.update(method_name = "random man", result_name = "Metric Determinant", result = torch.rand(len(p)).numpy())

In [None]:
# Compile the stored results, restricted to the "Torus" dataset.
# NOTE(review): the recorded output lists "random man" under Curvature and no
# "encoder pullback" row, which does not match what the preceding cell stores —
# it looks stale; re-run after Restart & Run All to confirm.
table = PB.table(filter="Torus")

Curvature
              dataset_mse
ground truth     0.000000
random man    7805.900879
Metric Determinant
               dataset_mse
ground truth  0.000000e+00
random man    2.746998e+11


# Conclusion

In [None]:
class GeometricAE:
    """Skeleton of a geometric autoencoder wrapper.

    Only the interface is laid out here: `fit`, `encode` and `decode` are
    placeholders that currently do nothing and return `None`.
    """
    def __init__(
        self,
        ambient_dimension,
        latent_dimension,
        model_type, # "distance" or "affinity"
        # extra hyperparameters
    ):
        """Create the wrapper; the underlying model is not constructed yet."""
        self.model = None # TODO: build the model from the arguments above

    def fit(
        self,
        X, # pointcloud with assumed local euclidean distances
        percent_test = 0.3, # train/test split
        n_epochs = 100, # other hyperparams of graph creation, including default phate
    ):
        """Train the model on the point cloud `X` (placeholder, does nothing yet)."""
        # TODO: Compute PHATE/distances/heat geo
        # TODO: Create pytorch PointCloud dataset, tailored to the model, with given train test split.
        # TODO: training loop
        pass
        
    def fit_transform(self, 
                      X, 
                      X_test = None,
                      n_epochs = 100):
        """Fit on `X`, then return `self.encode(X)`.

        Args:
            X: point cloud to train on and to encode.
            X_test: accepted for interface compatibility; currently unused,
                since `fit` takes a `percent_test` split rather than a test set.
            n_epochs: forwarded to `fit`.
        """
        # Forward the data and epoch count: calling fit() with no arguments raised TypeError.
        self.fit(X, n_epochs = n_epochs)
        return self.encode(X)
        
    def encode(self, X = None):
        """Map `X` through the model's encoder (placeholder, returns None)."""
        # TODO: Call the encoder function of the model
        pass

    def decode(self, Z = None):
        """Map latent points `Z` through the model's decoder (placeholder, returns None)."""
        pass 
        
    
        
    
        
    
        
    


    

    


	