# Cantor

This notebook contains a study for the `cantor` encoding.

In [68]:
import sys
sys.path.append('../')
sys.setrecursionlimit(10000) # Croissant

import numpy as np
import math
import plotly.express as px
import pandas as pd

import tree_lib.util as util
from tree_lib.util import gauss, largest_gauss_binsearch as leq_gauss


In [69]:

# returns the minimum k for which (k*(k+1))/2 >= n
# i.e., left bottom point of the diagonal n is in
def geq_gauss(n):
    lg = leq_gauss(n)
    return lg if gauss(lg) == n else lg+1

# Given a number, returns a 2D point that follows Cantor's 
# zigzag bijection between N and N*N. I don't do zigzag as
# it's more complicated, instead, I enumerate like this
# 1 2 4 7 ...
# 3 5 8 ...
# 6 9 ...
# 10 ...
def cantor2d(n):
    if n == 0:
        return [0,0]
    g = geq_gauss(n)
    gg = gauss(g)
    delta = gg-n # gg must be >= n
    return [delta, g-1-delta]

# Modified version that maps to the 2D subspace for which x>=y 
# 1 x ...
# 2 3 x ...
# 4 5 6 x ...
# 7 8 9 10 x ...
def cantor2d_bisect(n):
    if n == 0:
        return []
    
    g = geq_gauss(n)
    gg = gauss(g)
    return [g-1, g-gg+n-1]

def cantor2d_bisect_inverse(c):
    if c == []:
        return 0
    
    x, y = [a+1 for a in c] # add +1 to both coordinates
    gg = gauss(x)
    n = y-x+gg
    return n

# Unit test
assert(all([cantor2d_bisect_inverse(cantor2d_bisect(i))==i for i in range(0,1000)]))

# Plot
data = [cantor2d(i)+ ["cantor2d"] for i in range(0,gauss(10))]
data += [cantor2d_bisect(i)+ ["cantor2d_bisect"] for i in range(0,gauss(10))]
dff = pd.DataFrame(data, columns=['x', 'y', "type"]) 
fig = px.scatter(dff, x="x", y="y", title="Cantor N => N * N Bijection", color="type")
fig.show()

In [70]:
from tree_lib.encodings import cantor2d

# cantor2d.number_to_cantor2d_decomposition(2)
cantor2d.tree_to_bits(cantor2d.bits_to_tree("101"))

'101'

In [71]:
# Let's go 3D

S = 10
data = [[x,y,z] for x in range(S) for y in range(S) for z in range(S) if x >= y and y >= z]
df3D = pd.DataFrame(data, columns=['x', 'y', "z"]) 
fig3D = px.scatter_3d(df3D, x="x", y="y", z="z", title="3D Ordered Space")
fig3D.show()

In [107]:
def combinations(n, r):
    if r > n:
        return 0

    numerator = 1
    denominator = 1

    for i in range(r):
        numerator *= n - i
        denominator *= i + 1

    return numerator // denominator

# Computes the mth simplex number in k dimension
def simplex(m, k):
   return combinations(m+k-1, k)

# Returns the index of the minimum simplex number in k dimensions greater or equal to n
# i.e., returns the simplex in k-1 dimensions in which the nth number falls
def geq_simplex(n,k):
    assert k >= 2 # At least 2 dimensions

    if n == 0:
        return 0

    f = lambda m: simplex(m,k) 
    lv = util.largest_arg_fitting(f, n, True) # largest value less or equal to n
    return lv if f(lv) == n else lv +1

# Map a number to a K-dimensional array with decreasing coordinates
def simplex_decomposition(n,K):
    # print(f"dec {n}")

    if n == 0:
        return []
    
    dec = [0 for _ in range(K)]
    for k in range(K,1,-1):
        ls = geq_simplex(n,k)
        # print(f"n = {n}, k = {k}, ls = {ls}")
        dec[K-k] = ls-1
        n = simplex(ls, k) - n
        
    dec[K-1] = n
    return dec

print (geq_simplex(3,3))

# Plot

# data = [simplex_decomposition(i, 2) for i in range(1,gauss(10))]
# dff = pd.DataFrame(data, columns=['x', 'y']) 
# fig = px.scatter(dff, x="x", y="y", title="Simplex 2D")
# fig.show()


for i in range(1,10):
     print(i, simplex_decomposition(i, 3))

# data = [simplex_decomposition(i, 3) for i in range(1,gauss(10))]
# df3D = pd.DataFrame(data, columns=['x', 'y', "z"]) 
# fig3D = px.scatter_3d(df3D, x="x", y="y", z="z", title="Simplex 3D")
# fig3D.show()

2


AssertionError: 

In [79]:
# Unit test

def ground_truth(d, k):
    def generate_coordinates(curr, remaining_dim):
        if remaining_dim == 0:
            return [curr[:]]

        start = 0 if not curr else curr[-1]
        result = []
        for i in range(start, d + 1):
            result.extend(generate_coordinates(curr + [i], remaining_dim - 1))
        return result

    res = generate_coordinates([], k)
    for c in res:
        c.reverse()
    return res

data_simplex = [simplex_decomposition(i, 2) for i in range(1,1000)]
print("2D missing",[v for v in ground_truth(5,2) if v not in data_simplex])

data_simplex = [simplex_decomposition(i, 3) for i in range(1,10000)]
print("3D missing",[v for v in ground_truth(5,3) if v not in data_simplex])

data_simplex = [simplex_decomposition(i, 4) for i in range(1,10000)]
print("4D missing",[v for v in ground_truth(5,4) if v not in data_simplex])



2D missing []
3D missing [[1, 1, 0], [2, 2, 0], [3, 3, 0], [4, 4, 0], [5, 5, 0]]
4D missing [[1, 1, 1, 0], [2, 1, 1, 0], [3, 1, 1, 0], [4, 1, 1, 0], [5, 1, 1, 0], [2, 2, 2, 0], [3, 2, 2, 0], [4, 2, 2, 0], [5, 2, 2, 0], [3, 3, 3, 0], [4, 3, 3, 0], [5, 3, 3, 0], [4, 4, 4, 0], [5, 4, 4, 0], [5, 5, 5, 0]]
