In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
from os.path import expanduser
home = expanduser("~")

import matplotlib
import scipy
import time

import tn2v

In [2]:
def circles(c=1,circle_density=20,mr=None,noise=0.05):
    """Sample ``c`` noisy small circles whose centres lie evenly on a larger circle.

    Parameters
    ----------
    c : int
        Number of small circles.
    circle_density : int
        Number of points sampled on each small circle.
    mr : float or None
        Radius of the large circle that carries the centres.  When ``None``
        it is set to ``0.51 + 0.08*(c-4)``.
    noise : float
        Relative jitter.  Each point's angle is perturbed by Gaussian noise
        with standard deviation ``noise`` times the angular spacing
        ``2*pi/circle_density``; each point's radius by Gaussian noise with
        standard deviation ``noise`` times the small-circle radius (0.20).

    Returns
    -------
    pd.DataFrame
        One row per point (column 0 = x, column 1 = y), grouped circle by
        circle, ``c*circle_density`` rows in total.
    """
    small_radius = 0.20
    big_radius = mr if mr is not None else 0.51 + .08*(c-4)

    # c evenly spaced centre directions in [0, 2*pi); the duplicate endpoint is dropped
    centre_angles = np.linspace(0,2*np.pi,c+1)[:c]

    xs = []
    ys = []
    for centre_angle in centre_angles:
        # draw order matters for seeded reproducibility: angle jitter first, then radius jitter
        angle_jitter = np.random.normal(loc=0,scale=2*np.pi/circle_density*noise,size=circle_density)
        point_angles = np.linspace(0,2*np.pi,circle_density+1)[:circle_density] + angle_jitter

        radius_jitter = np.random.normal(loc=0,scale=small_radius*noise,size=circle_density)
        point_radii = np.full(circle_density,small_radius) + radius_jitter

        # centre of this small circle plus the offsets of all its points at once
        xs.extend(big_radius*np.cos(centre_angle) + point_radii*np.cos(point_angles))
        ys.extend(big_radius*np.sin(centre_angle) + point_radii*np.sin(point_angles))

    return pd.DataFrame(list(zip(xs,ys)))

In [3]:
def torus(outer_rad=1.0,inner_rad=0.5,spreader_coeff=0.175,noise=0.05):
    """Sample a noisy point cloud on a torus with roughly uniform point spacing.

    The tube is cut into rings at evenly spaced tube angles ("inner angles");
    each ring is a circle around the z-axis and receives a number of points
    proportional to its circumference, so the arc-length spacing between
    neighbouring points is about ``spreader_coeff`` everywhere.

    Both angle grids cover the half-open interval [0, 2*pi).  Including the
    endpoint 2*pi would repeat the angle-0 samples and double the point
    density along the two seams of the torus.

    Parameters
    ----------
    outer_rad : float
        Distance from the z-axis to the centre line of the tube.
    inner_rad : float
        Radius of the tube.
    spreader_coeff : float
        Target arc-length spacing between neighbouring samples; smaller
        values give more points.
    noise : float
        Each coordinate is shifted by an independent draw from
        ``Uniform(-noise, noise)``.

    Returns
    -------
    pd.DataFrame
        One row per point, columns 0, 1, 2 = x, y, z.
    """
    # number of rings = tube circumference / target spacing
    tube_circumference = np.pi*2*inner_rad
    inner_angle_count = int(tube_circumference/spreader_coeff)
    inner_angles = np.linspace(0,2*np.pi,inner_angle_count,endpoint=False)

    rings = []
    for inner_angle in inner_angles:
        # distance of this ring from the z-axis, and the point count that keeps the spacing
        ring_radius = outer_rad + np.cos(inner_angle)*inner_rad
        outer_angle_count = int(ring_radius*2*np.pi/spreader_coeff)
        outer_angles = np.linspace(0,2*np.pi,outer_angle_count,endpoint=False)

        rings.append(np.column_stack([
            ring_radius*np.cos(outer_angles),
            ring_radius*np.sin(outer_angles),
            np.full(outer_angle_count,inner_rad*np.sin(inner_angle)),
        ]))

    # np.concatenate rejects an empty list, which happens when no ring fits
    points = np.concatenate(rings) if rings else np.empty((0,3))
    jitter = np.random.uniform(-noise,noise,size=points.shape)

    return pd.DataFrame(points + jitter)

In [None]:
%%capture
# this command suppresses all the print statements I have in the code — you probably want to use this unless you're troubleshooting

embedding_dimension = 2 # desired embedding dimension




LEN = 5000

#eta_array = [.1 for i in range(LEN+1)]
eta_array = np.linspace(0.005,0.001,LEN+1)
for i in range(100):
    eta_array[i] = 1.0

lambda0 = 1 # for node2vec
lambda1 = 192 # for dim1 homology
lambda2 = 0 # for dim2 homology

L0_array = [lambda0 for i in range(LEN+1)]
L1_array = [lambda1*float(LEN/4+(3*i/4))/float(LEN)*int(i>100) for i in range(LEN+1)]
L2_array = [lambda2 for i in range(LEN+1)]

main_directory = home+'/tn2v_output/'
# local directory for saving output

if not os.path.isdir(main_directory):
    os.mkdir(main_directory)

    

#### GENERATE INPUT DATA
    
# circle number
cn = 8

# circle density
cd = 16

data = circles(cn,cd) # input data, should be a pd.DataFrame

mode = 'pointcloud'
# this describes the input type
# alternatives are 'correlationmatrix' and 'distancematrix'





#### NODE2VEC NBHD GENERATION PARAMETERS

r = 1000 # number of walks generated from each vertex in node2vec
l = 1000 # length of walks generated from each vertex in node2vec

L_array = [l for i in range(LEN+1)]
R_array = [r for i in range(LEN+1)]
P_array = [0 for i in range(LEN+1)]
Q_array = [1 for i in range(LEN+1)]

param_array = [{'l':L_array[i],
                'r':R_array[i],
                'p':P_array[i],
                'q':Q_array[i]}
               for i in range(LEN+1)]

nbhd_regen = None
# this determines how often/if the nbhds are regenerated using the variables above
# if nbhd_regen is None OR l*r > the size of the data set, no nbhd will be generated,
#     and instead the full vector of traversal probability (edge weights or reciprocal distances)
#     will be used instead





#### FURTHER ADJUSTMENT

mbs_array = [int(data.shape[0]*0.125) for i in range(LEN+1)] # mini-batch size — input is in number of data points, adjust the multiplier to choose by percent

grad_old = False

lift_array = [.125 for i in range(LEN+1)] # if you want to lift the PDs before matching, assign here

gpd = None # if you want to GIVE an artificial target PD, assign it here (as a pd.DataFrame)
W1_data = None # if you want to start with a non-random initial state for W1, assign it here




#### OUTPUT SETTINGS

pointcloud_vf_save = np.arange(LEN+1,step=100) # at what epochs do you want to save .png files of the pointcloud with vector fields denoting gradient movement?

pointcloud_data_save = np.arange(LEN+1,step=100) # at what epochs do you want to save .csv files of the current embedding?





project_name = 's1x8_example/'


overwrite = True
if os.path.isdir(main_directory+project_name) and not overwrite:
    
    print('This project directory already exists.')

else:

    X = tn2v.tn2v(
        main_directory=main_directory,
        project_name=project_name,
        data=data,
        mode=mode,
        embedding_dimension=embedding_dimension,
        param_array=param_array,
        l0_array=L0_array,
        l1_array=L1_array,
        l2_array=L2_array,
        eta_array=eta_array,
        LEN=LEN,
        mbs_array=mbs_array,
        lift_array=lift_array,
        gpd=gpd,
        grad_old=grad_old,
        nbhd_regen=nbhd_regen,
        pointcloud_data_save=pointcloud_data_save,
        pointcloud_vf_save=pointcloud_vf_save,
        cpu_gpu='gpu'
    )

In [None]:
%%capture
# this command suppresses the many (excessive) print statements I have in the code — you probably want to use this unless you're troubleshooting

embedding_dimension = 3 # desired embedding dimension






#### SETUP

LEN = 40000

eta_array = np.linspace(0.0025,0.000125,LEN+1)

# note that here and in the L* arrays below, the first 100 steps are left to full power Node2vec in order to rapidly move points to roughly their proper positions before trying to open up topological features
for i in range(100):
    eta_array[i] = 1.0

lambda0 = 1.0 # for node2vec
lambda1 = 256 # for dim1 homology
lambda2 = 784 # for dim2 homology

L0_array = [lambda0 for i in range(LEN+1)]
L1_array = [lambda1*float(LEN/4+(3*i/4))/float(LEN)*int(i>100) for i in range(LEN+1)]
L2_array = [lambda2*float(LEN/4+(3*i/4))/float(LEN)*int(i>100) for i in range(LEN+1)]

main_directory = home+'/tn2v_output/'
# local directory for saving output

if not os.path.isdir(main_directory):
    os.mkdir(main_directory)


    
    
    

#### INPUT DATA
    
data = torus() # input data, should be a pd.DataFrame

mode = 'pointcloud'
# this describes the input type
# alternatives are 'correlationmatrix' and 'distancematrix'

alpha = 0.8
# the distance-to-adjacency-matrix reciprocal parameter






#### NODE2VEC NBHD GENERATION PARAMETERS

r = 1000 # number of walks generated from each vertex in node2vec
l = 1000 # length of walks generated from each vertex in node2vec

L_array = [l for i in range(LEN+1)]
R_array = [r for i in range(LEN+1)]
P_array = [0 for i in range(LEN+1)]
Q_array = [1 for i in range(LEN+1)]

param_array = [{'l':L_array[i],
                'r':R_array[i],
                'p':P_array[i],
                'q':Q_array[i]}
               for i in range(LEN+1)]

nbhd_regen = None
# this determines how often / if the nbhds are regenerated using the variables above
# if nbhd_regen is None OR l*r > the size of the data set, no nbhd will be generated;
# instead the full vector of traversal probability (edge weights or reciprocal distances) will be used
# see the l = infinity remark in the paper for explanation





#### FURTHER ADJUSTMENT

mbs_array = [int(data.shape[0]*0.0625) for i in range(LEN+1)] # mini-batch size — input is in number of data points, adjust the multiplier to choose by percent

lift_array = [0.75 for i in range(LEN+1)] # if you want to lift the PDs before matching, assign here



#### OUTPUT SETTINGS

pointcloud_vf_save = np.arange(LEN+1,step=100) # at what epochs do you want to save .png files of the pointcloud with vector fields denoting gradient movement?

pointcloud_data_save = np.arange(LEN+1,step=100) # at what epochs do you want to save .csv files of the current embedding?





project_name = 'example_torus/'

overwrite = False
if os.path.isdir(main_directory+project_name) and not overwrite:
    print('This project directory already exists.')

else:
    X = tn2v.tn2v(
        main_directory=main_directory,
        project_name=project_name,
        data=data,
        mode=mode,
        embedding_dimension=embedding_dimension,
        param_array=param_array,
        l0_array=L0_array,
        l1_array=L1_array,
        l2_array=L2_array,
        eta_array=eta_array,
        LEN=LEN,
        mbs_array=mbs_array,
        lift_array=lift_array,
        gpd=gpd,
        grad_old=grad_old,
        nbhd_regen=nbhd_regen,
        pointcloud_data_save=pointcloud_data_save,
        pointcloud_vf_save=pointcloud_vf_save,
        alpha=alpha,
        cpu_gpu='gpu'
    )