In [1]:
import numpy as np

import pandas as pd

import scipy as sp
import scipy.interpolate

import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as py
import plotly.figure_factory as ff

from itertools import combinations

from collections import OrderedDict


from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LinearRegression

In [None]:
# Seed the global NumPy generator so this cell yields the same sample every run.
np.random.seed(23)

n_points = 100

# Random angles in [0, 2*pi), mapped onto the unit circle.
theta = 2 * np.pi * np.random.random(n_points)
x, y = np.cos(theta), np.sin(theta)

a = np.linspace(0, 2 * np.pi, n_points)

# 1000-step sweep of the full circle, used for the reference outline.
xx = np.cos(np.linspace(0, 2 * np.pi, 1000))
yy = np.sin(np.linspace(0, 2 * np.pi, 1000))


# Independent uniform jitter in [-0.125, 0.125) for each coordinate.
x_noise = 0.25 * (np.random.random(n_points) - 0.5)
y_noise = 0.25 * (np.random.random(n_points) - 0.5)

# Reference outline of the unit circle, drawn in black with hover disabled.
circle_trace = go.Scatter(
    x=xx,
    y=yy,
    name='circle',
    line={'color': 'black'},
    # visible='legendonly',
    hoverinfo='skip',
)

# Jittered samples plus the outline on a square canvas with matching axis ranges.
fig = px.scatter(x=x + x_noise, y=y + y_noise, width=500, height=500)
fig.add_trace(circle_trace)
fig.update_layout(xaxis_range=(-1.2, 1.2), yaxis_range=(-1.2, 1.2))

fig.show()

In [None]:
# Noisy samples as a labelled table ...
df = pd.DataFrame({'x': x + x_noise, 'y': y + y_noise})

# ... and as a matrix with one sample per row.
X = np.vstack([x + x_noise, y + y_noise]).T

n, d = X.shape

# Fit the neighbour index on X and query d + 1 neighbours for every row of X.
nbrs = NearestNeighbors(n_neighbors=d + 1)
nbrs.fit(X)
distances, indices = nbrs.kneighbors(X)

In [9]:
# Two independent batches of 50 draws from N(0, 1) (defaults written out explicitly).
x_plus = np.random.normal(loc=0.0, scale=1.0, size=50)
x_minus = np.random.normal(loc=0.0, scale=1.0, size=50)

In [16]:
dataset_size = 100
# Sizes of the two halves; the second one absorbs the extra element when the size is odd.
(dataset_size // 2, dataset_size - dataset_size // 2)

(50, 50)

## first way

sampled over $x$, where $(x,y) \in S^1 \subset \mathbb{R}^2$

In [73]:
dataset_size = 100
# The "plus" batch receives the extra sample whenever dataset_size is odd.
x_plus = np.random.normal(size=dataset_size - dataset_size // 2)
x_minus = np.random.normal(size=dataset_size // 2)

In [86]:
# Pool both batches and record the largest absolute value among them.
x_concat = np.concatenate((x_plus, x_minus))
x_max = np.max(np.abs(x_concat))


In [87]:
# Solve x^2 + y^2 = 1 for y after rescaling x by x_max: the "plus" batch takes the
# positive root, the "minus" batch takes the negative root.
y_plus = np.sqrt(1 - np.square(x_plus / x_max))
y_minus = -np.sqrt(1 - np.square(x_minus / x_max))
y_concat = np.concatenate((y_plus, y_minus))

In [90]:
# One (x, y) pair per row, with x rescaled by x_max.
data = np.transpose(np.array([x_concat / x_max, y_concat]))
# Divide every coordinate by the largest entry of the array.
data = data / np.max(data)

In [91]:
# Scatter the first column of `data` against the second.
px.scatter(x=data[:, 0], y=data[:, 1])

In [92]:
# 2-D rotation matrix for an angle of pi/4.
theta = np.pi / 4
R = np.array([
    [np.cos(theta), -np.sin(theta)],
    [np.sin(theta), np.cos(theta)],
])


(0.7071067811865476, 0.7071067811865475)

In [68]:
# Called without `ord`; for a 2-D array NumPy's default is the Frobenius norm.
# NOTE(review): the saved output below carries execution count 68, earlier than the
# cell that defines R above (92) — re-run to confirm the displayed value.
np.linalg.norm(R)

0.9999999999999999

In [69]:
# Rotate the sampled points by R and store them one point per row, matching the
# (n_samples, 2) layout `data` has when it is first built. Without the transpose,
# `data` becomes (2, n_samples) and the later `data@R` cells fail with a shape
# mismatch on a top-to-bottom run.
# NOTE(review): x_concat is not divided by x_max here, unlike the cell that first
# builds `data` — confirm this is intended.
data = (R @ np.array([x_concat, y_concat])).T

In [83]:
# Shape check of the matrix product of `data` with R.
np.matmul(data, R).shape

(100, 2)

In [93]:
# Right-multiplying row-vector points by R applies R's transpose to each point.
px.scatter(x=(data @ R)[:, 0], y=(data @ R)[:, 1])

## second way

sampled over $\theta \in [0,2\pi]$ for $(\cos(\theta),\sin(\theta)) \in S^1 \subset \mathbb{R}^2$

take $x \sim N(0,1)$ to $\theta \sim N(\mu, \sigma)$ with standard deviation $\sigma = \pi/4$ (the code below uses $\mu = \pi/4$)

In [211]:
dataset_size = 200
theta = np.pi / 4  # mean angle of the first cluster

# Both batches of angles are drawn around `theta` with standard deviation pi/4;
# the "plus" batch receives the extra sample when dataset_size is odd.
theta_plus = np.random.normal(theta, np.pi / 4, dataset_size - dataset_size // 2)
theta_minus = np.random.normal(theta, np.pi / 4, dataset_size // 2)

# Shift the second batch by pi so it lands on the opposite side of the circle.
theta_concat = np.concatenate((theta_plus, np.pi + theta_minus))

x, y = np.cos(theta_concat), np.sin(theta_concat)

# Independent uniform jitter in [-0.25, 0.25) per coordinate.
x_noise = 0.50 * (np.random.random(dataset_size) - 0.5)
y_noise = 0.50 * (np.random.random(dataset_size) - 0.5)
# r_noise = np.random.normal(loc = 1, scale = .5, size = dataset_size)
r_noise = 0.5 * (np.random.random(dataset_size) - 0.5)

In [212]:
# Scatter of the jittered two-cluster sample.
px.scatter(x=x + x_noise, y=y + y_noise)
# px.scatter(x=r_noise*x, y=r_noise*y)


In [213]:
# Least-squares regression of the noisy y coordinate on the noisy x coordinate.
model = LinearRegression()
model.fit((x + x_noise).reshape(-1, 1), y + y_noise)

min_range = np.min(x + x_noise)
max_range = np.max(x + x_noise)


# Evaluate the fitted line on 100 evenly spaced points across the observed x range.
x_ols = np.linspace(min_range, max_range, 100)
y_ols = model.predict(x_ols[:, None])


# Sample points as markers, fitted line on top; the fit parameters go in the title.
fig = go.Figure([
    go.Scatter(x=x + x_noise, y=y + y_noise, name='$S^1$', mode='markers'),
    go.Scatter(x=x_ols, y=y_ols, name='OLS fit'),
])
fig.update_layout(
    title='intercept: {}, slope: {}'.format(
        round(model.intercept_, 4), round(model.coef_[0], 4)
    ),
    width=500,
    height=500,
)
fig.show()

In [214]:
def dsphere(n=100, d=2, r=1, noise=None, ambient=None, seed=None):
    """
    Sample `n` data points on a d-sphere.

    Points are drawn from a standard normal distribution in ``d + 1``
    dimensions and rescaled so that every row has Euclidean norm `r`.

    Parameters
    -----------
    n : int
        Number of data points in shape.
    d : int
        Dimension of the sphere; each sample has ``d + 1`` coordinates.
    r : float
        Radius of sphere.
    noise : float, default=None
        If truthy, standard normal noise multiplied by `noise` is added to
        every coordinate after the points are placed on the sphere.
    ambient : int, default=None
        Embed the sphere into a space with ambient dimension equal to `ambient`. The sphere is randomly rotated in this high dimensional space.
    seed : int, default=None
        Seed for the global NumPy random state. When None, the global state
        is left as it is, so a seed set earlier by the caller stays in effect.

    Returns
    -------
    data : numpy.ndarray
        Array of shape ``(n, d + 1)`` when `ambient` is falsy; otherwise
        whatever ``embed(data, ambient)`` returns.
    """
    # Reseed only on request: np.random.seed(None) would re-initialise the
    # global generator from fresh entropy and discard any seed set earlier.
    if seed is not None:
        np.random.seed(seed)
    data = np.random.randn(n, d + 1)

    # Normalize points to the sphere
    data = r * data / np.sqrt(np.sum(data ** 2, 1)[:, None])

    if noise:
        data += noise * np.random.randn(*data.shape)

    if ambient:
        assert ambient > d, "Must embed in higher dimensions"
        # NOTE(review): `embed` is not defined in the visible part of this
        # notebook; it must be in scope before `ambient` is used.
        data = embed(data, ambient)

    return data

In [264]:
# Preview the first ten rows of S1.
# NOTE(review): `S1` is assigned in the next cell (`S1 = dsphere(d=1, noise = 0.1)`);
# on a fresh top-to-bottom run this cell executes before that assignment.
S1[:10]

array([[ 0.01159176, -0.78133311],
       [ 0.24541249, -1.01264504],
       [ 0.61089454,  0.82255239],
       [ 1.06380549, -0.16553503],
       [ 0.83720607,  0.57271136],
       [-0.97928376, -0.25445902],
       [-0.55729258, -0.9168292 ],
       [ 0.43857103, -0.74558037],
       [-0.35745903,  1.0214022 ],
       [ 0.03936944, -0.97175597]])

In [224]:
# Noisy samples with d=1 and d=2, using dsphere's default n and r.
S1 = dsphere(noise=0.1, d=1)
S2 = dsphere(noise=0.1, d=2)
# Displayed only: ten noisy samples with d=3.
dsphere(n=10, noise=0.1, d=3)

In [261]:
# 100 standard normal points in the plane ...
data = np.random.randn(100, 2)
# ... each divided by its own Euclidean norm, which puts it on the unit circle.
s1_data = data / np.sqrt((data ** 2).sum(axis=1, keepdims=True))

In [251]:
# First row of `data`, before normalisation.
data[0]

array([-0.24175922, -1.3433436 ])

In [252]:
# Euclidean norm of the first row of `data`, kept as a length-1 array.
np.sqrt((data ** 2).sum(axis=1, keepdims=True))[0]

array([1.36492474])

In [253]:
# First row of `s1_data`, i.e. the first row of `data` divided by its norm.
s1_data[0]

array([-0.17712275, -0.98418877])

In [262]:
# Overlay the raw Gaussian samples with the same points divided by their norms.
# The LaTeX labels are raw strings so the backslashes in \sim and \mapsto stay
# literal; in a normal string they are invalid escape sequences, which Python
# passes through unchanged but warns about.
fig = go.Figure([
    go.Scatter(x = data[:,0], y = data[:,1], name=r'$x_i \sim N(0,1)$', mode='markers'),
    go.Scatter(x = s1_data[:,0], y = s1_data[:,1], name=r'$x_i \mapsto S^1$', mode='markers')
])
fig.update_layout(height = 750, width = 750)
#     title = r'Contraction map from $x_i \sim N(0,1)$ to circle $S^1$ via projection from origin',
fig.show()