In [1]:
import pandas as pd
import numpy as np
import math
from matplotlib import pyplot as plt
from matplotlib import cm

In [2]:
def t_nearest_matr(m, t, X):
    """Find the t nearest neighbours (Euclidean distance) of every data point.

    Parameters
    ----------
    m : int
        Number of rows allocated in the result, one per data point.
    t : int
        Number of neighbours to keep for each point.
    X : numpy.ndarray
        2-D data matrix whose columns are the data points (``X.T`` is walked
        row by row).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(m, t)``. Row ``i`` lists the indices of the
        ``t`` points closest to point ``i``, nearest first. Point ``i`` itself
        is never listed, even when other points coincide with it.

    Raises
    ------
    ValueError
        If ``t`` is larger than the number of other points available.
    """
    points = X.T  # one data point per row
    n_points = points.shape[0]
    if t > max(n_points - 1, 0):
        raise ValueError(
            f"t={t} neighbours requested but only {max(n_points - 1, 0)} other points exist"
        )

    t_nearest = np.ones((m, t), dtype=int)
    for idx, row in enumerate(points):
        distances = np.linalg.norm(points - row, axis=1)
        # A stable sort keeps ties in index order, making the result deterministic.
        order = np.argsort(distances, kind="stable")
        # Drop the point's own index explicitly: with duplicate points the
        # point is not guaranteed to be the first entry of the ordering.
        order = order[order != idx]
        t_nearest[idx] = order[:t]
    return t_nearest  # m x t matrix of neighbour indices

In [3]:
def weight_matr(m, N, X, sigma):
    """Build the symmetric Gaussian (heat-kernel) weight matrix of the neighbour graph.

    Two points ``i`` and ``j`` are connected when either one appears in the
    other's neighbour list; connected pairs get the weight
    ``exp(-||x_i - x_j||^2 / sigma^2)`` and all other entries are 0.

    Parameters
    ----------
    m : int
        Number of data points (size of the returned square matrix).
    N : sequence of sequences of int
        ``N[i]`` holds the neighbour indices of point ``i``.
    X : numpy.ndarray
        Data matrix whose columns are the data points.
    sigma : float
        Kernel width; the squared distance is divided by ``sigma ** 2``.

    Returns
    -------
    numpy.ndarray
        ``(m, m)`` symmetric weight matrix.
    """
    points = X.T  # transpose so that points[i] is the i-th data point
    W = np.zeros((m, m))
    denom = sigma ** 2
    for i in range(m):
        # The connection test and the weight are both symmetric in (i, j),
        # so only the upper triangle is computed and then mirrored.
        for j in range(i, m):
            if (i in N[j]) or (j in N[i]):
                diff = points[i] - points[j]
                # Squared Euclidean distance, as the Gaussian kernel requires.
                weight = np.exp(-np.dot(diff, diff) / denom)
                W[i][j] = weight
                W[j][i] = weight
    return W

In [4]:
def diag_matr(m, W):
    """Return the m x m diagonal matrix whose i-th diagonal entry is the sum of row i of W.

    Parameters
    ----------
    m : int
        Number of rows of ``W`` to summarise (and the size of the result).
    W : array-like
        Weight matrix, indexed by row.

    Returns
    -------
    numpy.ndarray
        ``(m, m)`` float matrix that is zero off the diagonal.
    """
    # Sum each row on its own, then place the sums on the diagonal.
    row_sums = np.array([np.sum(W[row]) for row in range(m)], dtype=float)
    return np.diag(row_sums)

In [5]:
def get_le_reduced(k, X, W):
    """Return the k eigenvectors of the graph Laplacian with the smallest eigenvalues.

    The Laplacian is ``L = D - W`` where ``D`` is the diagonal matrix of the
    row sums of ``W``.

    Parameters
    ----------
    k : int
        Number of eigenvectors to return.
    X : array-like
        Data matrix; only ``len(X[0])`` (the number of data points) is used.
    W : array-like
        ``(m, m)`` weight matrix of the neighbour graph.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(min(k, m), m)``. Each row is one eigenvector, ordered
        by decreasing eigenvalue, so the last row belongs to the smallest
        eigenvalue of ``L``. ``k = 0`` gives an empty ``(0, m)`` array.

    Raises
    ------
    ValueError
        If ``W`` is not an ``(m, m)`` matrix with ``m = len(X[0])``.
    """
    m = len(X[0])
    W = np.asarray(W)
    if W.shape != (m, m):
        raise ValueError(f"W must have shape ({m}, {m}), got {W.shape}")

    # Degree matrix (row sums on the diagonal) minus the weights.
    L = np.diag(W.sum(axis=1)) - W

    if np.allclose(L, L.T):
        # Symmetric Laplacian: eigh guarantees real eigenvalues and
        # orthonormal eigenvectors.
        eigenvalues, eigenvectors = np.linalg.eigh(L)
    else:
        eigenvalues, eigenvectors = np.linalg.eig(L)

    sorted_indices = np.argsort(eigenvalues)[::-1]  # decreasing eigenvalue
    sorted_eigenvectors = eigenvectors[:, sorted_indices]

    # Eigenvectors are the COLUMNS of the matrix, so slice columns and
    # transpose to hand them back as rows. An explicit start index avoids the
    # `[-0:]` pitfall, which would select everything when k == 0.
    start = max(m - k, 0)
    return sorted_eigenvectors[:, start:].T

In [6]:
def avg_distance_matr(m, t, X, N):
    """Compute a per-point distance scale from each point's first t neighbours.

    For point ``l`` the squared Euclidean distances to neighbours
    ``N[l][0] .. N[l][t-1]`` are summed; the result is the square root of
    that sum divided by ``t``.

    Parameters
    ----------
    m : int
        Number of data points.
    t : int
        Number of neighbours used per point.
    X : numpy.ndarray
        Data matrix whose columns are the data points.
    N : sequence of sequences of int
        ``N[l][j]`` is the index of the j-th neighbour of point ``l``.

    Returns
    -------
    numpy.ndarray
        Length-``m`` float vector of distance scales.
    """
    points = X.T  # one data point per row
    squared_totals = np.zeros((m,))
    for idx in range(m):
        own = points[idx]
        # Accumulate left to right from 0.0, one neighbour at a time.
        squared_totals[idx] = sum(
            (np.sum((own - points[N[idx][nbr]]) ** 2) for nbr in range(t)),
            0.0,
        )
    return np.sqrt(squared_totals) / t

In [7]:
def averaged_laplacian_weight(m, t, X, N, avgs):
    """Build a weight matrix whose kernel width adapts to local distance scales.

    Points ``i`` and ``j`` are connected when either appears in the other's
    neighbour list. Connected pairs receive
    ``exp(-||x_i - x_j||^2 / (t * avgs[i] * avgs[j]))``; other entries are 0.

    Parameters
    ----------
    m : int
        Number of data points (size of the returned square matrix).
    t : int or float
        Scalar factor in the kernel denominator.
    X : numpy.ndarray
        Data matrix whose columns are the data points.
    N : sequence of sequences of int
        ``N[i]`` holds the neighbour indices of point ``i``.
    avgs : sequence of float
        Per-point distance scale, indexed like the data points.

    Returns
    -------
    numpy.ndarray
        ``(m, m)`` weight matrix.
    """
    points = X.T  # one data point per row
    weights = np.zeros((m, m))
    for a, b in np.ndindex(m, m):
        # Skip pairs that are not neighbours in either direction.
        if a not in N[b] and b not in N[a]:
            continue
        squared_distance = np.linalg.norm(points[a] - points[b]) ** 2
        scale = t * avgs[a] * avgs[b]
        weights[a][b] = np.exp(-squared_distance / scale)
    return weights

In [8]:
def variable_nbrs_weight(m, t, X, N, avgs):
    """Build a weight matrix where each point keeps a point-specific number of neighbours.

    Point ``p`` keeps the first ``int(1 + t * exp(1 / avgs[p]) / z)`` entries
    of ``N[p]``, where ``z`` is the mean of ``exp(1 / avgs)``. Points ``i`` and
    ``j`` are connected when either appears in the other's truncated list, and
    connected pairs receive
    ``exp(-||x_i - x_j||^2 / (t * avgs[i] * avgs[j]))``.

    Parameters
    ----------
    m : int
        Number of data points (size of the returned square matrix).
    t : int or float
        Base neighbour count; also a factor in the kernel denominator.
    X : numpy.ndarray
        Data matrix whose columns are the data points.
    N : sequence of sequences of int
        ``N[p]`` holds the neighbour indices of point ``p``; only a prefix of
        each list is used.
    avgs : numpy.ndarray
        Per-point distance scale, indexed like the data points.

    Returns
    -------
    numpy.ndarray
        ``(m, m)`` weight matrix.
    """
    inverse_scale = np.exp(1 / avgs)
    z = np.mean(inverse_scale)
    # Per-point neighbour count, truncated towards zero by the int cast.
    neighbour_counts = (1 + t * inverse_scale / z).astype(int)

    # Truncate each neighbour list once instead of re-slicing inside the loops.
    kept = [N[p][:neighbour_counts[p]] for p in range(m)]

    points = X.T  # one data point per row
    weights = np.zeros((m, m))
    for row in range(m):
        for col in range(m):
            connected = (row in kept[col]) or (col in kept[row])
            if not connected:
                continue
            squared_distance = np.linalg.norm(points[row] - points[col]) ** 2
            weights[row][col] = np.exp(-squared_distance / (t * avgs[row] * avgs[col]))
    return weights