In [103]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
from sklearn.manifold import smacof
from distance_utils import *
from sklearn.metrics import pairwise_distances

In [112]:
# Create a dataset
X = np.random.random_integers(-2000,2000, size=(900, 100))
# X = X/np.linalg.norm(X)

Y = np.random.random_integers(-2000,2000, size=(900, 10))
# Y = Y/np.linalg.norm(Y)

In [63]:
Y

array([[ -207, -1834, -1995, ...,     0,     0,     0],
       [ 1604,  -222,   396, ...,     0,     0,     0],
       [-1355, -1273,  1022, ...,     0,     0,     0],
       ...,
       [  158, -1032,   278, ...,     0,     0,     0],
       [-1699,  1303,  -755, ...,     0,     0,     0],
       [ 1012,  1100,  -732, ...,     0,     0,     0]])

In [36]:
# Perform multidimensional scaling
_,stress = smacof(n_components=2, dissimilarities=X, normalized_stress=True, metric=False)
stress


0.29710265549436726

In [86]:
import random
r = np.array([random.randrange(1, 1000) for _ in range(0, 1000)])
c = r[:, None]
dists = np.abs(r - r[:, None])

In [102]:
np.abs(X - X[:,None]).shape

(900, 900, 100)

In [113]:
dist = pairwise_distances(X)

In [111]:
dist

array([[0.        , 0.05006091, 0.04686859, ..., 0.04974494, 0.05032484,
        0.04944815],
       [0.05006091, 0.        , 0.04683941, ..., 0.04461266, 0.04390541,
        0.04992505],
       [0.04686859, 0.04683941, 0.        , ..., 0.04553586, 0.04847867,
        0.04662325],
       ...,
       [0.04974494, 0.04461266, 0.04553586, ..., 0.        , 0.04410017,
        0.04792273],
       [0.05032484, 0.04390541, 0.04847867, ..., 0.04410017, 0.        ,
        0.05065715],
       [0.04944815, 0.04992505, 0.04662325, ..., 0.04792273, 0.05065715,
        0.        ]])

In [163]:
import scipy
import os
from distance_utils import featurenormalize
data = scipy.io.loadmat('../../Store/arrays.mat')
X = data["cm_features"]
X_norm = featurenormalize(X)

In [164]:
dissimilarities = pairwise_distances(X_norm)

In [165]:
%%time
_,stress = smacof(dissimilarities=dissimilarities, init = X_norm, normalized_stress=True, metric=False)
stress

CPU times: user 26.9 s, sys: 1.93 s, total: 28.9 s
Wall time: 24.1 s


2.2106663159274273e-16

In [157]:
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'labels', 'cm_features', 'hog_features', 'avgpool_features', 'layer3_features', 'fc_features', 'resnet_features'])

In [166]:
%%time
from sklearn.decomposition import PCA
pca = PCA(n_components="mle")
pca.fit(X_norm)
Y = pca.transform(X_norm)

In [169]:
Y.shape

(4339, 873)

In [170]:
dissimilarities_Y = pairwise_distances(Y)

In [171]:
_,stress_Y = smacof(dissimilarities=dissimilarities_Y, init = Y, normalized_stress=True, metric=False)
stress_Y

7.684351312877257e-13

In [172]:
stress>stress_Y

False

In [178]:
import scipy
import os
from sklearn.decomposition import PCA
from distance_utils import featurenormalize
from tqdm import tqdm
data = scipy.io.loadmat('../../Store/arrays.mat')
hashmap = {}
features = ['cm_features', 'hog_features', 'avgpool_features', 'layer3_features', 'fc_features', 'resnet_features']
for feature in tqdm(features):
    X = data[feature]
    X_norm = featurenormalize(X)
    dissimilarities = pairwise_distances(X_norm)
    _,stress = smacof(dissimilarities=dissimilarities, init = X_norm, normalized_stress=True, metric=False)
    
    pca = PCA(n_components='mle')
    pca.fit(X_norm)
    Y = pca.transform(X_norm)
    
    dissimilarities_Y = pairwise_distances(Y)
    _,stress_Y = smacof(dissimilarities=dissimilarities_Y, init = Y, normalized_stress=True, metric=False)
    
    hashmap[feature] = [stress, stress_Y]

100%|█████████████████████████████████████████████████████████████████████████████| 6/6 [05:31<00:00, 55.22s/it]


In [174]:
hashmap

{'cm_features': [2.2106663159274273e-16, 7.684351312877257e-13],
 'hog_features': [2.191922842555055e-16, 6.649818965363446e-16],
 'avgpool_features': [4.560424932506581e-16, 6.679384729686748e-16],
 'layer3_features': [8.89857826226048e-16, 1.33317000083465e-15],
 'fc_features': [2.1929855952513896e-16, 1.8007375654995253e-17],
 'resnet_features': [3.499290615133032e-16, 2.3359910350527583e-16]}

In [176]:
for k,v in hashmap.items():
    print(f"{k}: Stress Reduction: {v[1] < v[0]}")

cm_features: Stress Reduction: False
hog_features: Stress Reduction: False
avgpool_features: Stress Reduction: False
layer3_features: Stress Reduction: False
fc_features: Stress Reduction: True
resnet_features: Stress Reduction: True


In [177]:
(hashmap["fc_features"][1] - hashmap["fc_features"][0]) - (hashmap["resnet_features"][1] - hashmap["resnet_features"][0])

-8.496122586211633e-17

In [179]:
hashmap

{'cm_features': [2.2106663159274273e-16, 1.4421554731602732e-12],
 'hog_features': [2.191922842555055e-16, 1.1095524473019442e-15],
 'avgpool_features': [4.560424932506581e-16, 4.568272518781918e-16],
 'layer3_features': [8.89857826226048e-16, 1.6507910727695148e-16],
 'fc_features': [2.1929855952513896e-16, 6.674532021242145e-16],
 'resnet_features': [3.499290615133032e-16, 3.514499956956765e-16]}

In [181]:
data = scipy.io.loadmat('../../Store/arrays.mat')
fc = data["cm_features"]
fc.shape

(4339, 900)

In [185]:
labels= data["labels"]

In [252]:
import tensorflow as tf
index = tf.argmax(labels, axis=1).numpy()


array([  0,   0,   0, ..., 100, 100, 100])

In [295]:
import numpy as np
from tqdm import tqdm
def get_labelled_features():
    data = scipy.io.loadmat(ROOT_DIR+'/Store/arrays.mat')
    if os.path.exists(os.path.join(ROOT_DIR, "Store/latent_dim_label.mat")):
        return loadmat(os.path.join(ROOT_DIR, "Store/latent_dim_label.mat"))
    labels = tf.argmax(data["labels"], axis=1).numpy()
    features = ['cm_features', 'hog_features', 'avgpool_features', 'layer3_features', 'fc_features', 'resnet_features'] 

    label_features = {_label:{feature:None for feature in features} for _label in range(101)}

    for idx in tqdm(range(len(labels))):
        for feature in features:
            if not isinstance(label_features[labels[idx]][feature], np.ndarray):
                label_features[labels[idx]][feature] = data[feature][idx]
            else:
                label_features[labels[idx]][feature] = np.vstack((label_features[labels[idx]][feature], data[feature][idx]))
    savemat(os.path.join(ROOT_DIR, "Store/latent_dim_label.mat"), label_features)
    return label_features
    
label_features = get_labelled_features(data)

100%|██████████████████████████████████████████████████████████████████████| 4339/4339 [00:15<00:00, 272.29it/s]


In [None]:
def calculate_stress_per_label():
    label_features = os.path.join(ROOT_DIR, "Store/latent_dim_label.mat")
    if os.path.exists(os.path.join(ROOT_DIR, "Store/stress_label.csv")):
        return
        
    df = pd.DataFrame(columns=["Label", "Feature Space", "Original Space", "Latent Space", "delta_stress"])
    pca = PCA(n_components='mle')
    for label in label_features:
            for feature in features:
                ## Calculate Stress for label & feature
                X_norm = featurenormalize(label_features[label][feature])
                dissimilarities = pairwise_distances(X_norm)
                _,stress = smacof(dissimilarities=dissimilarities, init = X_norm, normalized_stress=True, metric=False)
                ## get latent features
                pca.fit(X_norm)
                X_latent = pca.transform(X_norm)
                ## Calculate stress for latent features
                dissimilarities_X_latent = pairwise_distances(X_latent)
                _,stress_X_latent = smacof(dissimilarities=dissimilarities_X_latent, init = X_latent, normalized_stress=True, metric=False)
                ## Store
                stress_data = {"Label": label,"Feature Space": feature, "Original Space": stress, "Latent Space": stress_X_latent, "delta_stress": stress_X_latent - stress}
                df = pd.concat([df, pd.DataFrame([stress_data])], ignore_index=True)
    df.to_csv(os.path.join(ROOT_DIR, "Store/stress_label.csv"), index = False)
    
def get_best_inherent_dim_per_label():
    df = pd.read_csv(os.path.join(ROOT_DIR, "Store/stress_label.csv"))
    best_feature_per_label = []
    for label in range(101):
        df_label = df[df.Label == label)]
        feature = df_label[df_label["Latent Space"] == df_label["Latent Space"].min()]["Feature Space"].values[0]
        best_feature_per_label.append((label, feature))
    return best_feature_per_label


def get_inherent_dim_label(best_feature_per_label):
    data = get_labelled_features()
    inherent_dim_labels = dict()
    for label, feature in best_feature_per_label:
        inherent_dim_labels[label] = (feature, data[label][feature])
    return inherent_dim_labels

In [305]:
label_features[3]['avgpool_features'].shape

(399, 1024)

In [256]:
label_features[]

{0: {'cm_features': None,
  'hog_features': None,
  'avgpool_features': None,
  'layer3_features': None,
  'fc_features': None,
  'resnet_features': None},
 1: {'cm_features': None,
  'hog_features': None,
  'avgpool_features': None,
  'layer3_features': None,
  'fc_features': None,
  'resnet_features': None},
 2: {'cm_features': None,
  'hog_features': None,
  'avgpool_features': None,
  'layer3_features': None,
  'fc_features': None,
  'resnet_features': None},
 3: {'cm_features': None,
  'hog_features': None,
  'avgpool_features': None,
  'layer3_features': None,
  'fc_features': None,
  'resnet_features': None},
 4: {'cm_features': None,
  'hog_features': None,
  'avgpool_features': None,
  'layer3_features': None,
  'fc_features': None,
  'resnet_features': None},
 5: {'cm_features': None,
  'hog_features': None,
  'avgpool_features': None,
  'layer3_features': None,
  'fc_features': None,
  'resnet_features': None},
 6: {'cm_features': None,
  'hog_features': None,
  'avgpool_fea

In [258]:
np.stack((data["cm_features"][0], data["cm_features"][1]))

(2, 900)

In [294]:
x = np.vstack((data['cm_features'][0], data['cm_features'][1]))
x.shape

(2, 900)

In [265]:
isinstance(data['cm_features'][0], np.ndarray)

True

In [288]:
data['cm_features'][0].reshape(1,900)

(1, 900)

In [317]:
from scipy.io import loadmat
data = loadmat("/root/assignments/mwd/CSE515-Project/Store/original_space_features_by_label.mat")

In [318]:
data['0']

array([[(array([[ 9.28100000e+01,  4.67680863e+01,  3.22140643e-01, ...,
                 2.19130000e+02,  3.41094283e+01, -2.91407672e+00],
               [ 2.27300000e+02,  2.83800987e+01, -3.72945393e-01, ...,
                 2.33200000e+02,  2.50259865e+01, -8.08713135e-01],
               [ 8.54500000e+01,  1.52403248e+01, -1.60646664e+00, ...,
                 1.23540000e+02,  3.22469906e+01,  2.11454230e-01],
               ...,
               [ 7.21000000e+01,  1.19167949e+01, -7.92673165e-01, ...,
                 5.93000000e+01,  4.65500806e+01,  9.15679241e-01],
               [ 6.34300000e+01,  3.56463897e+01,  3.16945458e-02, ...,
                 1.04000000e+02,  7.79102047e+00, -3.35311883e+00],
               [ 1.02210000e+02,  4.37541529e+01, -2.56405928e-01, ...,
                 1.63470000e+02,  1.43585898e+01,  7.13910587e-01]]), array([[ 60791.79508903,   6339.78662879,   5264.69275283, ...,
                   262.95411248,   1055.50613378,  12021.22107874],
     

In [319]:
import numpy as np

def pairwise_distances_custom(X, metric='euclidean', **kwargs):
    """
    Calculate pairwise distances between feature descriptors without using cdist.

    Parameters:
    - X: numpy array, shape (n_samples, n_features)
      The input array of feature descriptors.
    - metric: str, optional (default='euclidean')
      The distance metric to use. See the documentation of numpy.linalg.norm for available options.
    - **kwargs: additional keyword arguments
      Additional arguments that are passed to the underlying distance function.

    Returns:
    - dist_matrix: numpy array, shape (n_samples, n_samples)
      The pairwise distance matrix.
    """

    # Ensure X is a numpy array
    X = np.array(X)

    # Calculate pairwise distances without using cdist
    dist_matrix = np.zeros((X.shape[0], X.shape[0]))

    for i in range(X.shape[0]):
        for j in range(i + 1, X.shape[0]):
            distance = np.linalg.norm(X[i] - X[j], ord=kwargs.get('ord', 2))
            dist_matrix[i, j] = distance
            dist_matrix[j, i] = distance

    return dist_matrix



# Example usage:
# feature_descriptors is assumed to be a numpy array of shape (n_samples, n_features)
feature_descriptors = np.array([[1, 2, 3,], [4, 5, 6], [7, 8, 9]])

# Calculate pairwise distances using the custom method
distances = pairwise_distances_custom(feature_descriptors)


# print("Pairwise Distances:")
# print(distances)


(3, 3)