In [38]:
import numpy as np
from scipy.spatial.distance import pdist, squareform
import scipy.io

In [39]:

def minkowski_distance(X, p):
    return squareform(pdist(X, metric='minkowski', p=p))


In [40]:
def stress_function(X, delta, p):
    d_X = minkowski_distance(X, p)
    mask = d_X != 0
    stress = np.sum(((d_X - delta) * mask) ** 2)
    return stress


In [41]:
def guttman_transform(X, delta, p):
    d_X = minkowski_distance(X, p)
    B = np.zeros_like(d_X)
    
    mask = d_X != 0
    B[mask] = -delta[mask] / d_X[mask]
    
    np.fill_diagonal(B, -np.sum(B, axis=1))
    X_new = (1 / len(X)) * (B @ X)
    return X_new



In [42]:
def smacof(X, delta, p, epsilon=1e-6, max_iter=300):
    stress_old = stress_function(X, delta, p)
    for _ in range(max_iter):
        X_new = guttman_transform(X, delta, p)
        stress_new = stress_function(X_new, delta, p)
        
        if abs(stress_new - stress_old) < epsilon:
            break
        stress_old = stress_new
        X = X_new
    
    return X, stress_new


In [43]:
df = scipy.io.loadmat('datasets/leukemia.mat')
df.keys()

dict_keys(['__header__', '__version__', '__globals__', 'geneinfo', 'cancertype', 'btcell', 'leukemia'])

In [44]:
dataset_info = {key: value.shape for key, value in df.items() if isinstance(value, np.ndarray)}
dataset_info

{'geneinfo': (51, 2),
 'cancertype': (72, 1),
 'btcell': (72, 1),
 'leukemia': (50, 72)}

In [45]:
data_matrix = df['leukemia']  

# Compute dissimilarity matrix using Minkowski distance
delta = squareform(pdist(data_matrix, metric='minkowski', p=2))  # Default p=2 for Euclidean

X_init = np.random.rand(delta.shape[0], 2)  # Initial random configuration

for p in [1.5, 2, 7]:
    X_final, stress = smacof(X_init, delta, p)
    print(f"p = {p}: Final stress = {stress}")


p = 1.5: Final stress = 43512054232.94126
p = 2: Final stress = 43884313224.96434
p = 7: Final stress = 52751705362.38033
