In [28]:
#generate a toy compositional dataset in 3D and play around with it 
import numpy as np
import matplotlib.pyplot as plt
   
def is_compositional(x):
    """Tell whether ``x`` is a valid composition.

    ``x`` is an iterable of numbers. It counts as compositional when its
    entries add up to 1 (the sum is rounded to 5 decimal places before the
    comparison) and none of its entries is negative.

    Returns ``True`` if both conditions hold, ``False`` otherwise.
    """
    # Rounding absorbs the floating-point error left over from normalising.
    total_is_one = round(sum(x), 5) == 1
    if not total_is_one:
        return False
    return bool(np.all([x_i >= 0 for x_i in x]))

#define parameters
n = 100
max_entry_size = 10000
min_entry_size = 0
seed = 0  # fixed seed so a fresh "Restart & Run All" regenerates the same dataset

#generate data
# A dedicated RandomState makes the draw repeatable without reseeding the
# global np.random state that other cells may rely on.
rng = np.random.RandomState(seed)
random_freqs = rng.randint(low=min_entry_size, high=max_entry_size, size=(n, 3))

# Close every row onto the simplex by dividing it by its own total. The total
# is computed once per row instead of once per entry.
row_totals = random_freqs.sum(axis=1, keepdims=True)
# With min_entry_size = 0 an all-zero row is possible (if very unlikely); it
# has no composition (0/0), so fail here with a readable message.
assert np.all(row_totals > 0), "a sampled row does not have a positive total and cannot be normalised"
# comp_data is an (n, 3) float array; each row is one composition.
comp_data = random_freqs / row_totals


#check comp_data defines a valid set of compositional data points 
assert (np.all([is_compositional(x) for x in comp_data]))


In [26]:
#Apply PCA naively to the data
from sklearn import decomposition as dc
# Baseline: fit a 2-component PCA directly on the untransformed compositions
# and project the same points onto those components.
pca_model = dc.PCA(n_components=2).fit(comp_data)
naive_pca = pca_model.transform(comp_data)

# One point per composition: first component on x, second on y.
# NOTE(review): no plt.show()/new figure follows, so a later scatter in this
# cell presumably lands on the same axes - confirm that overlay is intended.
plt.scatter(*naive_pca.T)

#apply PCA to the standard clr (centred log-ratio) Aitchison transformation:
#   clr(x) = log(x) - mean(log(x)),
# where the mean is taken over the parts of each composition (i.e. per row),
# not over the whole data set.
comp_array = np.asarray(comp_data, dtype=float)
# log(0) = -inf would propagate infinities into PCA, so zero parts are swapped
# for a small pseudocount first. With the generator parameters above the
# smallest non-zero part is about 1/30000, so 1e-6 stays below every observed
# value. clr is unchanged by rescaling a row, so the rows need not be
# re-normalised afterwards.
clr_zero_replacement = 1e-6
comp_array = np.where(comp_array == 0, clr_zero_replacement, comp_array)
log_comp = np.log(comp_array)
# keepdims keeps the row means as a column so they broadcast across each row.
clr = log_comp - log_comp.mean(axis=1, keepdims=True)
pca_model = dc.PCA(2)
pca_model.fit(clr)
clr_pca = pca_model.transform(clr)

plt.scatter(clr_pca[:,0], clr_pca[:,1])
#plt.show()

<matplotlib.collections.PathCollection at 0x1ecab2d6ba8>

In [29]:
# Hellinger transformation: take the element-wise square root of every
# composition, then run the same 2-component PCA on the result.
hellinger = np.sqrt(comp_data)
pca_model = dc.PCA(n_components=2).fit(hellinger)
hellinger_pca = pca_model.transform(hellinger)

# One point per composition: first component on x, second on y.
plt.scatter(*hellinger_pca.T)

#TODO add 3D plots? overlay the axes to see if it agrees with intuition