In [55]:
import numpy as np

In [56]:
# Creating Sample Dataset 
# Linear Alkanes
x = [
    [1,2,3,4,5,6,7,8,9,10],                           # number of carbons
    [-183,-183,-188,-138,-130,-95,-91,-57,-53,-30],   # Melting Point
    [-162,-89,-42,-1,36,69,98,126,151,174],           # Boiling Point
    [-135,-135,-104,-60,-49,-20,-7,12,31,46],         # Flash Point
    [595,515,470,365,260,230,220,205,205,200]         # Autoignition Temperature 
]
x = np.array(x,dtype=float)
x_name = ['Number of Carbons','Melting Point','Boiling Point','Flash Point','Autoignition Temperature']
properties = x.shape[0]
print('properties : ',properties)
compounds = x.shape[1]
print('compounds : ',compounds)

properties :  5
compounds :  10


In [57]:
# Step 1: Standardizing the Data
for i in range(properties):
    mean= np.mean(x[i,:])
    std = np.std(x[i,:])
    x[i] = (x[i]-mean)/std
print(x)

[[-1.5666989  -1.21854359 -0.87038828 -0.52223297 -0.17407766  0.17407766
   0.52223297  0.87038828  1.21854359  1.5666989 ]
 [-1.23008541 -1.23008541 -1.32026763 -0.41844548 -0.27415393  0.35712157
   0.42926734  1.04250641  1.11465218  1.52949037]
 [-1.9089359  -1.2051363  -0.75200505 -0.35672035  0.          0.31815598
   0.59774761  0.86769814  1.1087254   1.33047048]
 [-1.48354674 -1.48354674 -0.98849885 -0.28585023 -0.11018808  0.35292124
   0.56052196  0.86393841  1.16735486  1.40689416]
 [ 1.89714753  1.33188942  1.01393173  0.27203047 -0.4698708  -0.6818426
  -0.75249986 -0.85848575 -0.85848575 -0.89381439]]


In [58]:
# Step 2: Calculate Covariance Matrix
cov = np.cov(x)
print(cov)

[[ 1.11111111  1.08774608  1.09536944  1.09929443 -1.02703755]
 [ 1.08774608  1.11111111  1.05758561  1.09166881 -1.0090178 ]
 [ 1.09536944  1.05758561  1.11111111  1.09203066 -1.07098048]
 [ 1.09929443  1.09166881  1.09203066  1.11111111 -1.05360571]
 [-1.02703755 -1.0090178  -1.07098048 -1.05360571  1.11111111]]


In [53]:
# Step 3: Find the Principal Components
eigenval, eigenvec = np.linalg.eig(cov)
print(eigenval)
print(eigenvec)

[5.38561857e+00 1.22232307e-01 3.77252314e-02 1.90720103e-03
 8.07225085e-03]
[[-0.45021924  0.30164205 -0.43610543 -0.69786008 -0.1706402 ]
 [-0.44492949  0.4993078   0.59077635  0.22880652 -0.38905056]
 [-0.45068956 -0.17178313 -0.57489075  0.62694635 -0.20930448]
 [-0.45242771  0.13640078  0.08673703  0.08314994  0.8730789 ]
 [ 0.43763966  0.78204167 -0.35038931  0.24629909  0.1159588 ]]


In [54]:
# percentage of variance accounted for by each component
var = 100*(eigenval/np.sum(eigenval))
print(var)

In [59]:
# Step 4: Pick the Top Directions & Transform Data
pc= np.zeros((properties,compounds))
for i in range(properties):
    for j in range(compounds):
        for k in range(properties):
            pc[i,j] = pc[i,j] + eigenvec[i,k]*x[k,j]
print(pc)

[[ 1.87838823  1.51116694  0.83838718  0.41753039  0.15275146 -0.23933967
  -0.6290744  -0.9122256  -1.36406515 -1.65351939]
 [-2.12240638 -1.6416088  -1.33687102 -0.35855619  0.09815683  0.63484226
   0.75612641  1.17755482  1.27049007  1.52227198]
 [ 0.68764666  0.24443987  0.21943917  0.27617209  0.15481406  0.04126802
  -0.14382661 -0.34886361 -0.46650551 -0.66458414]
 [ 1.90846062  1.31847606  0.95152434  0.36199114 -0.37803367 -0.56840699
  -0.73625779 -0.8540169  -0.95555622 -1.04818057]
 [-1.12416214 -1.28394608 -1.27581825 -0.46966099 -0.37220797  0.25184731
   0.4056075   1.00540599  1.20447138  1.65846324]]
