In [1]:
import numpy as np
import pandas as pd
import matplotlib as plt
from numpy.linalg import eig

In [27]:
file_path ="output_reflectance4.csv"

df = pd.read_csv(file_path, header=0)

feature_names = df.columns[1:] 
data = df.iloc[:9, 1:].values.astype(float)

In [28]:
print(data)
print(data.shape)

[[28.85 25.03 20.18 ...  6.71  6.27  6.25]
 [29.34 24.67 19.33 ...  6.86  7.34  6.4 ]
 [34.83 29.01 22.76 ... 10.23 10.4   9.71]
 ...
 [33.72 28.67 23.52 ...  6.38  6.52  5.94]
 [29.82 24.43 19.3  ...  8.83  8.18  7.89]
 [27.27 22.24 17.17 ...  4.75  5.29  5.26]]
(9, 216)


In [29]:
data_centered = data - np.mean(data, axis=0)
print(data_centered)

[[-2.23888889 -0.90444444 -0.32333333 ... -1.09222222 -1.49222222
  -1.05666667]
 [-1.74888889 -1.26444444 -1.17333333 ... -0.94222222 -0.42222222
  -0.90666667]
 [ 3.74111111  3.07555556  2.25666667 ...  2.42777778  2.63777778
   2.40333333]
 ...
 [ 2.63111111  2.73555556  3.01666667 ... -1.42222222 -1.24222222
  -1.36666667]
 [-1.26888889 -1.50444444 -1.20333333 ...  1.02777778  0.41777778
   0.58333333]
 [-3.81888889 -3.69444444 -3.33333333 ... -3.05222222 -2.47222222
  -2.04666667]]


In [31]:
cov_matrix = np.cov(data_centered, rowvar=False)
print(cov_matrix.shape)

(216, 216)


In [32]:
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

In [37]:
print(eigenvalues)

[ 7.50909868e+03+0.00000000e+00j  8.92771081e+02+0.00000000e+00j
  1.42576455e+02+0.00000000e+00j  1.38831620e+01+0.00000000e+00j
  9.89550781e+00+0.00000000e+00j  4.86542822e+00+0.00000000e+00j
  3.15818295e+00+0.00000000e+00j  8.93851611e-01+0.00000000e+00j
 -3.93363258e-13+3.29815075e-13j -3.93363258e-13-3.29815075e-13j
  3.46179760e-13+2.82004262e-13j  3.46179760e-13-2.82004262e-13j
 -7.24475159e-15+3.60401196e-13j -7.24475159e-15-3.60401196e-13j
  3.24593749e-13+0.00000000e+00j  1.32524761e-13+2.75585098e-13j
  1.32524761e-13-2.75585098e-13j -2.70522304e-13+6.59017526e-14j
 -2.70522304e-13-6.59017526e-14j  2.68709747e-13+0.00000000e+00j
 -3.79167862e-14+2.58063755e-13j -3.79167862e-14-2.58063755e-13j
  1.98213543e-13+1.64212916e-13j  1.98213543e-13-1.64212916e-13j
 -1.76558725e-13+1.67160202e-13j -1.76558725e-13-1.67160202e-13j
 -2.04990105e-13+9.49889551e-14j -2.04990105e-13-9.49889551e-14j
 -1.07638759e-14+1.57992064e-13j -1.07638759e-14-1.57992064e-13j
  1.78808946e-13+0.000000

In [38]:
eigenvalues = np.real(eigenvalues)
print(eigenvalues)

[ 7.50909868e+03  8.92771081e+02  1.42576455e+02  1.38831620e+01
  9.89550781e+00  4.86542822e+00  3.15818295e+00  8.93851611e-01
 -3.93363258e-13 -3.93363258e-13  3.46179760e-13  3.46179760e-13
 -7.24475159e-15 -7.24475159e-15  3.24593749e-13  1.32524761e-13
  1.32524761e-13 -2.70522304e-13 -2.70522304e-13  2.68709747e-13
 -3.79167862e-14 -3.79167862e-14  1.98213543e-13  1.98213543e-13
 -1.76558725e-13 -1.76558725e-13 -2.04990105e-13 -2.04990105e-13
 -1.07638759e-14 -1.07638759e-14  1.78808946e-13  1.19538030e-13
  1.19538030e-13 -1.59539879e-13  1.41123039e-13  1.41123039e-13
 -1.46813239e-13 -1.33957014e-13 -1.33957014e-13  1.43968170e-13
  1.43968170e-13  1.42348947e-13  1.32046988e-13  1.32046988e-13
 -1.18955615e-13 -1.18955615e-13 -1.07758560e-13 -1.07758560e-13
 -3.16230876e-14 -3.16230876e-14  9.54621121e-14  9.54621121e-14
  3.85570457e-14  3.85570457e-14  5.40347200e-14  5.40347200e-14
 -4.44554662e-14 -4.44554662e-14  9.38202093e-14  9.38202093e-14
  1.00995380e-13  1.00995

In [39]:
explained_variance_percentage = (eigenvalues / np.sum(eigenvalues)) * 100

In [40]:
results = pd.DataFrame({
    "Feature": feature_names,
    "Explained Variance (%)": explained_variance_percentage
})
print(results)

    Feature  Explained Variance (%)
0     356.0            8.754779e+01
1     366.0            1.040872e+01
2     376.0            1.662284e+00
3     386.0            1.618623e-01
4     396.0            1.153707e-01
..      ...                     ...
211  2466.0            1.009294e-17
212  2476.0            1.009294e-17
213  2486.0            6.072008e-17
214  2496.0            6.072008e-17
215  2506.0            1.001106e-16

[216 rows x 2 columns]


In [42]:
results_sorted = results.sort_values(by="Explained Variance (%)", ascending=False)
results_sorted["Explained Variance (%)"] = results_sorted["Explained Variance (%)"].map("{:.2f}".format)
print(results_sorted)

   Feature Explained Variance (%)
0    356.0                  87.55
1    366.0                  10.41
2    376.0                   1.66
3    386.0                   0.16
4    396.0                   0.12
..     ...                    ...
26   616.0                  -0.00
18   536.0                  -0.00
17   526.0                  -0.00
9    446.0                  -0.00
8    436.0                  -0.00

[216 rows x 2 columns]
