In [None]:
# This tests out methodology for doing PCA decomposition and dimensionality
# reduction of empirical data with SVD

# Created 2021 Sept. 18 by E.S.

In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from scipy import linalg

%matplotlib qt

In [2]:
# load the time-series table; the first CSV column becomes the row index
time_series = pd.read_csv(filepath_or_buffer="test_time_series_20210917.csv", index_col=0)

In [3]:
# show the column labels of the table (same Index that .keys() returns)
time_series.columns

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '295', '296', '297', '298', '299', '300', '301', 'fit_file_name',
       'jd-2459431', 'jd_helio-2459431'],
      dtype='object', length=305)

In [4]:
# make list of brightest stars (and exclude columns with other data)

# columns that are skipped when selecting stars
non_photometry_cols = ("fit_file_name", "jd-2459431", "jd_helio-2459431")
# a column is kept only if its median value exceeds this threshold
bright_threshold = 5000

list_brightest = [] # initialize list of brightest stars
# DataFrame.items() replaces iteritems(), which was removed in pandas 2.0
for (columnName, columnData) in time_series.items():
    if columnName in non_photometry_cols:
        continue
    if np.median(columnData.values) > bright_threshold:
        list_brightest.append(str(columnName))

In [5]:
# pick out the bright-star columns and standardize them

# photometry of the brightest stars only, as a plain array (one column per star)
x = time_series[list_brightest].values

# fit the scaler to the photometry and apply it in one step
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [10]:
# decompose with SVD

# s comes back as a 1-D array of singular values
U, s, Vh = linalg.svd(x_scaled)

# turn s into the full sigma matrix
# (diagsvd builds the rectangular matrix with s on its main diagonal and
# zeros elsewhere, replacing a hand-written fill loop)
sigma = linalg.diagsvd(s, U.shape[0], Vh.shape[0])

In [11]:
# rebuild the scaled matrix from its three SVD factors

# multiply sigma by Vh first, then apply U
x_scaled_recon = U @ (sigma @ Vh)
# check that the product matches the input to within floating-point tolerance
np.allclose(x_scaled, x_scaled_recon)

True

In [12]:
# shapes of the three SVD factors, one per line
print(U.shape, sigma.shape, Vh.shape, sep="\n")

(2302, 2302)
(2302, 58)
(58, 58)


In [35]:
# reconstruct original matrix ON LOWER DIMS

# number of leading singular components to keep
N = 20
# use only the first N columns of sigma and the first N rows of Vh
x_scaled_lower = np.dot(U, np.dot(sigma[:,:N], Vh[:N,:]))
# compare the input with the truncated reconstruction computed here
# (not with x_scaled_recon, which is the full reconstruction)
np.allclose(x_scaled, x_scaled_lower)

False

In [36]:
# column index of the star to inspect
star_to_use = 27

# that star's column in the scaled data and in the lower-dimensional reconstruction
original_curve = x_scaled[:, star_to_use]
lower_curve = x_scaled_lower[:, star_to_use]

# residual: scaled data minus the lower-dimensional reconstruction
test_photometry_decorr = original_curve - lower_curve

# draw the three curves on the current figure, in this order
for curve, curve_label in (
    (original_curve, "original"),
    (lower_curve, "lower"),
    (test_photometry_decorr, "decorrelated"),
):
    plt.plot(curve, label=curve_label)
plt.legend()
plt.show()