In [11]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.misc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
from joblib import dump, load
import pickle

In [12]:
# ---------------------------------------------------------------------------
# Data preparation: slice the raw table into named columns, assemble the
# feature/target matrices, split into train/test, standardise, persist scalers.
# ---------------------------------------------------------------------------

# Load the raw table only if it is not already in the kernel, so the cell works
# on a fresh kernel ("Restart & Run All") without re-reading the file on every
# re-run.
# NOTE(review): the path comes from the previously commented-out load line;
# confirm it is the dataset the recorded outputs were produced from.
if "X" not in globals():
    X = np.loadtxt("dataset_N2N_BIG.dat")

N_BLOCK = 47       # width of every multi-column block in the table
RATES_START = 54   # first column of the five consecutive target blocks

# Scalar columns (1-D views).
x_s = X[:, 0]
t_s = X[:, 1]
T   = X[:, 2]

# Input blocks; the single-column slices are kept 2-D on purpose so they can be
# concatenated column-wise below.
ni_n = X[:, 3:3 + N_BLOCK]
na_n = X[:, 50:51]
rho  = X[:, 51:52]
v    = X[:, 52:53]
p    = X[:, 53:54]

# Five back-to-back target blocks of N_BLOCK columns each, starting at
# RATES_START (i.e. columns 54:101, 101:148, 148:195, 195:242, 242:289).
RDm, RDa, RVTm, RVTa, RVV = (
    X[:, RATES_START + k * N_BLOCK:RATES_START + (k + 1) * N_BLOCK]
    for k in range(5)
)

for label, values in [
    ("x_s", x_s), ("t_s", t_s), ("T", T), ("ni_n", ni_n), ("na_n", na_n),
    ("rho", rho), ("v", v), ("p", p), ("RDm", RDm), ("RDa", RDa),
    ("RVTm", RVTm), ("RVTa", RVTa), ("RVV", RVV),
]:
    print(f"{label} =", values)

# Concatenate molecular and atomic molar vectors
n_ma_n = np.concatenate((ni_n, na_n), axis=1)

# np.c_ stacks 1-D arrays as columns next to the 2-D blocks.
# Despite the names, X_train / Y_train hold ALL samples; the actual split
# happens just below.
X_train = np.c_[x_s, t_s, T, n_ma_n, rho, v, p]
print(X_train.shape)

Y_train = np.c_[RDm, RDa, RVTm, RVTa, RVV]
print(Y_train.shape)

# Split data into train/test (fixed random_state for a reproducible split).
x_train, x_test, y_train, y_test = train_test_split(
    X_train, Y_train, train_size=0.75, test_size=0.25, random_state=69
)

# Standardise features and targets; the scalers are fitted on the training
# portion only and then applied to the test portion.
sc_x = StandardScaler()
sc_y = StandardScaler()

x_train = sc_x.fit_transform(x_train)
x_test  = sc_x.transform(x_test)

y_train = sc_y.fit_transform(y_train)
y_test  = sc_y.transform(y_test)

# Persist the fitted scalers. The context managers guarantee the files are
# flushed and closed (the bare open(...) calls previously leaked the handles).
with open('scaler_x_shear.pkl', 'wb') as scaler_x_file:
    dump(sc_x, scaler_x_file)
with open('scaler_y_shear.pkl', 'wb') as scaler_y_file:
    dump(sc_y, scaler_y_file)

print('Training Features Shape:', x_train.shape)
print('Training Labels Shape:',   y_train.shape)
print('Testing Features Shape:',  x_test.shape)
print('Testing Labels Shape:',    y_test.shape)

x_s = [0.0000000e+00 9.2426042e-03 1.8485208e-02 ... 8.4240986e+04 9.4240986e+04
 1.0000000e+05]
t_s = [0.0000000e+00 4.5330581e-08 9.0661162e-08 ... 2.7544133e-02 3.0813817e-02
 3.2696832e-02]
T = [  2038.7001   2038.6866   2038.6731 ... 284057.71   284057.71
 284057.71  ]
ni_n = [[1.0000000e+00 3.0221803e-15 1.3775893e-29 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [9.9998478e-01 1.5217419e-05 3.5476132e-10 ... 3.4629382e-32
  1.4441719e-32 4.2054678e-31]
 [9.9996957e-01 3.0432552e-05 1.1487908e-09 ... 4.8388402e-29
  2.0930328e-29 5.9563941e-28]
 ...
 [8.6517563e-07 8.5505245e-07 8.4516998e-07 ... 5.9106635e-07
  5.8788085e-07 5.8479712e-07]
 [8.6517563e-07 8.5505245e-07 8.4516998e-07 ... 5.9106635e-07
  5.8788085e-07 5.8479712e-07]
 [8.6517563e-07 8.5505245e-07 8.4516998e-07 ... 5.9106635e-07
  5.8788085e-07 5.8479712e-07]]
na_n = [[0.0000000e+00]
 [1.4244661e-24]
 [3.5710460e-24]
 ...
 [9.9996717e-01]
 [9.9996717e-01]
 [9.9996717e-01]]
rho = [[0.01924297]
 [0.01924313]
 [0.0

In [8]:
# Define randomized SVD function
def rSVD(X,r,q,p,seed=None):
    """Approximate the SVD of ``X`` with a randomized range finder.

    The column space of ``X`` is sampled with a Gaussian test matrix of
    ``r + p`` columns, optionally sharpened with ``q`` power iterations,
    orthonormalised with a reduced QR, and the exact SVD is then computed on
    the much smaller projected matrix ``Q.T @ X``.

    Parameters
    ----------
    X : 2-D array, shape (m, n)
        Matrix to decompose.
    r : int
        Target rank.
    q : int
        Number of power iterations ``Z <- X @ (X.T @ Z)``.
    p : int
        Oversampling parameter; ``r + p`` random directions are drawn.
    seed : None, int or numpy.random.Generator, optional
        Source of randomness for the test matrix. ``None`` (default) keeps the
        historical behaviour of drawing from NumPy's global random state
        (``np.random.randn``); any other value is passed to
        ``np.random.default_rng`` so the result is reproducible.

    Returns
    -------
    U : array, shape (m, k)
    S : array, shape (k,)
        Singular values in descending order.
    VT : array, shape (k, n)
        ``U @ np.diag(S) @ VT`` approximates ``X``. ``k`` is at most
        ``r + p``; the factors are NOT truncated to ``r`` components.

    Raises
    ------
    ValueError
        If ``r + p`` is smaller than 1.
    """
    n_sketch = r + p
    if n_sketch < 1:
        raise ValueError("r + p must be at least 1, got %d" % n_sketch)

    # Step 1: Sample column space of X with P matrix
    ny = X.shape[1]
    if seed is None:
        P = np.random.randn(ny, n_sketch)
    else:
        P = np.random.default_rng(seed).standard_normal((ny, n_sketch))
    Z = X @ P
    for _ in range(q):
        Z = X @ (X.T @ Z)

    # Only the orthonormal basis is needed; the triangular factor is discarded.
    Q, _ = np.linalg.qr(Z, mode='reduced')

    # Step 2: Compute SVD on projected Y = Q.T @ X
    Y = Q.T @ X
    UY, S, VT = np.linalg.svd(Y, full_matrices=False)
    U = Q @ UY  # lift the left singular vectors back to the original space

    return U, S, VT

In [None]:
# Reduced (economy-size) SVD of x_train; S comes back as a 1-D vector of
# singular values, not as a diagonal matrix.
U, S, VT = np.linalg.svd(x_train, full_matrices=False)

In [7]:
# Randomized-SVD settings: target rank, power iterations, oversampling.
# NOTE(review): `p` rebinds the name used for the X[:, 53:54] slice in the
# data-preparation cell, and this cell requires the cell defining rSVD to have
# been executed first.
r, q, p = 400, 1, 5

rU, rS, rVT = rSVD(x_train, r, q, p)

NameError: name 'rSVD' is not defined

In [None]:
# Singular-value diagnostics.
# S is the 1-D vector of singular values returned by np.linalg.svd, so it is
# plotted directly. Wrapping it in np.diag() would BUILD an n x n diagonal
# matrix (plotted as n separate curves, and cumulated over n*n entries) rather
# than extract the values.
S_cumulative = np.cumsum(S) / np.sum(S)  # fraction of the total captured by the leading values

fig_spectrum, ax_spectrum = plt.subplots()
ax_spectrum.semilogy(S, '-o', color='k')
ax_spectrum.grid()
ax_spectrum.set_title('Unrotated Matrix: Spectrum')
plt.show()

fig_sv, ax_sv = plt.subplots()
ax_sv.semilogy(S)
ax_sv.set_title('Singular Values')
plt.show()

fig_cum, ax_cum = plt.subplots()
ax_cum.plot(S_cumulative)
ax_cum.set_title('Singular Values: Cumulative Sum')
plt.show()

# Side-by-side summary: spectrum (left) and normalised cumulative sum (right).
fig1 = plt.figure()
ax1 = fig1.add_subplot(121)
ax1.semilogy(S, '-o', color='k')
ax2 = fig1.add_subplot(122)
ax2.plot(S_cumulative, '-o', color='k')

plt.show()