In [None]:
import pickle
import numpy as np
from utils import load_data, generate_data, preprocess_data, error
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from pydmd import DMD, DMDc

import matplotlib.pyplot as plt
#from pylc.utils import format_axis, rcparams, format_axis_im
from mpl_toolkits.axes_grid1 import make_axes_locatable
#rcparams(1)

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 
warnings.filterwarnings("ignore", category=UserWarning)


In [None]:
import os
import pickle

import keras_tuner as kt
import matplotlib.pyplot as plt
import numpy as np
import pandas
import spektral
import tensorflow as tf
from keras.callbacks import EarlyStopping
from scipy import sparse
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

### load data

In [None]:
x, u, sites = load_data(holiday=False, pollutant=['SO2', 'PM2.5', 'PM10', 'CO', 'NO2', 'O3'])

x = preprocess_data(x)
u = preprocess_data(u)

x, u = generate_data(x, u, window=4*24, size=31*24, rolling=True)

print([x.shape, u.shape])

In [None]:
x[0][1]

### simple dmd

In [None]:
from sklearn.datasets import make_classification

# Synthetic data set: 1000 samples x 100 features (10 informative, 90 redundant).
# make_classification returns a (features, labels) pair; the labels are bound to U here.
X, U = make_classification(n_samples=1000, n_features=100, n_informative=10, n_redundant=90, random_state=1)
# Summarize the data set. The original printed `y.shape`, but no `y` exists in this
# notebook (the second return value is named U), which raised a NameError.
print(X.shape, U.shape)

In [None]:
def dmd(x, r, n_pca=None, return_A=True):
    """Fit a plain DMD model to one snapshot matrix and return its reconstruction.

    Pipeline: standardize -> optional whitened PCA -> DMD fit -> inverse PCA ->
    inverse standardization, so the result is in the units of ``x``.

    Parameters
    ----------
    x : 2-D array
        Snapshot matrix. It is transposed before every scikit-learn call, so the
        scaler/PCA treat the columns of ``x`` as samples.
        # assumes rows are variables and columns are time steps -- TODO confirm
    r : int or float
        Forwarded unchanged to ``DMD(svd_rank=r)``.
    n_pca : int or None, optional
        Number of PCA components; ``None`` skips the PCA step entirely.
    return_A : bool, optional
        When true, also return ``dmd_.eigs`` (the DMD eigenvalues, not a matrix,
        despite the parameter name).

    Returns
    -------
    x_pred : 2-D array
        Real part of the DMD reconstruction, mapped back through the inverse
        PCA (if used) and the inverse scaler.
    eigs : array
        Only when ``return_A`` is true.
    """
    scaler = StandardScaler()
    x_ = scaler.fit_transform(x.T).T

    # `pca` stays None when no reduction is requested, which also guards the
    # inverse transform below.
    pca = None
    if n_pca is not None:
        pca = PCA(n_components=n_pca, whiten=True, random_state=0)
        x_ = pca.fit_transform(x_.T).T

    # x: m*n, m > n-1 is the often case in DMD
    # r_max = min(m, n-1)
    dmd_ = DMD(svd_rank=r)
    dmd_.fit(x_)
    x_pred = dmd_.reconstructed_data.real

    if pca is not None:
        x_pred = pca.inverse_transform(x_pred.T).T

    x_pred = scaler.inverse_transform(x_pred.T).T

    if return_A:
        return x_pred, dmd_.eigs
    return x_pred

In [None]:
x_pred, A_ = dmd(x[284], r=84, n_pca=85)

In [None]:
fig, ax = plt.subplots()

ax.plot(x[284][0], label='True')
ax.plot(x_pred[0], label='Pred')

ax.set_xlabel('Time (h)')
ax.set_ylabel('PM2.5')
ax.set_xlim([0, 95])
ax.legend()
#format_axis(ax)

In [None]:
fig, ax = plt.subplots(figsize=(8,8))

ax.scatter(A_.real, A_.imag, color='b', label='Eigenvalues')
circle = plt.Circle((0, 0), 1, edgecolor='g', linestyle='--', fill=False, label='Unit Circle')
ax.add_patch(circle)
ax.legend(loc=1, fontsize=15)
ax.grid()
ax.set_xlabel('Real Part')
ax.set_ylabel('Imaginary Part')
ax.set_xlim([-1.2, 1.2])
ax.set_ylim([-1.2, 1.2])
#format_axis(ax)
# plt.savefig('../result/figure/A1_eig.png', dpi=300, bbox_inches='tight')

### dmd with control

In [None]:
def dmdc(x, u, r, n_pca=None, std=True):
    """Fit a DMD-with-control model and return the reconstruction and the B operator.

    Parameters
    ----------
    x : 2-D array
        State snapshot matrix; transposed before every scikit-learn call.
        # assumes rows are variables and columns are time steps -- TODO confirm
    u : 2-D array
        Control-input matrix with the same column layout as ``x``. Its last
        column is dropped before fitting.
    r : int or float
        Forwarded unchanged to ``DMDc(svd_rank=r)``.
    n_pca : int or None, optional
        When given, ``x`` and ``u`` are each reduced to ``n_pca`` whitened
        principal components (two independent PCA fits).
    std : bool, optional
        When true, ``x`` and ``u`` are standardized with separate scalers and
        the reconstruction is mapped back through the ``x`` scaler.

    Returns
    -------
    x_pred : 2-D array
        Real part of the DMDc reconstruction, in the original space of ``x``.
    B : array
        ``dmd_.B`` of the fitted model. When PCA/standardization is active it is
        expressed in the transformed coordinates, not the original ones.
    """
    # Drop the final control snapshot.
    # NOTE(review): presumably because DMDc pairs each control column with a
    # state transition (one fewer than the number of snapshots) -- confirm.
    u = u[:, :-1]

    scaler_x = None
    if std:
        scaler_x = StandardScaler()
        x_ = scaler_x.fit_transform(x.T).T
        u_ = StandardScaler().fit_transform(u.T).T
    else:
        x_, u_ = x, u

    pca_x = None
    if n_pca is not None:
        pca_x = PCA(n_components=n_pca, whiten=True, random_state=0)
        x_ = pca_x.fit_transform(x_.T).T
        # The control PCA is never inverted, so the fitted object is not kept.
        u_ = PCA(n_components=n_pca, whiten=True, random_state=1).fit_transform(u_.T).T

    # x: m*n, m > n-1 is the often case in DMD
    # r_max = min(m, n-1)
    dmd_ = DMDc(svd_rank=r)
    dmd_.fit(x_, u_)
    x_pred = dmd_.reconstructed_data().real

    # Undo the transforms in reverse order of application.
    if pca_x is not None:
        x_pred = pca_x.inverse_transform(x_pred.T).T
    if scaler_x is not None:
        x_pred = scaler_x.inverse_transform(x_pred.T).T

    return x_pred, dmd_.B

In [None]:
i = 0
x_pred, B_ = dmdc(x[i], u[i], r=90, n_pca=None, std=True)

B = np.array(B_)
# err1_ = error(x[-1], x_pred, mode='percent')

In [None]:
ss1 = StandardScaler()
x_ = ss1.fit_transform(x[0].T).T

In [None]:
fig, ax = plt.subplots()
im = ax.imshow(x_, cmap='coolwarm', vmin=-2, vmax=2)
ax.set_xticks([24 * i for i in range(5)])
ax.set_yticks([34 * i for i in range(7)])
ax.set_yticklabels([])
ax.set_xticklabels([0, 1, 2, 3, 4])
ax.grid(c='k', axis='y')
div = make_axes_locatable(ax)
cax = div.append_axes("right", size="4%", pad="5%")
cb = fig.colorbar(im, cax=cax)
cb.set_label(label='Normalized concentration', size=25)
ax.set_xlabel('Day')

ax.text(-2, 17, r'SO$_2$', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 51, r'PM$_{2.5}$', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 85, r'PM$_{10}$', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 119, r'CO', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 153, r'NO$_2$', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 187, r'O$_3$', va='center', ha='right', rotation=90, fontsize=20)


In [None]:
fig, ax = plt.subplots()
im = ax.imshow(np.corrcoef(x_), cmap='coolwarm', vmin=-1, vmax=1, origin='lower')
ax.set_xticks([34 * i for i in range(7)])
ax.set_yticks([34 * i for i in range(7)])
ax.set_yticklabels([])
ax.set_xticklabels([])
ax.grid(c='k', axis='y')
div = make_axes_locatable(ax)
cax = div.append_axes("right", size="4%", pad="5%")
cb = fig.colorbar(im, cax=cax)
cb.set_label(label='Correlation', size=25)


ax.text(-2, 17, r'SO$_2$', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 51, r'PM$_{2.5}$', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 85, r'PM$_{10}$', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 119, r'CO', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 153, r'NO$_2$', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 187, r'O$_3$', va='center', ha='right', rotation=90, fontsize=20)

ax.text(17, -15, r'SO$_2$', va='bottom', ha='center', rotation=0, fontsize=20)
ax.text(51, -15, r'PM$_{2.5}$', va='bottom', ha='center', rotation=0, fontsize=20)
ax.text(85, -15, r'PM$_{10}$', va='bottom', ha='center', rotation=0, fontsize=20)
ax.text(119, -15, r'CO', va='bottom', ha='center', rotation=0, fontsize=20)
ax.text(153, -15, r'NO$_2$', va='bottom', ha='center', rotation=0, fontsize=20)
ax.text(187, -15, r'O$_3$', va='bottom', ha='center', rotation=0, fontsize=20)


In [None]:
ss1 = StandardScaler()
u_ = ss1.fit_transform(u[0].T).T

In [None]:
fig, ax = plt.subplots()
im = ax.imshow(u_, cmap='coolwarm', vmin=-2, vmax=2)
ax.set_xticks([24 * i for i in range(5)])
ax.set_yticks([34 * i for i in range(7)])
ax.set_yticklabels([])
ax.set_xticklabels([0, 1, 2, 3, 4])
ax.grid(c='k', axis='y')
div = make_axes_locatable(ax)
cax = div.append_axes("right", size="4%", pad="5%")
cb = fig.colorbar(im, cax=cax)
cb.set_label(label='Normalized value', size=25)
ax.set_xlabel('Day')

ax.text(-2, 17, r'd2m', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 51, r't2m', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 85, r'rh', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 119, r'sp', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 153, r'u10', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 187, r'v10', va='center', ha='right', rotation=90, fontsize=20)
ax.text(-2, 221, r'blh', va='center', ha='right', rotation=90, fontsize=20)

In [None]:
# Correlation matrix between all rows of the standardized control matrix u_.
fig, ax = plt.subplots()
im = ax.imshow(np.corrcoef(u_), cmap='coolwarm', vmin=-1, vmax=1, origin='lower')
ax.set_xticks([34 * i for i in range(7)])
ax.set_yticks([34 * i for i in range(7)])
ax.set_yticklabels([])
ax.set_xticklabels([])
ax.grid(c='k', axis='y')
div = make_axes_locatable(ax)
cax = div.append_axes("right", size="4%", pad="5%")
cb = fig.colorbar(im, cax=cax)
cb.set_label(label='Correlation', size=25)

# One label per 34-row variable block, centred at 17 + 34 * block number, on both
# axes. The first two names were spelled 'dm2'/'tm2' here; they now match the
# 'd2m'/'t2m' labels used on the u_ heat map of the same variables.
for block_no, met_name in enumerate(['d2m', 't2m', 'rh', 'sp', 'u10', 'v10', 'blh']):
    block_center = 17 + 34 * block_no
    ax.text(-2, block_center, met_name, va='center', ha='right', rotation=90, fontsize=20)
    ax.text(block_center, -15, met_name, va='bottom', ha='center', rotation=0, fontsize=20)


In [None]:
# Heat map of the DMDc B operator: rows are labelled by pollutant block, columns
# by meteorological-variable block (34 rows/columns per block).
fig, ax = plt.subplots()
im = ax.imshow(B, cmap='coolwarm', vmin=-.05, vmax=.05, origin='lower')
ax.set_xticks([34 * i for i in range(7)])
ax.set_yticks([34 * i for i in range(6)])
ax.set_yticklabels([])
ax.set_xticklabels([])
ax.grid(c='k', axis='both')
div = make_axes_locatable(ax)
cax = div.append_axes("right", size="4%", pad="5%")
cb = fig.colorbar(im, cax=cax)
cb.set_label(label='B Matrix', size=25)

# Row labels: one pollutant per 34-row block, centred at 17 + 34 * block number.
pollutant_names = [r'SO$_2$', r'PM$_{2.5}$', r'PM$_{10}$', r'CO', r'NO$_2$', r'O$_3$']
for block_no, pollutant_name in enumerate(pollutant_names):
    ax.text(-2, 17 + 34 * block_no, pollutant_name, va='center', ha='right', rotation=90, fontsize=20)

# Column labels. The first two names were spelled 'dm2'/'tm2' here; they now match
# the 'd2m'/'t2m' labels used on the u_ heat map of the same variables.
for block_no, met_name in enumerate(['d2m', 't2m', 'rh', 'sp', 'u10', 'v10', 'blh']):
    ax.text(17 + 34 * block_no, -15, met_name, va='bottom', ha='center', rotation=0, fontsize=20)


In [None]:
i = 0
j = 5

Bs = B[i*34:(i+1)*34, j*34:(j+1)*34]
Bc = np.corrcoef(Bs.T)
plt.imshow(Bc, cmap='coolwarm', vmin=-1, vmax=1)

In [None]:
downtown = ['FTHY', 'WL', 'XZMB', 'GY', 'WSXG', 'NSH', 'YDMN', 'QM', 'ATZX', 'DS', 'TT', 'NZG', 'DSH']
suburban = ['BBXQ', 'MTG', 'GC', 'YG', 'FS', 'DX', 'YZ', 'TZ']
northwest_rural = ['YQ', 'BDL', 'DL', 'CP']
northeast_rural = ['SY', 'HR', 'MY', 'SK', 'PG', 'DGC']
southwest_rural = ['LLH']
southeast_rural = ['YF', 'YLD']

In [None]:
# For every region, look up the position of each site code inside `sites`
# (first match only; a code missing from `sites` raises IndexError).
downtown_idx = [np.where(sites == code)[0][0] for code in downtown]
suburban_idx = [np.where(sites == code)[0][0] for code in suburban]
northwest_rural_idx = [np.where(sites == code)[0][0] for code in northwest_rural]
northeast_rural_idx = [np.where(sites == code)[0][0] for code in northeast_rural]
southwest_rural_idx = [np.where(sites == code)[0][0] for code in southwest_rural]
southeast_rural_idx = [np.where(sites == code)[0][0] for code in southeast_rural]

# Display the number of sites found per region.
tuple(
    len(region_idx)
    for region_idx in (
        downtown_idx,
        suburban_idx,
        northwest_rural_idx,
        northeast_rural_idx,
        southwest_rural_idx,
        southeast_rural_idx,
    )
)

In [None]:
idx = downtown_idx + suburban_idx + northwest_rural_idx + northeast_rural_idx + southwest_rural_idx + southeast_rural_idx

In [None]:
c1 = Bc[idx]
c2 = c1[:, idx]

In [None]:
fig, ax = plt.subplots()
im = ax.imshow(c2, cmap='coolwarm', vmin=-1, vmax=1)

ax.set_xticks([-0.5, 12.5, 20.5, 24.5, 30.5, 31.5, 33.5])
ax.set_yticks([-0.5, 12.5, 20.5, 24.5, 30.5, 31.5, 33.5])
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])

div = make_axes_locatable(ax)
cax = div.append_axes("right", size="4%", pad="5%")
cb = fig.colorbar(im, cax=cax)

# format_axis_im(ax)
# plt.yticks([12.5, 20.5, 24.5, 30.5, 33.5])
ax.grid(c='k')

In [None]:
len(idx)

### dmd without pca

In [None]:
n_bins = np.arange(-1.0, 10.0, 0.01)
fig, ax = plt.subplots()

for i in np.arange(1, 56+1, 5):
    with open(f'../result/pickle/pca_56_r_{i}_simple.pickle', 'rb') as handle:
        err = pickle.load(handle)
    err_ = err[:, 0]
    n, bins, patches = ax.hist(np.log10(err_), n_bins, density=True, histtype='step',
                               cumulative=True, label='PCA', linewidth=2)
ax.set_xlim([-1, 10])
# ax.legend(loc=4)

ax.set_xlabel('log10 Error')
ax.set_ylabel('Cumulative Frequency')
#format_axis(ax)

In [None]:
with open(f'../result/pickle/pca_56_r_1_simple.pickle', 'rb') as handle:
    err = pickle.load(handle)
err[:, 0]

In [None]:
with open(f'../result/pickle/pca_56_r_6_simple.pickle', 'rb') as handle:
    err = pickle.load(handle)
err[:, 0]

In [None]:
i = 372
x_true1, x_pred1, x_forecast1, x_future1, cond1_, eig1_ = dmd(x[i], x[i+24], r=85, n_pca=86)
x_true2, x_pred2, x_forecast2, x_future2, cond2_, eig2_ = dmd(x[i], x[i+24], r=85, n_pca=None)

In [None]:
fig, ax = plt.subplots(figsize=(8, 4), ncols=2)
ax[0].plot(x_true1[1], label='Original')
ax[0].plot(x_pred1[1], label='PCA')

ax[1].plot(x_true1[1], label='Original')
ax[1].plot(x_pred2[1], label='No PCA')

for ax_ in ax:
    ax_.set_xlabel('Time (h)')
    ax_.set_ylabel('PM2.5')
    ax_.set_xlim([0, 94])
    ax_.legend()
    #format_axis(ax_)
plt.tight_layout()
plt.savefig('../result/figure/pca_worst.png', dpi=300, bbox_inches='tight')

In [None]:
i = 372
x_true1, x_pred1, x_forecast1, x_future1, cond1_, eig1_ = dmd(x[i], x[i+24], r=57, n_pca=None)
x_true2, x_pred2, x_forecast2, x_future2, cond2_, eig2_ = dmd(x[i], x[i+24], r=58, n_pca=None)
x_true3, x_pred3, x_forecast3, x_future3, cond3_, eig3_ = dmd(x[i], x[i+24], r=85, n_pca=None)

In [None]:
fig, ax = plt.subplots(figsize=(12, 4), ncols=3)
ax[0].plot(x_true1[1], label='Original')
ax[0].plot(x_pred1[1], label='No PCA r=57')

ax[1].plot(x_true1[1], label='Original')
ax[1].plot(x_pred2[1], label='No PCA r=58')

ax[2].plot(x_true1[1], label='Original')
ax[2].plot(x_pred3[1], label='No PCA r=85')

for ax_ in ax:
    ax_.set_xlabel('Time (h)')
    ax_.set_ylabel('PM2.5')
    ax_.set_xlim([0, 94])
    ax_.legend()
    #format_axis(ax_)
plt.tight_layout()
plt.savefig('../result/figure/pca_worst_nochange.png', dpi=300, bbox_inches='tight')

In [None]:
n_bins = np.arange(-1.0, 10.0, 0.01)

fig, ax = plt.subplots()
n, bins, patches = ax.hist(np.log10(erf1), n_bins, density=True, histtype='step',
                           cumulative=True, label='PCA', linewidth=2)
n, bins, patches = ax.hist(np.log10(erf2), n_bins, density=True, histtype='step',
                           cumulative=True, label='No PCA', linewidth=2)
ax.set_xlim([-1, 2])
ax.legend(loc=4)

ax.set_xlabel('log10 Error')
ax.set_ylabel('Cumulative Frequency')
#format_axis(ax)
plt.savefig('../result/figure/error_forecast.png', dpi=300, bbox_inches='tight')

In [None]:
x_ = x[0]
xmax_ = np.max([x_.max(), np.abs(x_.min())])
fig, ax = plt.subplots()
im = ax.imshow(x_, vmin=-xmax_, vmax=xmax_, cmap='coolwarm')

divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=0.05)

fig.colorbar(im, cax=cax, orientation='vertical')

ax.set_xlabel('Time (h)')
ax.set_ylabel('Contaminant ID')
#format_axis(ax)

plt.savefig('../result/figure/cond_original.png', dpi=300, bbox_inches='tight')
plt.show()

dmd_ = DMD(svd_rank=85)
dmd_.fit(x_)
A_ = dmd_.eigs

print(np.linalg.cond(x_))

fig, ax = plt.subplots(figsize=(8,8))
ax.scatter(A_.real, A_.imag, color='b', label='Eigenvalues')
circle = plt.Circle((0, 0), 1, edgecolor='g', linestyle='--', fill=False, label='Unit Circle')
ax.add_patch(circle)
ax.legend(loc=1)
ax.grid()
ax.set_xlabel('Real Part')
ax.set_ylabel('Imaginary Part')
ax.set_xlim([-1.2, 1.2])
ax.set_ylim([-1.2, 1.2])
#format_axis(ax)
plt.savefig('../result/figure/A1_eig.png', dpi=300, bbox_inches='tight')

In [None]:
# Standardize the snapshot matrix (scaler applied to the transpose) and redraw the heat map.
# NOTE(review): x_ is rebound to its standardized version, so re-running this cell
# standardizes already-standardized data; it relies on the preceding cell's x_.
ss = StandardScaler()
x_ = ss.fit_transform(x_.T).T

# Symmetric colour limits around zero.
xmax_ = np.max([x_.max(), np.abs(x_.min())])
fig, ax = plt.subplots()
im = ax.imshow(x_, vmin=-xmax_, vmax=xmax_, cmap='coolwarm')

divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=0.05)

fig.colorbar(im, cax=cax, orientation='vertical')

ax.set_xlabel('Time (h)')
ax.set_ylabel('Contaminant ID')
#format_axis(ax)

# NOTE(review): same output path as the un-standardized heat map saved by the
# preceding cell, so this figure overwrites that one -- confirm the intended file name.
plt.savefig('../result/figure/cond_original.png', dpi=300, bbox_inches='tight')
plt.show()
# Condition number of the standardized matrix, then element [1] of its singular-value array.
print(np.linalg.cond(x_))
print(np.linalg.svd(x_)[1][1])

dmd_ = DMD(svd_rank=85)
dmd_.fit(x_)
A2_ = dmd_.eigs

fig, ax = plt.subplots(figsize=(8,8))
ax.scatter(A2_.real, A2_.imag, color='b', label='Eigenvalues')
circle = plt.Circle((0, 0), 1, edgecolor='g', linestyle='--', fill=False, label='Unit Circle')
ax.add_patch(circle)
ax.legend(loc=1)
ax.grid()
ax.set_xlabel('Real Part')
ax.set_ylabel('Imaginary Part')
ax.set_xlim([-1.2, 1.2])
ax.set_ylim([-1.2, 1.2])
#format_axis(ax)
plt.savefig('../result/figure/A2_eig.png', dpi=300, bbox_inches='tight')

In [None]:
pca = PCA(n_components=96, whiten=True, random_state=0)
x_ = pca.fit_transform(x_.T).T

xmax_ = np.max([x_.max(), np.abs(x_.min())])
fig, ax = plt.subplots()
im = ax.imshow(x_, vmin=-xmax_, vmax=xmax_, cmap='coolwarm')

divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=0.05)

fig.colorbar(im, cax=cax, orientation='vertical')

ax.set_xlabel('Time (h)')
ax.set_ylabel('PC ID')
#format_axis(ax)

plt.savefig('../result/figure/cond_pca.png', dpi=300, bbox_inches='tight')
plt.show()
print(np.linalg.cond(x_))
print(np.linalg.svd(x_)[1][1])

dmd_ = DMD(svd_rank=85)
dmd_.fit(x_)
A3_ = dmd_.eigs

fig, ax = plt.subplots(figsize=(8,8))
ax.scatter(A3_.real, A3_.imag, color='b', label='Eigenvalues')
circle = plt.Circle((0, 0), 1, edgecolor='g', linestyle='--', fill=False, label='Unit Circle')
ax.add_patch(circle)
ax.legend(loc=1)
ax.grid()
ax.set_xlabel('Real Part')
ax.set_ylabel('Imaginary Part')
ax.set_xlim([-1.2, 1.2])
ax.set_ylim([-1.2, 1.2])
#format_axis(ax)
plt.savefig('../result/figure/A3_eig.png', dpi=300, bbox_inches='tight')

In [None]:
# Plot the singular values of A_, A2_ and A3_.
# NOTE(review): in the cells above these names are bound to `dmd_.eigs`, i.e. 1-D
# eigenvalue arrays, while np.linalg.svd expects an array with at least two
# dimensions -- this cell likely raises; confirm which matrix was intended.
fig, ax = plt.subplots(figsize=(4, 4))
ax.plot(np.linalg.svd(A_)[1])
ax.plot(np.linalg.svd(A2_)[1])
ax.plot(np.linalg.svd(A3_)[1])

In [None]:
# Compare the DMD eigenvalue magnitudes of the three preprocessing variants,
# each sorted in descending order.
# A_, A2_ and A3_ hold `dmd_.eigs` (1-D complex arrays). The original wrapped them
# in np.linalg.norm, which collapses each array to a single scalar that sorted()
# cannot iterate; the element-wise modulus is what gets plotted against the index.
fig, ax = plt.subplots(figsize=(4, 4))
ax.plot(sorted(np.abs(A_), reverse=True), label='Original')
ax.plot(sorted(np.abs(A2_), reverse=True), label='After Std')
ax.plot(sorted(np.abs(A3_), reverse=True), label='After PCA')

ax.set_xlim([0, 85])
ax.set_xlabel('Index')
ax.set_ylabel('Eigenvalue')

ax.legend()
#format_axis(ax)

plt.savefig('../result/figure/eig_comp.png', dpi=300, bbox_inches='tight')

In [None]:
# Compare the absolute imaginary parts of the DMD eigenvalues of the three
# preprocessing variants, each sorted in descending order.
# A_, A2_ and A3_ already ARE eigenvalues (`dmd_.eigs`, 1-D arrays); the original
# passed them to np.linalg.eigvals, which requires a square matrix and fails on
# 1-D input, so the imaginary parts are taken directly.
fig, ax = plt.subplots(figsize=(4, 4))
ax.plot(sorted(np.abs(A_.imag), reverse=True), label='Original')
ax.plot(sorted(np.abs(A2_.imag), reverse=True), label='After Std')
ax.plot(sorted(np.abs(A3_.imag), reverse=True), label='After PCA')

ax.set_xlim([0, 85])
ax.set_xlabel('Index')
ax.set_ylabel('Eigenvalue')

ax.legend()
#format_axis(ax)

# NOTE(review): this path is also written by the eigenvalue-magnitude figure cell;
# whichever cell runs last overwrites the other -- confirm the intended file name.
plt.savefig('../result/figure/eig_comp.png', dpi=300, bbox_inches='tight')

In [None]:
n_bins = np.arange(-1.0, 20.0, 0.01)

fig, ax = plt.subplots(figsize=(4, 4))
n, bins, patches = ax.hist(np.log10(cond1), n_bins, density=True, histtype='step',
                           cumulative=True, label='PCA', linewidth=2)
n, bins, patches = ax.hist(np.log10(cond2), n_bins, density=True, histtype='step',
                           cumulative=True, label='No PCA', linewidth=2)
ax.set_xlim([-1, 20])
ax.legend(loc=3)
#format_axis(ax)

ax.set_xlabel('log10 Condition Number')
ax.set_ylabel('Cumulative Frequency')
#format_axis(ax)
plt.savefig('../result/figure/cond.png', dpi=300, bbox_inches='tight')

In [None]:
n_bins = np.arange(0.0, 1.2, 0.001)

fig, ax = plt.subplots(figsize=(4, 4))
n, bins, patches = ax.hist(eig1, n_bins, density=True, histtype='step',
                           cumulative=True, label='PCA', linewidth=2)
n, bins, patches = ax.hist(eig2, n_bins, density=True, histtype='step',
                           cumulative=True, label='No PCA', linewidth=2)
ax.set_xlim([0.9, 1.2])
ax.legend(loc=4)
ax.grid()
#format_axis(ax)

ax.set_xlabel('Eigenvalue')
ax.set_ylabel('Cumulative Frequency')
#format_axis(ax)
plt.savefig('../result/figure/eig.png', dpi=300, bbox_inches='tight')

In [None]:
np.where(np.array(eig1 ) >= 1), len(np.where(np.array(eig2) >= 1)[0]) / len(eig2)

In [None]:
eig2

In [None]:
np.sum(np.array(err1) > np.array(err2))

In [None]:
np.log10(cond2)

In [None]:
x_true, x_pred, x_forecast, cond_ = dmd(x[0], r=85, n_pca=86)

In [None]:
plt.plot(x_pred[0])
plt.plot(x_true[0])

In [None]:
dmd.eigs

In [None]:
from tensorflow import keras
def train(size):
    """Train a dense autoencoder 204 -> ``size`` -> 204 on the global ``x_train``.

    The model uses a SELU hidden layer and a sigmoid output layer and is fit to
    reproduce its own input with an MSE loss. The weights with the lowest
    validation loss are written to ``test_{size}.h5``; training stops early
    after 20 epochs without validation improvement.

    Parameters
    ----------
    size : int
        Width of the hidden (latent) layer; also used in the checkpoint file name.

    Returns
    -------
    dict
        The ``history`` dict of the Keras fit. The original computed this value
        and discarded it; callers that ignore the return value are unaffected.

    Notes
    -----
    Reads ``x_train`` from the notebook namespace.
    # assumes x_train is (n_samples, 204) and scaled to the sigmoid range -- TODO confirm
    """
    inputs = layers.Input(shape=(204, ))
    encoded = layers.Dense(size, activation='selu')(inputs)
    decoded = layers.Dense(204, activation='sigmoid')(encoded)
    model = Model(inputs=inputs, outputs=decoded)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    # Keep only the best weights (lowest val_loss) on disk.
    checkpoint = ModelCheckpoint(f'test_{size}.h5', monitor='val_loss',
                                 verbose=0, save_best_only=True,
                                 mode='min')
    early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=20)

    history = model.fit(x_train, x_train, batch_size=128,
                        epochs=500, callbacks=[checkpoint, early_stop],
                        validation_split=0.2, verbose=0, shuffle=True).history
    return history

In [None]:
n_components = np.arange(204, 150, -4)
for size in n_components:
    train(size)

In [None]:
fig, ax = plt.subplots()
ax.plot(x_train[-2], label='True')
ax.plot(x_pred[-2], label='Pred')
ax.set_xlabel('Var ID')
ax.set_ylabel('Magnitude')
ax.legend()
ax.grid()
#format_axis(ax)
plt.savefig('encoder.png', bbox_inches='tight', dpi=300)

In [None]:
from tensorflow import keras
def test(size):
    """Score DMD reconstruction in the latent space of a trained autoencoder.

    Rebuilds the 204 -> ``size`` -> 204 dense autoencoder, loads the weights
    stored in ``test_{size}.h5``, splits it into an encoder and a decoder, runs
    DMD on windows of the encoded data and of the raw data, and returns the mean
    error of the decoded latent-space reconstruction after discarding the two
    largest errors.

    Reads ``xs`` from the notebook namespace and calls ``generate_data``,
    ``dmd`` and ``per_error``.
    NOTE(review): ``xs`` and ``per_error`` are not defined in the visible part of
    this notebook -- confirm where they come from.
    """
    x_in = layers.Input(shape=(204, ))
    x = layers.Dense(size, activation='selu')(x_in)
    x = layers.Dense(204, activation='sigmoid')(x)
    model = Model(inputs=x_in, outputs=x)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    model.load_weights(f'test_{size}.h5')
    
    # Encoder: input up to the hidden layer. Decoder: the output layer re-applied
    # to a fresh input of the latent width.
    encoder = Model(model.input, model.layers[-2].output)

    decoder_input = layers.Input(shape=(size,))
    decoder = Model(decoder_input, model.layers[-1](decoder_input))

    
    # x: windows of the encoded data; x2: windows of the raw data.
    # NOTE(review): generate_data is called here with one array and `window` only,
    # whereas the load-data cell calls it with (x, u, window, size, rolling) --
    # confirm the signature this code was written against.
    xp = encoder.predict(xs)
    x = generate_data(xp.T, window=48)
    xp2 = xs
    x2 = generate_data(xp2.T, window=48)

    # DMD rank: 90% of the last dimension of the window array.
    r = int(0.9*x.shape[-1])

    err, err2 = [], []

    for t in range(len(x) - 1):
        # NOTE(review): three values are unpacked, but the `dmd` defined in this
        # notebook returns at most two (x_pred, eigs) -- presumably written for a
        # different version of `dmd`; confirm.
        x_o, x_r, _ = dmd(x[t], r)
        x2_o, x2_r, _ = dmd(x2[t], r)

        # The reference is taken from the raw-data call (x2_o); the x_o returned
        # by the latent-space call is discarded.
        x_o = x2_o.T
        x_r = x_r.T
        x2_r = x2_r.T
        # Map the latent-space reconstruction back to the 204-feature space.
        x_r = decoder.predict(x_r)

        err_ = per_error(x_o, x_r)
        err2_ = per_error(x_o, x2_r)

        err.append(err_)
        err2.append(err2_)
        print(f'{t}', end='\r')
    err = np.array(err)
    err2 = np.array(err2)
    # Sort ascending and drop the two largest errors of each series.
    err.sort()
    err2.sort()
    err = err[:-2]
    err2 = err2[:-2]
    # Only the autoencoder-path mean is returned; err2 is computed but unused.
    return err.mean()

In [None]:
errr = []
for size in n_components:
    err = test(size)
    errr.append(err)
# print(f'AE: {err.mean():0.4g}+/-{err.std():0.4g}')
# print(f'Ori: {err2.mean():0.4g}+/-{err2.std():0.4g}')

In [None]:
fig, ax = plt.subplots()
ax.plot(n_components, errr, label='Autoencoder')
ax.plot(n_components, np.ones(len(n_components)) * 0.2526, label='Original')
ax.set_xlabel('Latent Dimension')
ax.set_ylabel('Error')
ax.grid()
ax.legend()
#format_axis(ax)
plt.savefig('ae.png', bbox_inches='tight', dpi=300)

In [None]:
n_components = np.arange(204, 150, -4)
errr = []
for n in n_components:
    pca = PCA(n_components=n)

    xp = pca.fit_transform(xs)

    x = generate_data(xp.T, window=48)
    xp2 = xs
    x2 = generate_data(xp2.T, window=48)
    # print(xp.shape)

    r = int(0.9*x.shape[-1])

    err, err2 = [], []

    for t in range(len(x) - 1):
        x_o, x_r, _ = dmd(x[t], r)
        x2_o, x2_r, _ = dmd(x2[t], r)

        x_o = x2_o.T
        x_r = x_r.T
        x2_r = x2_r.T
        x_r = pca.inverse_transform(x_r)

        err_ = per_error(x_o, x_r)
        err2_ = per_error(x_o, x2_r)

        err.append(err_)
        err2.append(err2_)
        print(f'{t}', end='\r')
    err = np.array(err)
    err2 = np.array(err2)
    err.sort()
    err2.sort()
    err = err[:-2]
    err2 = err2[:-2]
    errr.append(err.mean())
    print(f'AE: {err.mean():0.4g}+/-{err.std():0.4g}')
    print(f'Ori: {err2.mean():0.4g}+/-{err2.std():0.4g}')

In [None]:
fig, ax = plt.subplots()
ax.plot(n_components, errr, label='PCA')
ax.plot(n_components, np.ones(len(n_components)) * 0.2526, label='Original')
ax.set_xlabel('# PC Components')
ax.set_ylabel('Error')
ax.grid()
ax.legend()
#format_axis(ax)
plt.savefig('pca.png', bbox_inches='tight', dpi=300)

In [None]:
pca = PCA(n_components=190)
xp = pca.fit_transform(xs)

In [None]:
fig, ax = plt.subplots()
mat = np.corrcoef(xs.T)
vmax = np.max(np.abs(mat))
ax.matshow(mat, cmap='coolwarm', vmin=-vmax, vmax=vmax)
#format_axis(ax)
plt.savefig('corr.png', bbox_inches='tight', dpi=300)

In [None]:
fig, ax = plt.subplots()
mat = np.corrcoef(xp.T)
vmax = np.max(np.abs(mat))
ax.matshow(mat, cmap='coolwarm', vmin=-vmax, vmax=vmax)
#format_axis(ax)
plt.savefig('corr_pca.png', bbox_inches='tight', dpi=300)

In [None]:
size=200
x_in = layers.Input(shape=(204, ))
x = layers.Dense(size, activation='selu')(x_in)
x = layers.Dense(204, activation='sigmoid')(x)
model = Model(inputs=x_in, outputs=x)
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

model.load_weights(f'test_{size}.h5')

encoder = Model(model.input, model.layers[-2].output)

decoder_input = layers.Input(shape=(size,))
decoder = Model(decoder_input, model.layers[-1](decoder_input))


xp2 = encoder.predict(xs)

In [None]:
fig, ax = plt.subplots()
mat = np.corrcoef(xp2.T)
vmax = np.max(np.abs(mat))
ax.matshow(mat, cmap='coolwarm', vmin=-vmax, vmax=vmax)
#format_axis(ax)
plt.savefig('corr_ae.png', bbox_inches='tight', dpi=300)

In [None]:
fig, ax = plt.subplots()
ax.plot(xs[:, 0], label='Original')
ax.set_xlabel('Time (hr)')
ax.legend()
#format_axis(ax)
plt.savefig('signal.png', bbox_inches='tight', dpi=300)
# ax.plot(xp[:, 0])
# ax.plot(xp2[:, 0])

In [None]:
fig, ax = plt.subplots()
ax.plot(xp2[:, 0], label='Autoencoder')
ax.set_xlabel('Time (hr)')
ax.legend()
#format_axis(ax)
plt.savefig('signal_ae.png', bbox_inches='tight', dpi=300)
# ax.plot(xp[:, 0])
# ax.plot(xp2[:, 0])

In [None]:
fig, ax = plt.subplots()
ax.plot(xp[:, 0], label='PCA')
ax.set_xlabel('Time (hr)')
ax.legend()
#format_axis(ax)
plt.savefig('signal_pca.png', bbox_inches='tight', dpi=300)
# ax.plot(xp[:, 0])
# ax.plot(xp2[:, 0])

In [None]:
class Sampling(layers.Layer):
    """Reparameterization layer: returns z_mean + exp(0.5 * z_log_var) * noise.

    ``noise`` is drawn with ``tf.keras.backend.random_normal`` (default
    parameters) and has one value per (batch row, latent dimension).
    """

    def call(self, inputs):
        mean, log_var = inputs
        # Noise tensor with the same 2-D shape as the mean tensor.
        noise_shape = (tf.shape(mean)[0], tf.shape(mean)[1])
        noise = tf.keras.backend.random_normal(shape=noise_shape)
        # exp(0.5 * log variance) is the standard deviation.
        sigma = tf.exp(0.5 * log_var)
        return mean + sigma * noise

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
latent_dim = 102

encoder_inputs = keras.Input(shape=(204, ))
x = layers.Dense(102, activation="leaky_relu")(encoder_inputs)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
# encoder.summary()

In [None]:
latent_inputs = keras.Input(shape=(latent_dim,))
decoder_outputs = layers.Dense(204, activation="sigmoid")(latent_inputs)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
# decoder.summary()

In [None]:
class VAE(keras.Model):
    """Variational autoencoder wrapping a separate encoder and decoder model.

    Parameters
    ----------
    encoder : keras.Model
        Must return the triple ``(z_mean, z_log_var, z)`` when called on a batch.
    decoder : keras.Model
        Maps ``z`` back to a reconstruction of the input.
    **kwargs
        Forwarded to ``keras.Model.__init__``.

    The custom ``train_step`` minimizes reconstruction loss + KL divergence and
    tracks running means of the total, reconstruction and KL losses.
    """

    def __init__(self, encoder, decoder, **kwargs):
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Trackers exposed to Keras so they are reset between epochs."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimization step on ``data`` and return the running loss means."""
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # keras.losses.binary_crossentropy averages over the last axis, giving
            # one value per sample. The original summed that vector with no axis,
            # i.e. over the whole batch, so the reconstruction term grew with the
            # batch size while the KL term below is a batch mean. Rescale to a
            # per-sample sum over features, then average over the batch.
            per_sample_bce = keras.losses.binary_crossentropy(data, reconstruction)
            n_features = tf.cast(tf.shape(data)[-1], per_sample_bce.dtype)
            reconstruction_loss = tf.reduce_mean(per_sample_bce * n_features)
            # KL divergence between N(z_mean, exp(z_log_var)) and N(0, I):
            # summed over latent dimensions, averaged over the batch.
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [None]:
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(x_train, epochs=500, batch_size=128)

In [None]:
x_pred = vae.decoder.predict(vae.encoder.predict(x_train)[0])

In [None]:
idx = 26
plt.plot(x_pred[idx])
plt.plot(x_train[idx])

In [None]:
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
mnist_digits = np.concatenate([x_train, x_test], axis=0)
mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255

In [None]:
mnist_digits.shape

In [None]:
xp = xs
x = generate_data(xp.T, window=48)
print(xp.shape)

r = int(0.9*x.shape[-1])

err = []

for t in range(len(x) - 1):
    if t != 40:
        x_o, x_r, _ = dmd(x[t], r)
        x_o = x_o.T
        x_r = x_r.T

    #     x_o = decoder.predict(x_o)
    #     x_r = decoder.predict(x_r)
        err_ = per_error(x_o, x_r)

        err.append(err_)
        print(f'{t}', end='\r')
err = np.array(err)
print(f'Reconstruction Error: {err.mean():0.4g}+/-{err.std():0.4g}')

In [None]:
plt.plot(err)
print(np.argmax(err))

In [None]:
x_o, x_r, _ = dmd(x[40], r)
x_o = x_o.T
x_r = x_r.T

In [None]:
plt.plot(x_o[:, 2])
plt.plot(x_r[:, 2])

feature matrix (8760, 204) = (time, # features)

previously, PCA --> hidden dimension --> DMD with truncation --> inverse transform

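Requirements for the dimensionality-reduction tool:
1. it can map the data to a hidden (latent) dimension
2. it has an inverse transform back to the original space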


tool 1: PCA
tool 2: autoencoder

autoencoder

autoencoder: the encoder f1 maps 204 -> 32, and the decoder maps 32 -> 204
f1^(-1): 32 --> 204 (the decoder plays the role of the inverse transform)

In [None]:
xs.shape

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
def nn():
    """Build (without compiling) a dense 204 -> 64 -> 64 -> 204 network.

    Two ReLU hidden layers of width 64 are followed by a linear output layer of
    width 204. Returns the uncompiled Keras ``Model``.
    """
    inputs = Input((204, ))
    hidden = inputs
    # Two identical ReLU layers stacked on the input.
    for _ in range(2):
        hidden = Dense(64, activation='relu')(hidden)
    outputs = Dense(204, activation='linear')(hidden)
    return Model(inputs=inputs, outputs=outputs)

In [None]:
model = nn()
model.compile(optimizer=Adam(), loss='mse')
mcp = ModelCheckpoint('test.h5', mode='min')
model.fit(xs, xs, epochs=100, batch_size=32, callbacks=[mcp])