In [None]:
%config Completer.use_jedi = False
import glob
import matplotlib
import pickle
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sn
import pandas as pd
import scipy as sp
import sklearn.preprocessing as skp
import sklearn.model_selection as skms
import sklearn.metrics as skm

def rcparams(r=0.5):
    """Scale the global matplotlib text and figure-size defaults by ``r``.

    The values are written straight into ``matplotlib.rcParams``, so they
    apply to every figure created after the call.

    Args:
        r: Multiplier applied to each base size listed below (default 0.5).
    """
    # Base sizes (at r == 1) for every option that scales with r.
    base_sizes = {
        'font.size': 25,
        'xtick.labelsize': 20,
        'ytick.labelsize': 20,
        'axes.labelsize': 25,
        'legend.title_fontsize': 17,
        'legend.fontsize': 17,
    }
    for option, size in base_sizes.items():
        matplotlib.rcParams[option] = size * r
    # Draw grid lines and ticks underneath the plotted artists.
    matplotlib.rcParams['axes.axisbelow'] = True
    matplotlib.rcParams['figure.figsize'] = [6 * r, 6 * r]

# format axis

def format_axis(ax):
    """Apply the notebook's tick and spine styling to a matplotlib Axes.

    Adds automatic minor ticks on both axes, sets tick widths/lengths, asks
    for about five major bins per axis and thickens all four spines.

    Args:
        ax: The Axes object to style; it is modified in place.
    """
    from matplotlib.ticker import AutoMinorLocator

    for axis_obj in (ax.xaxis, ax.yaxis):
        axis_obj.set_minor_locator(AutoMinorLocator())

    tick_styles = (
        {'which': 'both', 'width': 2},
        {'which': 'major', 'length': 6},
        {'which': 'minor', 'length': 3},
    )
    for style in tick_styles:
        ax.tick_params(**style)

    for direction in ('x', 'y'):
        ax.locator_params(axis=direction, nbins=5)

    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_linewidth(1.5)


In [None]:
# NOTE(review): absolute, machine-specific path; the notebook only runs where this file exists.
path = '/Users/sjiang87/data_weight/atmospheric/data/Hourly_data_of_Beijing_from_Jinxi_interpolated.csv'
df = pd.read_csv(path)
# Not the last expression of the cell, so Jupyter does not display this preview.
df.head()
# Variable naming reference:
# https://confluence.ecmwf.int/display/CKB/ERA5%3A+data+documentation
# https://climserv.ipsl.polytechnique.fr/fr/les-donnees/era-5-4.html
# (CSV column, plot label) pairs; kept side by side so the two lists below
# cannot drift out of step.
_columns = [
    ('ERA5_d2m', 'Dew point at 2 meters'),
    ('ERA5_t2m', 'Temperature at 2 meters'),
    ('ERA5_rh', 'Relative humidity'),
    ('ERA5_sp', 'Surface pressure'),
    ('ERA5_u10', '10m U wind component'),
    ('ERA5_v10', '10m V wind component'),
    ('ERA5_blh', 'Boundary layer height'),
    ('SO2', 'SO2'),
    ('PM2.5', 'PM2.5'),
]
keys = [column for column, _ in _columns]
names = [label for _, label in _columns]

In [None]:
# Select one monitoring site, fill gaps by interpolation, and z-score every
# selected column (zero mean, unit standard deviation).
site_num = 0
raw = pd.read_csv(path)
sites = raw['Site'].values
unique_sites = np.unique(sites)
inds = (sites == unique_sites[site_num])  # boolean row mask for the chosen site
site_frame = raw[keys].iloc[inds].interpolate().copy()
centered = site_frame - site_frame.mean(axis=0)
df = centered / centered.std(axis=0)
print(f"Looking only at site {unique_sites[site_num]}")

In [None]:
# One panel per column of the standardised single-site frame.
rcparams(0.5)
fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(8, 6))
ax = ax.ravel()
for i, key in enumerate(df.keys()):
    n = len(df[key])
    # The first seven columns are the ERA5 meteorological variables (black);
    # the remaining ones (SO2, PM2.5) are the pollutants (red).
    color = 'k' if i <= 6 else 'tab:red'
    # df was z-scored when it was built, so every column contains negative
    # values. Taking np.log of it (as this cell used to do for the pollutant
    # panels) yields NaN for those samples, leaving gaps in the curves and a
    # RuntimeWarning. Plot the standardised values directly instead.
    ax[i].plot(np.arange(n) / 24, df[key], color=color)  # hourly samples -> days
    ax[i].set_title(names[i], fontsize=12)
    ax[i].set_xlim([0, n / 24])
    ax[i].set_xlabel('Day')
    format_axis(ax[i])
plt.suptitle(f'Site {unique_sites[site_num]}')
plt.tight_layout()
# plt.savefig(f'Site {unique_sites[site_num]}.png', bbox_inches='tight')

In [None]:
# Correlation matrix of the standardised columns, labelled with the
# human-readable variable names.
fig, ax = plt.subplots()
ax.matshow(df.corr())
tick_positions = np.arange(len(names))
ax.set_xticks(tick_positions)
ax.set_xticklabels(names, rotation=45, ha='left')
ax.set_yticks(tick_positions)
ax.set_yticklabels(names, rotation=45, va='top')
# format_axis(ax)
plt.savefig(f'Site {unique_sites[site_num]} corr.png', bbox_inches='tight')
plt.show()

In [None]:
# Build [SO2, PM2.5] series (standardised per site) for the first 34 sites.
# The CSV and the site list do not change between iterations, so they are
# loaded once here instead of once per site.
df_all = pd.read_csv(path)
sites = df_all['Site'].values
unique_sites = np.unique(sites)

data_sites = []
for site_num in range(34):
    inds = sites == unique_sites[site_num]
    # NOTE: `df` is deliberately rebound on every iteration. After the loop it
    # holds the last site's standardised frame, which the later
    # `x = df.values.T` cell consumes.
    df = df_all[keys].iloc[inds].interpolate().copy()
    df = df - df.mean(axis=0)
    df = df / df.std(axis=0)
    so2 = df['SO2']
    pm = df['PM2.5']
    data_sites.append([so2, pm])

In [None]:
# Grid of standardised SO2 series, one panel per site.
rcparams(0.5)
fig, ax = plt.subplots(nrows=6, ncols=6, figsize=(10, 10))
ax = ax.ravel()
for i, (so2_series, _) in enumerate(data_sites):
    n = len(so2_series)
    panel = ax[i]
    panel.plot(np.arange(n) / 24, so2_series, color='k')
    panel.set_title(unique_sites[i], fontsize=12)
    panel.set_xlim([0, n / 24])
    format_axis(panel)
# The last two panels of the 6x6 grid are switched off.
for i in [-2, -1]:
    ax[i].axis('off')
plt.suptitle('SO2')
plt.tight_layout()
plt.savefig('SO2.png', bbox_inches='tight')

In [None]:
# Grid of standardised PM2.5 series, one panel per site.
rcparams(0.5)
fig, ax = plt.subplots(nrows=6, ncols=6, figsize=(10, 10))
ax = ax.ravel()
for i, site in enumerate(data_sites):
    so2_series, pm_series = site
    # The x-range is taken from the SO2 series length, as in the SO2 figure.
    n = len(so2_series)
    days = np.arange(n) / 24
    ax[i].plot(days, pm_series, color='k')
    ax[i].set_title(unique_sites[i], fontsize=12)
    ax[i].set_xlim([0, n / 24])
    format_axis(ax[i])
# The last two panels of the 6x6 grid are switched off.
for i in [-2, -1]:
    ax[i].axis('off')
plt.suptitle('PM2.5')
plt.tight_layout()
plt.savefig('PM2.5.png', bbox_inches='tight')

In [None]:
# Variables along rows, time along columns. NOTE(review): `df` here is
# whatever frame was bound last; in the cell order shown that is the final
# site processed by the per-site loop, not site 0.
x = np.transpose(df.values)
x.shape

In [None]:
def generate_date(x, train=96, test=24):
    """Cut a (n_feature, n_time) array into sliding train/test windows.

    Sample ``i`` consists of ``x[:, i : i + train]`` as the training window
    and the immediately following ``x[:, i + train : i + train + test]`` as
    the test window. Consecutive samples are shifted by one column.

    Args:
        x: 2-D numpy array with time along axis 1.
        train: Number of columns in each training window.
        test: Number of columns in each test window.

    Returns:
        Tuple ``(x_train, x_test)`` with shapes
        ``(n_sample, n_feature, train)`` and ``(n_sample, n_feature, test)``.
        ``n_sample`` is 0 when ``x`` is shorter than ``train + test``.
    """
    window = train + test
    n_feature, n_total = x.shape
    # The last admissible start index is n_total - window, so there are
    # n_total - window + 1 windows. The previous count (n_total - window)
    # silently dropped the final one and went negative for short inputs.
    n_sample = max(n_total - window + 1, 0)
    # Pre-allocating keeps the 3-D shape even when there are no samples.
    x_train = np.empty((n_sample, n_feature, train), dtype=x.dtype)
    x_test = np.empty((n_sample, n_feature, test), dtype=x.dtype)
    for i in range(n_sample):
        x_train[i] = x[:, i: i + train]
        x_test[i] = x[:, i + train: i + window]
    print(f'x train: {x_train.shape}')
    print(f'x test: {x_test.shape}')
    return x_train, x_test

### dmd

In [None]:
def predict(A, x, n):
    """Iterate the linear map ``x -> A @ x`` for ``n`` steps.

    Args:
        A: Square transition matrix.
        x: Initial state vector.
        n: Number of steps to advance.

    Returns:
        Array whose columns are ``x, A x, A^2 x, ..., A^n x`` (``n + 1``
        columns, one row per state component).
    """
    trajectory = [x]
    for _ in range(n):
        trajectory.append(A @ trajectory[-1])
    return np.array(trajectory).T

# Pick sample 100 and keep only the last two rows of x (SO2 and PM2.5, the
# last two entries of `keys`) as the state.
i = 100
x_train, x_test = generate_date(x, train=96, test=24)
x_total = np.array(x_train[i, -2:, :])
x_true = x_total  # same array object, used as the plotting reference
# Snapshot pairs: column k of x1 is the state one hour after column k of x0.
x0, x1 = x_total[:, :-1], x_total[:, 1:]
x_future = x_test[i, -2:, :]

def dmd(x0, x1, n_steps=119):
    """Fit a linear one-step map by least squares and roll it forward.

    Solves ``x1 ~= A @ x0`` via the pseudo-inverse, then iterates ``A``
    starting from the first column of ``x0`` using ``predict``.

    Args:
        x0: Snapshot matrix, one state per column.
        x1: The same snapshots shifted one step forward.
        n_steps: Number of steps to roll out. The default of 119 gives the
            120 columns (96 fitted + 24 future hours) the plotting cell
            slices; it used to be hard-coded.

    Returns:
        Whatever ``predict`` returns for ``(A, x0[:, 0], n_steps)``.
    """
    A = x1 @ np.linalg.pinv(x0)
    return predict(A, x0[:, 0], n_steps)
x_pred = dmd(x0, x1)

In [None]:
# Compare the DMD rollout with the data: hours 0-95 are the fitting window,
# hours 96-119 the held-out horizon (divided by 24 to plot in days).
rcparams(0.75)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4.5))
fit_days = np.arange(96) / 24
future_days = np.arange(96, 120) / 24
target_names = df.keys()[-2:]
for i, panel in enumerate(ax):
    panel.plot(fit_days, x_true[i, :], 'k', label='True')
    panel.plot(fit_days, x_pred[i, :96], 'k--', label='Reconst.')
    panel.plot(future_days, x_future[i, :], 'r', label='Future')
    panel.plot(future_days, x_pred[i, 96:], 'r--', label='Predicted')
    panel.set_ylabel(target_names[i])
    panel.set_xlabel('Day')
    if i == 0:
        panel.legend()
    format_axis(panel)
plt.tight_layout()
plt.savefig('dmd.png', bbox_inches='tight')

### dmd time delay

In [None]:
def predict(A, x, n):
    """Iterate ``x -> A @ x`` for ``n`` steps and keep the last two rows.

    Args:
        A: Square transition matrix acting on the (delay-embedded) state.
        x: Initial state vector.
        n: Number of steps to advance.

    Returns:
        The final two rows of the ``(len(x), n + 1)`` trajectory array whose
        columns are ``x, A x, ..., A^n x``.
    """
    history = [x]
    state = x
    for _ in range(n):
        state = A @ state
        history.append(state)
    trajectory = np.array(history).T
    return trajectory[-2:]

delay = 6
# The rollout below is compared against window 777. `predict` in this cell
# returns the LAST delay block of the embedded state, and the last block of
# x_delay[:, 0] is hour `delay - 1` of the fitting window. The fitting window
# therefore has to start at 777 - (delay - 1); starting at 777 - delay (as
# before) shifted every predicted curve by one hour relative to the truth.
i = 777 - (delay - 1)
x_train, x_test = generate_date(x, 96, 24)
x_total = np.array(x_train[i, -2:, :])

# Delay embedding: column j stacks the states at hours j, j+1, ..., j+delay-1.
n_state, n_time = x_total.shape
x_delay = np.zeros((n_state * delay, n_time - delay))
for j in range(n_time - delay):
    for k in range(delay):
        x_delay[k * n_state:(k + 1) * n_state, j] = x_total[:, j + k]

x0 = x_delay[:, :-1]
x1 = x_delay[:, 1:]

# Least-squares one-step map on the embedded snapshots.
A = x1 @ np.linalg.pinv(x0)

x_pred = predict(A, x_delay[:, 0], 119)
# Column 0 of x_pred is hour i + delay - 1, i.e. the first hour of this window.
x_true = x_total = np.array(x_train[i + delay - 1, -2:, :])
x_future = x_test[i + delay - 1, -2:, :]

In [None]:
# Delay-DMD rollout against the data (first 96 hours vs. the following 24).
rcparams(0.75)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4.5))
days_fit = np.arange(96) / 24
days_ahead = np.arange(96, 120) / 24
ylabels = df.keys()[-2:]
for i in range(2):
    curves = (
        (days_fit, x_true[i, :], 'k', 'True'),
        (days_fit, x_pred[i, :96], 'k--', 'Reconst.'),
        (days_ahead, x_future[i, :], 'r', 'Future'),
        (days_ahead, x_pred[i, 96:], 'r--', 'Predicted'),
    )
    for t, values, fmt, label in curves:
        ax[i].plot(t, values, fmt, label=label)
    ax[i].set_ylabel(ylabels[i])
    ax[i].set_xlabel('Day')
    if i == 0:
        ax[i].legend()
    format_axis(ax[i])
plt.tight_layout()
plt.savefig(f'dmdd_{delay}.png', bbox_inches='tight')

### dmdc

In [None]:
def predict(A, B, x, u, n):
    """Roll out the controlled linear system ``x <- A @ x + B @ u[:, i]``.

    Args:
        A: State transition matrix.
        B: Input matrix.
        x: Initial state vector.
        u: Input array; column ``i`` is applied at step ``i``.
        n: Number of steps (``u`` needs at least ``n`` columns).

    Returns:
        Array with ``n + 1`` columns: the initial state followed by the state
        after each step.
    """
    states = [x]
    for step in range(n):
        states.append(A @ states[-1] + B @ u[:, step])
    return np.array(states).T

from scipy.sparse.linalg import svds

i = 100
x_train, x_test = generate_date(x, 96, 24)
# Last two rows of each window are the states (SO2, PM2.5); the rows before
# them are treated as control inputs.
x_true = x_total = np.array(x_train[i, -2:, :])
u_total = np.array(x_train[i, :-2, :])

x0 = x_total[:, :-1]
x1 = x_total[:, 1:]
u0 = u_total[:, :-1]
u1 = u_total[:, 1:]  # not used by the fit or the rollout; kept for inspection
x_future = x_test[i, -2:, :]
u_future = x_test[i, :-2, :]

# DMDc fit: x1 ~= A @ x0 + B @ u0, solved through the SVD of the stacked
# [x0; u0] matrix. np.linalg.svd returns V already transposed, hence `v.T`.
g = np.concatenate((x0, u0), axis=0)
u, s, v = np.linalg.svd(g, full_matrices=False)

n_x = x0.shape[0]
ux = u[:n_x, :]
uu = u[n_x:, :]
A = x1 @ v.T / s @ ux.T
B = x1 @ v.T / s @ uu.T

# The model maps (x_k, u_k) -> x_{k+1}, so step k of the rollout must receive
# the input of hour k. Passing u1 (inputs shifted one hour ahead, as before)
# fed every step the input of hour k + 1. `predict` reads the first 119
# columns of this 120-column array.
u_rollout = np.concatenate((u_total, u_future), axis=1)
x_pred = predict(A, B, x_total[:, 0], u_rollout, 119)

In [None]:
# Shape check of the SVD factors as last bound by `u, s, v = np.linalg.svd(...)`.
# Note that `v` is the already-transposed right factor returned by numpy.
[u.shape, s.shape, v.shape]

In [None]:
# DMDc rollout against the data: 96 fitted hours followed by 24 future hours.
rcparams(0.75)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4.5))
hours_fit = np.arange(96)
hours_future = np.arange(96, 120)
state_names = df.keys()[-2:]
for i, name in enumerate(state_names):
    axis = ax[i]
    axis.plot(hours_fit / 24, x_true[i, :], 'k', label='True')
    axis.plot(hours_fit / 24, x_pred[i, :96], 'k--', label='Reconst.')
    axis.plot(hours_future / 24, x_future[i, :], 'r', label='Future')
    axis.plot(hours_future / 24, x_pred[i, 96:], 'r--', label='Predicted')
    axis.set_ylabel(name)
    axis.set_xlabel('Day')
    if i == 0:
        axis.legend()
    format_axis(axis)
plt.tight_layout()
plt.savefig('dmdc.png', bbox_inches='tight')

In [None]:
# Dimensions of the most recently fitted input matrix B (one row per state,
# one column per control input in the un-delayed DMDc fit).
B.shape

In [None]:
# Bar chart of each row of B: the fitted weight of every meteorological
# input on SO2 (left) and PM2.5 (right).
rcparams(0.75)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 6))
positions = np.arange(7)
input_labels = names[:7]
for i, target in enumerate(df.keys()[-2:]):
    ax[i].bar(positions, B[i], color='k')
    ax[i].set_title(target)
    ax[i].set_xticks(positions)
    ax[i].set_xticklabels(input_labels, rotation=90, ha='right', fontsize=12)
plt.tight_layout()
plt.savefig('B.png', bbox_inches='tight')

### dmdc with time delay

In [None]:
def predict(A, B, x, u, n):
    """Simulate ``x_{k+1} = A @ x_k + B @ u[:, k]`` for ``n`` steps.

    Args:
        A: State transition matrix.
        B: Input matrix.
        x: Initial state vector.
        u: Input array with one column per step.
        n: Number of steps to simulate.

    Returns:
        Array of shape ``(len(x), n + 1)`` holding the initial state and the
        ``n`` simulated states as columns.
    """
    current = x
    rollout = [current]
    for k in range(n):
        current = A @ current + B @ u[:, k]
        rollout.append(current)
    return np.array(rollout).T

def _delay_embed(series, delay):
    """Stack ``delay`` consecutive columns of ``series`` into tall columns.

    Column ``j`` of the result is ``series[:, j], series[:, j + 1], ...,
    series[:, j + delay - 1]`` stacked vertically. The result has
    ``series.shape[1] - delay`` columns, the count the three hand-written
    loops this helper replaces produced.
    """
    n_rows, n_cols = series.shape
    n_out = n_cols - delay
    embedded = np.zeros((n_rows * delay, n_out))
    for k in range(delay):
        embedded[k * n_rows:(k + 1) * n_rows, :] = series[:, k:k + n_out]
    return embedded


delay = 12
i = 100 - delay
x_train, x_test = generate_date(x, 96, 24)
# Last two rows are the states (SO2, PM2.5); the rows before them are inputs.
x_true = x_total = np.array(x_train[i, -2:, :])
u_total = np.array(x_train[i, :-2, :])
x_future = x_test[i, -2:, :]

# Delay-embedded snapshot pairs for states and inputs of the fitting window.
x_delay = _delay_embed(x_total, delay)
x0 = x_delay[:, :-1]
x1 = x_delay[:, 1:]

u_delay = _delay_embed(u_total, delay)
u0 = u_delay[:, :-1]
u1 = u_delay[:, 1:]

# DMDc fit on the embedded data: x1 ~= A @ x0 + B @ u0 via the SVD of [x0; u0].
g = np.concatenate((x0, u0), axis=0)
u, s, v = np.linalg.svd(g, full_matrices=False)
print(x0.shape)

n_x = x_delay.shape[0]  # rows of u that belong to the embedded state
ux = u[:n_x, :]
uu = u[n_x:, :]
A = x1 @ v.T / s @ ux.T
B = x1 @ v.T / s @ uu.T

# Inputs for the rollout: fitting window followed by the future window,
# embedded the same way as during the fit.
u_prev = x_train[i, :-2, :]
u_future = np.concatenate((u_prev, x_test[i, :-2, :]), axis=1)
n_state, n_time = u_future.shape
u_delay = _delay_embed(u_future, delay)

# One step per available input column (120 - delay = 108 for delay = 12).
x_pred = predict(A, B, x_delay[:, 0], u_delay, u_delay.shape[1])

# Rebound as in the original cell so later cells see fresh arrays.
x_true = x_total = np.array(x_train[i, -2:, :])

In [None]:
# Keep only the first two rows of the rollout. In the delay layout built by
# the preceding cell, rows 2k:2k+2 hold the state k hours ahead, so rows 0:2
# are the un-delayed SO2 / PM2.5 values. Assumes x_pred still has its full
# 2 * delay rows the first time this cell runs.
x_pred = x_pred[:2, :]

In [None]:
# Root-mean-square error of the rollout: columns 0-83 versus everything after.
n_fit = 84
# Reference for the tail: the rest of the fitting window plus the first 13
# future hours, which matches the 25 remaining columns of x_pred.
tail_truth = np.concatenate((x_true[:, n_fit:96], x_future[:, :13]), axis=1)
err_1 = skm.mean_squared_error(x_true[:, :n_fit], x_pred[:, :n_fit]) ** 0.5
err_2 = skm.mean_squared_error(tail_truth, x_pred[:, n_fit:]) ** 0.5
err_1, err_2

In [None]:
# Delay-DMDc rollout against the data. The first 84 hours are drawn as the
# reconstruction; from hour 84 on the curves are labelled future / predicted.
rcparams(0.75)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4.5))
days_head = np.arange(84) / 24
days_truth_tail = np.arange(84, 108) / 24
days_pred_tail = np.arange(84, 109) / 24  # x_pred has one more column than the truth drawn
state_names = df.keys()[-2:]
for i in range(2):
    truth_tail = np.concatenate([x_true[i, 84:96], x_future[i, :12]])
    panel = ax[i]
    panel.plot(days_head, x_true[i, :84], 'k', label='True')
    panel.plot(days_head, x_pred[i, :84], 'k--', label='Reconst.')
    panel.plot(days_truth_tail, truth_tail, 'r', label='Future')
    panel.plot(days_pred_tail, x_pred[i, 84:], 'r--', label='Predicted')
    panel.set_ylabel(state_names[i])
    panel.set_xlabel('Day')
    if i == 0:
        panel.legend()
    format_axis(panel)
plt.tight_layout()

In [None]:
# Dimensions of the most recently fitted state matrix A (which fit that is
# depends on the last cell that assigned A).
A.shape

In [None]:
# Dimensions of the input snapshot matrix u0 used in the latest fit.
u0.shape

In [None]:
# NOTE(review): `u` is the left factor returned by the latest
# `u, s, v = np.linalg.svd(g, ...)` call, not the control inputs.
u.shape

## 