# Simulate data

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
# Make the local L96 simulator sources importable (added to sys.path at most once).
module_path = os.path.abspath('/gpfs/home/nonnenma/projects/emulators/simulators/L96')
if not (module_path in sys.path):
    sys.path.append(module_path)

# Root directory below which simulation data ('data/') and models ('models/') are stored.
res_dir = '/gpfs/work/nonnenma/results/emulators/L96/'

# Precision the simulation output is cast to before it is written to disk.
dtype = np.float32

In [None]:
from L96_base import f1, f2, J1, J1_init, f1_juliadef, f2_juliadef
from L96_emulator.util import predictor_corrector

# Model constants handed to f1/f2 below.
# NOTE(review): presumably F is the forcing and h, b, c the coupling constants of the
# two-level Lorenz-96 system — confirm against L96_base.
F, h, b, c = 10, 1, 10, 10
K,J = 36, 10          # the state vector holds K*(J+1) entries
T, dt = 605, 0.001    # end time of the simulation and spacing of the output time grid

# Random initial state: Gaussian perturbation around 0.5, scaled by F / max(J, 1).
X_init = F * (0.5 + np.random.randn(K*(J+1)) * 1.0) / np.maximum(J,1)
# Scratch buffer that is passed into f1/f2 on every call.
dX_dt = np.empty(X_init.size, dtype=X_init.dtype)
# np.linspace needs an integer number of samples; np.floor returns a float, which
# current NumPy versions reject with a TypeError, so cast explicitly.
n_steps = int(np.floor(T/dt))
times = np.linspace(0, T, n_steps + 1)

if J > 0:
    def fun(t, x):
        """Time derivative at state x via f2; t is accepted but not used."""
        return f2(x, F, h, b, c, dX_dt, K, J)
else:
    def fun(t, x):
        """Time derivative at state x via f1; t is accepted but not used."""
        return f1(x, F, dX_dt, K)

# Integrate from a copy of the initial state so X_init itself stays untouched.
out = predictor_corrector(fun=fun, y0=X_init.copy(), times=times, alpha=0.5)

# filename for data storage
fn_data = f'out_K{K}_J{J}_T{T}'
np.save(res_dir + 'data/' + fn_data, out.astype(dtype=dtype))

# Overview image of the full simulation (time along x, state index along y).
plt.figure(figsize=(8,4))
plt.imshow(out.T, aspect='auto')
plt.xlabel('time')
plt.ylabel('location')
plt.show()

In [None]:
from L96_base import f1, f2, J1, J1_init, f1_juliadef, f2_juliadef
from L96_emulator.util import predictor_corrector

try: 
    print(F, h, b, c, K, J, T, dt)
except NameError:
    # The simulation cell was not run in this kernel: fall back to its settings.
    # Only NameError is caught so that unrelated failures are not silently hidden.
    F, h, b, c = 10, 1, 10, 10
    K,J = 36, 10
    T, dt = 605, 0.001

T_ = 5000
T_burnin = int(5./dt) # rough time [s] for model state to have 'forgotten' its initial state

# NOTE(review): this scratch buffer is float32, whereas the first simulation cell uses
# the dtype of X_init — confirm the precision difference is intended.
dX_dt = np.empty(K*(J+1), dtype=np.float32)
# Time grid with half the step size, covering T_+1 steps of size dt. Building it from an
# integer index fixes its length at 2*(T_+1); np.arange with a float step can gain or
# lose the last sample through rounding, which would break the comparison with T_+1
# simulation samples further down.
times = 0.5*dt * np.arange(2*(T_+1))

if J > 0:
    def fun(t, x):
        """Time derivative at state x via f2; t is accepted but not used."""
        return f2(x, F, h, b, c, dX_dt, K, J)
else:
    def fun(t, x):
        """Time derivative at state x via f1; t is accepted but not used."""
        return f1(x, F, dX_dt, K)

# Restart from the simulated state at T_burnin and keep every second sample, so that
# out2 is spaced dt apart again.
out2 = predictor_corrector(fun=fun, y0=out[T_burnin], times=times, alpha=0.5)[::2,:]
#out2 = (out2.reshape(-1,J+1,K) - dg_train.mean) / dg_train.std
out2 = out2.reshape(-1,(J+1)*K)

In [None]:
from L96_emulator.dataset import Dataset

temporal_offset = 1
# The training split runs from the end of burn-in to 80% of the simulated samples.
train_end = int(np.floor(out.shape[0]*0.8))
dg_train = Dataset(
    data=out,
    J=J,
    offset=temporal_offset,
    normalize=True,
    start=T_burnin,
    end=train_end,
)

In [None]:
# z-score the reference trajectory with the training-set statistics, then flatten each
# sample back to a vector of (J+1)*K entries.
# NOTE: out2 is overwritten, so running this cell a second time normalizes twice.
out2 = (
    (out2.reshape(-1,J+1,K) - dg_train.mean) / dg_train.std
).reshape(-1,(J+1)*K)

In [None]:
# RMSE per time step between the 2x-resolution solver run and the stored simulation,
# both taken over the T_+1 samples that start at T_burnin.
sim_window = dg_train[np.arange(T_+1)+ T_burnin].reshape(-1,(J+1)*K)
rmse_solver = np.sqrt(np.mean((out2 - sim_window)**2, axis=1))

plt.plot(rmse_solver, 'k--', label='solver 2x temp. resol. vs sim')
plt.axis([0, 2000, 0, 1.6])
plt.title('missmatch over time')
plt.xlabel('time')
plt.ylabel('RMSE (on z-scored data)')
plt.legend()
plt.show()

# Learn local emulator

In [None]:
#%run -i 'main_train.py'

# Evaluate model fit

In [None]:
import torch 
import numpy as np
from L96_emulator.networks import TinyNetwork, TinyResNet

import os
import sys
module_path = os.path.abspath(os.path.join('/gpfs/home/nonnenma/projects/seasonal_forecasting/code/weatherbench'))
if module_path not in sys.path:
    sys.path.append(module_path)
from src.pytorch.util import init_torch_device

# From here on `dtype` is the torch dtype used for all tensors handed to the model.
dtype = torch.float32

device = init_torch_device()

# Settings of the stored simulation; they determine the name of the data file.
K,J = 36, 10
T, dt = 605, 0.001
fn_data = f'out_K{K}_J{J}_T{T}'

T_burnin = int(5./dt) # rough time [s] for model state to have 'forgotten' its initial state


# The network takes 2*(J+1) input channels and returns J+1 output channels.
model = TinyResNet(n_filters_ks3 = [128, 128, 128, 128], 
                    #n_filters_ks1=[[128, 128], [128, 128], [128, 128], [128, 128], [128, 128]],
                    n_channels_in = 2*(J+1),
                    n_channels_out = J+1,
                    padding_mode='circular')

# Smoke test on random input. The model is never explicitly moved to `device` in this
# notebook, so the test tensor is created wherever the parameters actually live;
# otherwise the forward pass fails as soon as `device` differs from that.
model_device = next(model.parameters()).device
test_input = np.random.normal(size=(10, 2*(J+1), K))
with torch.no_grad():  # shape check only, no gradients needed
    test_output = model(torch.as_tensor(test_input, device=model_device, dtype=dtype))
print(f'model output shape to test input of shape {test_input.shape}', 
      test_output.shape)

# Counts every entry of the state dict.
print('total #parameters: ', np.sum([np.prod(item.shape) for item in model.state_dict().values()]))
model.layers_ks1, model.layers3x3

In [None]:
from L96_emulator.dataset import DatasetRelPred, Dataset
import numpy as np

# Identify the trained model on disk.
exp_id = 'V9'
save_dir = f'{res_dir}models/{exp_id}/'
temporal_offset = 1
fn_model = f'{exp_id}_FOV5_dt{temporal_offset}.pt'

# Reload the stored simulation and the trained weights.
out = np.load(f'{res_dir}data/{fn_data}.npy')
state_dict = torch.load(save_dir + fn_model, map_location=torch.device(device))
model.load_state_dict(state_dict)

# Split along time: training from burn-in to 80%, validation from 80% to 90%.
n_samples = out.shape[0]
dg_train = DatasetRelPred(
    data=out, J=J, offset=temporal_offset, normalize=True,
    start=T_burnin,
    end=int(np.floor(n_samples*0.8)),
)
dg_val = DatasetRelPred(
    data=out, J=J, offset=temporal_offset, normalize=True,
    start=int(np.ceil(n_samples*0.8)),
    end=int(np.floor(n_samples*0.9)),
)

In [None]:
if isinstance(dg_train, DatasetRelPred):

    # Output statistics of the dataset, used to rescale the network output.
    std_out = torch.as_tensor(dg_train.std_out, device='cpu', dtype=dtype)
    mean_out = torch.as_tensor(dg_train.mean_out, device='cpu', dtype=dtype)

    def model_simulate(y0, dy0, T):
        """Roll the emulator forward for T steps, starting from state y0.

        At every step the network receives the current state concatenated with the
        last state increment along the channel axis. Its output is rescaled with
        std_out / mean_out and added to the current state.

        Args:
            y0: initial state; its leading axis is dropped for the result
                (assumes shape (1, J+1, K) — TODO confirm against the dataset).
            dy0: initial increment, concatenated with the state along axis 1.
            T: number of steps to simulate.

        Returns:
            Array with T+1 entries along the first axis; entry 0 is y0.
        """
        x = np.empty((T+1, *y0.shape[1:]))
        x[0] = y0.copy()
        xx = torch.as_tensor(x[0], device='cpu', dtype=dtype)
        dx = torch.as_tensor(dy0.copy(), device='cpu', dtype=dtype)
        # Inference only: without no_grad every step would extend one autograd graph
        # spanning the whole roll-out, so memory would grow with T.
        with torch.no_grad():
            for i in range(1,T+1):
                # No in-place operation follows, so a reference to the old state suffices.
                xx_prev = xx
                net_in = torch.cat((xx_prev.reshape(1,J+1,K), dx), dim=1)
                xx = std_out * model.forward(net_in) + mean_out + xx_prev.reshape(1,J+1,-1)
                dx = xx - xx_prev
                x[i] = xx.detach().numpy()
        return x
    
elif isinstance(dg_train, Dataset): 

    def model_simulate(y0, T):
        """Roll the emulator forward for T steps, starting from state y0.

        The network output is used directly as the next state.

        Args:
            y0: initial state; its leading axis is dropped for the result
                (assumes shape (1, J+1, K) — TODO confirm against the dataset).
            T: number of steps to simulate.

        Returns:
            Array with T+1 entries along the first axis; entry 0 is y0.
        """
        x = np.empty((T+1, *y0.shape[1:]))
        x[0] = y0.copy()
        xx = torch.as_tensor(x[0], device='cpu', dtype=dtype).reshape(1,1,-1)
        # Inference only: keep autograd from recording the whole roll-out.
        with torch.no_grad():
            for i in range(1,T+1):
                xx = model.forward(xx.reshape(1,J+1,-1))
                x[i] = xx.detach().numpy()
        return x
    
    
T_burnin = int(5./dt)  # index of the sample used as initial condition
T_ = 500 #(out.shape[0]-1)//10000

# Roll the emulator forward from the sample at T_burnin. The initial increment is the
# difference to the sample one dataset offset earlier.
out_model = model_simulate(y0=dg_train[T_burnin].copy(), 
                           dy0=dg_train[T_burnin]-dg_train[T_burnin-dg_train.offset],
                           T=T_)#.reshape(-1, K*(J+1))

# Fixed colour limits shared by both image panels. Data-driven limits used to be
# computed here as well but were overwritten right away, so that dead code is gone.
vmax, vmin = 5, -5

# Reference window of the numerical simulation, aligned with out_model.
sim_window = dg_train[np.arange(T_+1)+ T_burnin]
sim_window_flat = sim_window.reshape(-1,(J+1)*K)

plt.figure(figsize=(16,9))
plt.subplot(2,2,1)
plt.imshow(sim_window_flat.T, aspect='auto', vmin=vmin, vmax=vmax)
plt.xlabel('time')
plt.ylabel('location')
plt.title('numerical simulation')
plt.colorbar()
plt.subplot(2,2,3)
plt.imshow(out_model.reshape(-1,(J+1)*K).T, aspect='auto', vmin=vmin, vmax=vmax)
plt.xlabel('time')
plt.ylabel('location')
plt.title('model-reconstructed simulation')
plt.colorbar()
plt.subplot(1,2,2)
plt.plot(np.sqrt(np.mean( (out_model - sim_window)**2, axis=(1,2) )), 
         label='model-reconstruction vs sim')

# The solver reference curve is optional: out2 exists only if the 2x-resolution cell was
# run in this kernel. Only the expected failures are caught, and the reason is reported
# instead of being swallowed.
try:
    rmse_solver = np.sqrt(np.mean( (out2[:T_+1] - sim_window_flat)**2, axis=1 ))
except (NameError, ValueError) as err:
    print(f'skipping solver reference curve: {err!r}')
else:
    plt.plot(rmse_solver, 'k--', label='solver 2x temp. resol. vs sim')
#plt.axis([0, np.minimum(500, T_), 0, 1.6])
plt.title('missmatch over time')
plt.xlabel('time')
plt.ylabel('RMSE (on z-scored data)')
plt.legend()
plt.show()

## debug corner

In [None]:
T_burnin = 10000
# dg_train is a DatasetRelPred here, so model_simulate also needs the initial
# increment dy0 (built the same way as in the evaluation cell).
out_model = model_simulate(y0=dg_train[T_burnin].copy(),
                           dy0=dg_train[T_burnin]-dg_train[T_burnin-dg_train.offset],
                           T=T_)#.reshape(-1, K*(J+1))

# MSE per iteration and variable (mean over the last axis), computed once for all curves.
iterations = np.arange(1, T_+1)
mse_per_variable = np.mean( (out_model[1:] - dg_train[iterations + T_burnin])**2, axis=(2,) )

plt.figure(figsize=(12,4))
# Same curves twice: left zoomed in on the first iterations, right over a long horizon.
for panel, axis_limits in enumerate(([0,20,0.0000001, 0.5], [0,1300,0.0000001, 1000]), start=1):
    plt.subplot(1,2,panel)
    for i in range(J+1):
        plt.semilogy(iterations, mse_per_variable[:,i], 'b--')
    plt.semilogy(iterations, mse_per_variable[:,0], 'k', linewidth=2,
             label='slow variables')
    # Dummy point outside the axis range; it only provides the legend entry.
    plt.semilogy(-1, 1, 'b--', label=f'fast variables (J={str(J)})')
    plt.axis(axis_limits)
    plt.legend()
    plt.xlabel('iterations')
    plt.ylabel('MSE')
    plt.title('error over iterations, per variable type')

plt.show()

In [None]:
T_burnin = 10000
# dg_train is a DatasetRelPred here, so model_simulate also needs the initial
# increment dy0 (built the same way as in the evaluation cell).
out_model = model_simulate(y0=dg_train[T_burnin].copy(),
                           dy0=dg_train[T_burnin]-dg_train[T_burnin-dg_train.offset],
                           T=T_)#.reshape(-1, K*(J+1))

# MSE per iteration and location (mean over the J+1 variables), computed once for all curves.
iterations = np.arange(1, T_+1)
mse_per_location = np.mean( (out_model[1:] - dg_train[iterations + T_burnin])**2, axis=(1,) )

plt.figure(figsize=(12,4))
# Same curves twice: left zoomed in on the first iterations, right over a long horizon.
for panel, axis_limits in enumerate(([0,20,0.0000001, 0.5], [0,1300,0.0000001, 1000]), start=1):
    plt.subplot(1,2,panel)
    for i in range(K):
        plt.semilogy(iterations, mse_per_location[:,i], 'b--')
    # The curves here are locations, not variable types, so they are labelled as such.
    plt.semilogy(iterations, mse_per_location[:,0], 'k', linewidth=2,
             label='location 0')
    # Dummy point outside the axis range; it only provides the legend entry.
    plt.semilogy(-1, 1, 'b--', label=f'all locations (K={str(K)})')
    plt.axis(axis_limits)
    plt.legend()
    plt.xlabel('iterations')
    plt.ylabel('MSE')
    plt.title('error over iterations, per location')

plt.show()

In [None]:
t = 10000
# Rescale sample t with the dataset input statistics and plot its difference to the raw
# simulation state out[t].
# NOTE(review): presumably a sanity check that the normalization can be inverted — confirm.
sample_z = dg_train[t][0,:,:]
sample_rescaled = dg_train.mean_in + dg_train.std_in * sample_z
plt.plot(sample_rescaled.reshape(K*(J+1)) - out[t])


In [None]:
t = 5000
# One emulator step from sample t. With a DatasetRelPred, model_simulate also needs the
# initial increment, taken as the difference to the sample one dataset offset earlier.
y0 = dg_train[t+0]
dy0 = y0 - dg_train[t-dg_train.offset]
one_step = model_simulate(y0=y0.copy(), dy0=dy0, T=1)[-1,:,:]

plt.figure(figsize=(12,8))
plt.plot(y0.flatten(), label='t=0')
plt.plot(dg_train[t+1].flatten(), label='t=1')
plt.plot(one_step.flatten(), 'k--', label='model, one step from t=0')
plt.legend()  # labels were set on the lines but never displayed
plt.show()

In [None]:
t = 10000
# Compare the simulated one-step increment with the emulator's. With a DatasetRelPred,
# model_simulate also needs the initial increment, taken as the difference to the sample
# one dataset offset earlier.
y0 = dg_train[t+0]
dy0 = y0 - dg_train[t-dg_train.offset]
one_step = model_simulate(y0=y0.copy(), dy0=dy0, T=1)[-1,:,:]

plt.figure(figsize=(12,8))
plt.plot(dg_train[t+1].flatten() - y0.flatten(), label='sim')
plt.plot(one_step.flatten()  - y0.flatten(), label='model')
plt.legend()  # labels were set on the lines but never displayed
plt.show()

In [None]:
# One-step prediction error of the emulator at several start indices.
for t in [0, 100, 1000, 10000]:
    t_prev = t - dg_train.offset
    if t_prev < 0:
        # The initial increment needs the sample one offset earlier, which does not
        # exist here; a negative index would not refer to the preceding sample.
        print(f'skipping t={t}: no sample at index {t_prev} to form the initial increment')
        continue
    y0 = dg_train[t+0]
    one_step = model_simulate(y0=y0.copy(), dy0=y0 - dg_train[t_prev], T=1)[-1,:,:]
    plt.plot(one_step.flatten()  - dg_train[t+1].flatten(), label='model')
    plt.show()


In [None]:

# Standard deviation of the one-step temporal differences, per flattened state entry,
# over the T_+1 samples that start at T_burnin.
sim_window = dg_train[np.arange(T_+1)+ T_burnin].reshape(-1,(J+1)*K)
step_diffs = np.diff(sim_window, axis=0)
diff_std = np.std(step_diffs, axis=0)

plt.semilogy(diff_std)
plt.xlabel('variable ID (slow: first K=36)')
plt.ylabel('std')
plt.title('variability of 1-step temporal differences')
plt.axis([0, 397, 0.001, 0.1])
plt.show()

In [None]:
# Display the colour limits that the imshow calls in this notebook share.
vmin, vmax

In [None]:
# Window of the numerical simulation to display: T_+1 samples starting at T_burnin.
T_burnin = 10000
T_ = 10000
sim_image = dg_train[np.arange(T_+1)+ T_burnin].reshape(-1,(J+1)*K).T

plt.figure(figsize=(16,12))
plt.imshow(sim_image, aspect='auto', vmin=vmin, vmax=vmax)
plt.xlabel('time')
plt.ylabel('location')
plt.title('numerical simulation')
plt.colorbar()
plt.show()

In [None]:
# Shape of the window of T_+1 dataset samples that starts at T_burnin.
dg_train[np.arange(T_+1)+ T_burnin][:,:,:].shape

In [None]:
plt.figure(figsize=(16,12))
T_burnin = 10000
T_ = 10000
# Only index 0 of the last axis is kept, so the image rows are the J+1 variables at that
# single location. The axis label and title now say so instead of the copied 'location'.
plt.imshow(dg_train[np.arange(T_+1)+ T_burnin][:,:,0].reshape(-1,J+1).T, aspect='auto', vmin=vmin, vmax=vmax)
plt.xlabel('time')
plt.ylabel('variable index (0: slow, 1..J: fast)')
plt.title('numerical simulation at location 0')
plt.colorbar()
plt.show()

In [None]:
# Both panels show the first 100 samples minus the state at T_burnin: the numerical
# simulation on top, the emulator roll-out below.
y0_column = dg_train[T_burnin].reshape(-1,1)
panels = (
    (dg_train[np.arange(100)+T_burnin].reshape(100,-1).T,
     'numerical simulation, differences to yo'),
    (out_model[:100].reshape(100,-1).T,
     'model-reconstructed simulation, differences to yo'),
)

plt.figure(figsize=(8,9))
for row, (trajectory, panel_title) in enumerate(panels, start=1):
    plt.subplot(2,1,row)
    plt.imshow(trajectory - y0_column, aspect='auto')
    plt.xlabel('time')
    plt.ylabel('location')
    plt.title(panel_title)
    plt.colorbar()

plt.show()

In [None]:
from L96_emulator.dataset import DatasetRelPred
# Rebuild both splits with whatever T_burnin currently holds: training from T_burnin to
# 80% of the samples, validation from 80% to 90%.
n_samples = out.shape[0]
train_bounds = (T_burnin, int(np.floor(n_samples*0.8)))
val_bounds = (int(np.ceil(n_samples*0.8)), int(np.floor(n_samples*0.9)))

dg_train = DatasetRelPred(data=out, J=J, offset=temporal_offset, normalize=True,
                          start=train_bounds[0], end=train_bounds[1])
dg_val = DatasetRelPred(data=out, J=J, offset=temporal_offset, normalize=True,
                        start=val_bounds[0], end=val_bounds[1])

In [None]:
# Mean and standard deviation of the targets Y per variable, pooled over all samples and
# locations. The statistics are accumulated as running sums, so the dataset length does
# not have to be known in advance. A preallocated buffer of fixed size would raise on a
# longer dataset and bias the result with zero rows on a shorter one.
ct = 0                        # number of samples consumed
sum_y = np.zeros(J+1)         # running sum per variable
sum_y2 = np.zeros(J+1)        # running sum of squares per variable
n_per_variable = 0            # number of values accumulated per variable
for batch in dg_train:
    X,Y = batch
    #print(X.shape, Y.shape)

    # float64 accumulation; reshape drops a possible leading singleton axis.
    Y = np.asarray(Y, dtype=np.float64).reshape(J+1, -1)
    sum_y += Y.sum(axis=1)
    sum_y2 += (Y**2).sum(axis=1)
    n_per_variable += Y.shape[1]
    ct += 1
m = sum_y / n_per_variable
# Var = E[y^2] - E[y]^2, clipped at 0 to guard against round-off.
s = np.sqrt(np.maximum(sum_y2 / n_per_variable - m**2, 0.))
print(m, s)

In [None]:
# Number of samples counted while iterating over dg_train.
ct