In [13]:
import numpy as np
import sklearn
import time
from L96_updated import *
from pickle import dump,load

In [14]:
def create_datasets_no_saving_of_xarray_burn_in(initial_X,initial_Y,mtu, data_file_path_to_save, data_with_y_file_path_to_save,forcing):
    """
    Run the two-level L96 model with a burn-in period and save the post-burn-in data.

    The model is iterated for ``int(burn_in_mtu + mtu)`` time units (so the total
    run length is truncated to a whole number). The leading rows that cover the
    burn-in period are dropped and two ``.npy`` files are written:

    * ``data_file_path_to_save``: array of shape (rows, k, 2) holding X and the
      advection term ``X[i-1] * (X[i-2] - X[i+1])`` (indices wrap around).
    * ``data_with_y_file_path_to_save``: array of shape (rows, k, 2 + J), i.e.
      the same two channels followed by Y reshaped to (rows, k, J).

    Save paths look like this: "data/truth_run/training_dataset.npy" and
    "data/truth_run/training_dataset_with_y.npy"

    Reads the module-level settings ``k``, ``J``, ``burn_in_mtu``, ``time_step``
    and ``save_time_step``; they must be defined before this function is called.

    Parameters
    ----------
    initial_X : initial X state, passed to the simulator as ``X_init``.
    initial_Y : initial Y state, passed to the simulator as ``Y_init``.
    mtu : run length (in model time units) to keep after the burn-in.
    data_file_path_to_save : output path for the (X, advection) array.
    data_with_y_file_path_to_save : output path for the (X, advection, Y) array.
    forcing : forcing value, passed to the simulator as ``F``.

    Returns
    -------
    (initX_new, initY_new) : the last X state, shape (k,), and the last Y state
    flattened to shape (k * J,), to init the next simulator.
    """

    l96_two = L96TwoLevel_updated(save_dt=save_time_step, X_init=initial_X, Y_init=initial_Y,
                                  K=k, J=J, h=1, F=forcing, c=10, b=10, dt=time_step)
    l96_two.iterate(int(burn_in_mtu + mtu))
    h2_xarray = l96_two.history

    x_subset = np.ravel(h2_xarray.X).reshape(-1, k)  # shape (timesteps, k)
    # np.roll(x, n)[i] == x[i - n], so this is X[i-1] * (X[i-2] - X[i+1]) on a periodic ring.
    advection = np.roll(x_subset, 1, axis=1) * (np.roll(x_subset, 2, axis=1) - np.roll(x_subset, -1, axis=1))
    data = np.stack([x_subset, advection], axis=2)
    y = np.ravel(h2_xarray.Y).reshape(-1, k, J)
    data_with_y = np.concatenate([data, y], axis=2)

    del h2_xarray

    # Number of leading rows to discard. Round before converting to int: a plain
    # int() truncates ratios that land just below a whole number because of
    # floating-point error (0.3 / 0.1 == 2.9999999999999996), which would keep
    # one row of burn-in in the saved data.
    # NOTE(review): assumes the history holds one row per save_time_step — confirm.
    burn_in_rows = int(round(burn_in_mtu / save_time_step))

    np.save(data_file_path_to_save, data[burn_in_rows:])
    np.save(data_with_y_file_path_to_save, data_with_y[burn_in_rows:])

    initX_new = x_subset[-1, :]
    initY_new = y[-1, :, :].reshape(k * J)

    return initX_new, initY_new



In [15]:
def create_datasets_no_saving_of_xarray(initial_X,initial_Y,mtu, data_file_path_to_save, data_with_y_file_path_to_save,forcing):
    """
    Continue a two-level L96 run from a given state, without any burn-in.

    The simulator is started from ``initial_X`` / ``initial_Y`` and iterated for
    ``mtu`` time units. The first history row is dropped before saving, and two
    ``.npy`` files are written:

    * ``data_file_path_to_save``: shape (rows, k, 2) with X and the advection
      term ``X[i-1] * (X[i-2] - X[i+1])`` (indices wrap around).
    * ``data_with_y_file_path_to_save``: shape (rows, k, 2 + J), the same two
      channels followed by Y reshaped to (rows, k, J).

    Uses the module-level settings ``k``, ``J``, ``time_step`` and
    ``save_time_step``.

    Returns the last X state, shape (k,), and the last Y state flattened to
    shape (k * J,), so a further run can be started from them.
    """
    simulator = L96TwoLevel_updated(
        save_dt=save_time_step,
        dt=time_step,
        X_init=initial_X,
        Y_init=initial_Y,
        K=k,
        J=J,
        F=forcing,
        h=1,
        c=10,
        b=10,
    )
    simulator.iterate(mtu)
    run_history = simulator.history

    slow = np.ravel(run_history.X).reshape(-1, k)     # (timesteps, k)
    fast = np.ravel(run_history.Y).reshape(-1, k, J)  # (timesteps, k, J)
    del run_history

    # Neighbours on the periodic ring: np.roll(x, n)[i] == x[i - n].
    prev_1 = np.roll(slow, 1, axis=1)
    prev_2 = np.roll(slow, 2, axis=1)
    next_1 = np.roll(slow, -1, axis=1)
    advection = prev_1 * (prev_2 - next_1)

    features = np.stack([slow, advection], axis=2)
    features_with_fast = np.concatenate([features, fast], axis=2)

    # Row 0 is skipped when saving.
    # NOTE(review): presumably because it repeats the initial state passed in — confirm.
    np.save(data_file_path_to_save, features[1:])
    np.save(data_with_y_file_path_to_save, features_with_fast[1:])

    return slow[-1, :], fast[-1, :, :].reshape(k * J)


In [16]:
# Settings read as globals by the dataset-generation functions defined above.
k = 8                    # passed to the simulator as K; X is reshaped to (timesteps, k)
J = 32                   # passed to the simulator as J; Y is reshaped to (timesteps, k, J)
burn_in_mtu = 2          # model time discarded at the start of the burn-in run
time_step = 0.001        # passed to the simulator as dt
save_time_step = 0.005   # passed to the simulator as save_dt

# Initial state: all zeros apart from a 1 in the first entry of X and of Y.
initX = np.zeros(k)
initX[0] = 1

initY = np.zeros(J * k)
initY[0] = 1

One MTU is one 'model time unit', i.e. an elapsed model time of 1 (with the settings above, 1 / save_time_step = 200 saved samples per MTU).

# Data generation #

Example - 2000 MTU for F = 20

In [None]:
# Truth run: burn in first, then keep 2000 MTU of data generated with forcing F = 20.
_, _ = create_datasets_no_saving_of_xarray_burn_in(
    initial_X=initX,
    initial_Y=initY,
    mtu=2000,
    data_file_path_to_save="../data/truth_run/training_dataset.npy",
    data_with_y_file_path_to_save="../data/truth_run/training_dataset_with_y.npy",
    forcing=20,
)

The cells below split the generation into several shorter runs, each restarting from the last saved state of the previous run. Use them if generating a large dataset in a single run causes out-of-memory (OOM) errors.

In [None]:
# Only the final saved state is needed to restart the simulation. Memory-map the
# file instead of reading it all into RAM: a fully loaded array would stay alive
# for the whole of the next run, which is exactly what this OOM workaround is
# trying to avoid.
initial_data = np.load("../data/truth_run/training_dataset_with_y.npy", mmap_mode="r")

# Last axis of the saved array is [X, advection, Y...]; index 0 is X and 2: is Y.
# np.array(...) copies the slices into ordinary in-memory arrays, so the values
# handed to the simulator are detached from the read-only file mapping.
initX_extra = np.array(initial_data[-1,:,0])
initY_extra = np.array(initial_data[-1,:,2:]).reshape(k*J)

_, _ = create_datasets_no_saving_of_xarray(initX_extra,initY_extra,25000,"../data/truth_run/extra.npy","../data/truth_run/extra_with_y.npy",20)

In [None]:
# Only the final saved state is needed to restart the simulation. Memory-map the
# file instead of reading it all into RAM: a fully loaded array would stay alive
# for the whole of the next run, which is exactly what this OOM workaround is
# trying to avoid.
extra = np.load("../data/truth_run/extra_with_y.npy", mmap_mode="r")

# Last axis of the saved array is [X, advection, Y...]; index 0 is X and 2: is Y.
# np.array(...) copies the slices into ordinary in-memory arrays, so the values
# handed to the simulator are detached from the read-only file mapping.
initX_extra2 = np.array(extra[-1,:,0])
initY_extra2 = np.array(extra[-1,:,2:]).reshape(k*J)

_, _ = create_datasets_no_saving_of_xarray(initX_extra2,initY_extra2,25000,"../data/truth_run/extra2.npy","../data/truth_run/extra2_with_y.npy",20)