In [1]:
import numpy as np
import scipy as sp
from scipy import integrate
import matplotlib.pyplot as plt
import sys
#sys.path.append("/Users/tcasey/.pyenv/versions/3.10.4/lib/python3.10/site-packages")
import h5py

import os

import pandas as pd

# WORKSPACE_PATH = os.environ['WORKSPACE_PATH']
WORKSPACE_PATH = os.path.join(os.getcwd(), '../../../../../')
ROMNet_fld     = os.path.join(WORKSPACE_PATH, 'ROMNet/romnet/')

Missing colon in file PosixPath('/Users/sventur/.matplotlib/stylelib/paper_2columns.mplstyle'), line 1 ('f#### MATPLOTLIBRC FORMAT')


In [2]:
OutputDir   = os.path.join(WORKSPACE_PATH, 'ROMNet/Data/PDFEvolve_10Cases/')


#define domain
Npts=50
x = np.linspace(-2.,2.,Npts)

#generate data
sig1=0.5
mu1=0.0
sig2=0.1
mu2=-1.75 #use this as parametet to vary initial conditions
nSamps=10
mu2_samps = np.random.uniform(-1.75, -1.25, nSamps) #uniform random samples


try:
    os.makedirs(OutputDir)
except OSError as e:
    pass

In [3]:
x_names  = ['x_'+str(i+1) for i in range(Npts)]
x0_names = ['x0_'+str(i+1) for i in range(Npts)]

In [4]:
#background density
f1 = 1./(sig1*np.sqrt(2.*np.pi)) * np.exp( -(x-mu1)**2. /(2.*sig1**2.))
#cdf
f1_int =  integrate.cumtrapz(f1, x=x, axis=-1, initial=None)


pdf_all = []
for i in range(0,mu2_samps.shape[0]) :
    mu2=mu2_samps[i]
    #do all the data generation and write output hdf5 files
    #pass
    T=1.
    dt=0.01
    vt=3.
    nt = int(T/dt)
    #print('nt=',nt)
    t=np.linspace(0,T,nt)

    #container for output
    pdf_t = np.zeros((Npts,nt))
    cdf_t = np.zeros((Npts-1,nt))

    #initial condition
    f2 = 1./(sig2*np.sqrt(2.*np.pi)) * np.exp( -(x-mu2)**2. /(2.*sig2**2.))
    f=(f1+0.2*f2)
    pdf_t0=f/np.trapz(f,x)
    
    #time evolution
    for it, t_ in enumerate(range(0,nt)):
        mu2=mu2 + vt*dt
        #print(mu2)
        f2 = 1./(sig2*np.sqrt(2.*np.pi)) * np.exp( -(x-mu2)**2. /(2.*sig2**2.))
        f=(f1+0.2*f2)
        #normalize
        f=f/np.trapz(f,x)
        #store
        pdf_t[:,t_]=f #pdf
        cdf_t[:,t_]=integrate.cumtrapz(f, x=x, axis=-1, initial=None) #cdf
        
    pdf_df           = pd.DataFrame(pdf_t.T, columns=x_names)
    pdf_df[x0_names] = np.tile(pdf_t0[np.newaxis,...], (nt,1))
    pdf_df['t']      = t
    
    pdf_all.append(pdf_df)
    
    
    
    
    #normalization check
    #print(np.trapz(f,x))

    #write data sets to file
    #probably want to pack all the data fields into a single hdf5 file within the 'i' loop
#     f1_ = h5py.File(OutputDir+'/pdf_t_'+str(i)+'.hdf5', 'w')
#     tmp_ = f1_.create_dataset("p_xt", data=pdf_t, dtype='f')
#     tmp_ = f1_.create_dataset("x", data=x, dtype='f')
#     tmp_ = f1_.create_dataset("t", data=t, dtype='f')
#     f1_.close()

#     c1_ = h5py.File(OutputDir+'/cdf_t_'+str(i)+'.hdf5', 'w')
#     tmp_ = c1_.create_dataset("c_xt", data=cdf_t, dtype='f')
#     tmp_ = c1_.create_dataset("x", data=x, dtype='f')
#     tmp_ = c1_.create_dataset("t", data=t, dtype='f')
#     c1_.close()
    
pdf_all = pd.concat(pdf_all, axis=0)

In [5]:
from sklearn.model_selection import train_test_split

try:
    os.makedirs(OutputDir+'/Orig/')
except:
    pass
try:
    os.makedirs(OutputDir+'/Orig/train/')
except:
    pass
try:
    os.makedirs(OutputDir+'/Orig/valid/')
except:
    pass

In [6]:
data_id    = 'pts'

DataInput  = pd.DataFrame(pdf_all[['t']+x0_names])
DataOutput = pd.DataFrame(pdf_all[['t']+x_names])
                          
n_points   = len(DataInput)

idx                  = np.arange(n_points)
train_idx, valid_idx = train_test_split(idx, test_size=0.2, random_state=42)

n_valid              = len(valid_idx)
n_train              = len(train_idx)

try:
    os.makedirs(OutputDir+'/Orig/train/'+data_id+'/')
except:
    pass
try:
    os.makedirs(OutputDir+'/Orig/valid/'+data_id+'/')
except:
    pass
DataInput.iloc[train_idx].to_csv(OutputDir+'/Orig/train/'+data_id+'/Input.csv', index=False)
DataInput.iloc[valid_idx].to_csv(OutputDir+'/Orig/valid/'+data_id+'/Input.csv', index=False)

DataOutput.iloc[train_idx].to_csv(OutputDir+'/Orig/train/'+data_id+'/Output.csv', index=False)
DataOutput.iloc[valid_idx].to_csv(OutputDir+'/Orig/valid/'+data_id+'/Output.csv', index=False)

In [12]:
try:
    os.makedirs(OutputDir+'/Orig/test/ext/')
except:
    pass

In [13]:
#background density
f1 = 1./(sig1*np.sqrt(2.*np.pi)) * np.exp( -(x-mu1)**2. /(2.*sig1**2.))
#cdf
f1_int =  integrate.cumtrapz(f1, x=x, axis=-1, initial=None)


pdf_all = []
for i in range(0,mu2_samps.shape[0]) :
    mu2=mu2_samps[i]
    #do all the data generation and write output hdf5 files
    #pass
    T=1.
    dt=0.01
    vt=3.
    nt = int(T/dt)
    #print('nt=',nt)
    t=np.linspace(0,T,nt)

    #container for output
    pdf_t = np.zeros((Npts,nt))
    cdf_t = np.zeros((Npts-1,nt))

    #initial condition
    f2 = 1./(sig2*np.sqrt(2.*np.pi)) * np.exp( -(x-mu2)**2. /(2.*sig2**2.))
    f=(f1+0.2*f2)
    pdf_t0=f/np.trapz(f,x)
    
    #time evolution
    for it, t_ in enumerate(range(0,nt)):
        mu2=mu2 + vt*dt
        #print(mu2)
        f2 = 1./(sig2*np.sqrt(2.*np.pi)) * np.exp( -(x-mu2)**2. /(2.*sig2**2.))
        f=(f1+0.2*f2)
        #normalize
        f=f/np.trapz(f,x)
        #store
        pdf_t[:,t_]=f #pdf
        cdf_t[:,t_]=integrate.cumtrapz(f, x=x, axis=-1, initial=None) #cdf
        
    pdf_df           = pd.DataFrame(pdf_t.T, columns=x_names)
    pdf_df[x0_names] = np.tile(pdf_t0[np.newaxis,...], (nt,1))
    pdf_df['t']      = t
    
    pdf_all.append(pdf_df)
    
    
    pdf_df.to_csv(OutputDir+'/Orig/test/ext/y.csv.'+str(i+1), index=False)
    
    
    
    
    #normalization check
    #print(np.trapz(f,x))

    #write data sets to file
    #probably want to pack all the data fields into a single hdf5 file within the 'i' loop
#     f1_ = h5py.File(OutputDir+'/pdf_t_'+str(i)+'.hdf5', 'w')
#     tmp_ = f1_.create_dataset("p_xt", data=pdf_t, dtype='f')
#     tmp_ = f1_.create_dataset("x", data=x, dtype='f')
#     tmp_ = f1_.create_dataset("t", data=t, dtype='f')
#     f1_.close()

#     c1_ = h5py.File(OutputDir+'/cdf_t_'+str(i)+'.hdf5', 'w')
#     tmp_ = c1_.create_dataset("c_xt", data=cdf_t, dtype='f')
#     tmp_ = c1_.create_dataset("x", data=x, dtype='f')
#     tmp_ = c1_.create_dataset("t", data=t, dtype='f')
#     c1_.close()
    
pdf_all = pd.concat(pdf_all, axis=0)

In [7]:
#load hdf5

f2_=h5py.File(OutputDir+'/pdf_t_0.hdf5', 'r')
list(f2_.keys())
pdf_t = f2_['p_xt'][:]
x= f2_['x'][:]
t= f2_['t'][:]
f2_.close()

c2_=h5py.File(OutputDir+'/cdf_t_0.hdf5', 'r')
list(c2_.keys())
cdf_t = c2_['c_xt'][:]
#x= c2_['x']
#t= c2_['t']
c2_.close()

FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = '/Users/sventur/WORKSPACE/ROMNet/romnet/scripts/generating_data/PDFEvolve/../../../../../ROMNet/Data/PDFEvolve_10Cases//pdf_t_0.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
# ====== line plot =============
plt.plot(x,f,'bo')
plt.xlabel('x',fontsize=14)
plt.ylabel('f(x)',fontsize=14)
plt.grid()
plt.title('pdf',fontsize=14)
plt.show()
# ============================


#cdf
f_int =  integrate.cumtrapz(f, x=x, axis=-1, initial=None)
# ====== line plot =============
plt.plot(x[0:-1],f_int,'bo')
plt.xlabel('x',fontsize=14)
plt.ylabel('c(x)',fontsize=14)
plt.grid()
plt.title('cdf',fontsize=14)
plt.show()
# ============================

In [None]:
#plot saved data (hdf5)

# ====== line plot =============
n=25
plt.plot(t,cdf_t[n,:],'bo')
plt.xlabel('t',fontsize=14)
s = '{0:.3g}'.format(x[n])
plt.ylabel('p(x='+s+')',fontsize=14)
plt.grid()
plt.title('p(x,t)',fontsize=14)
plt.show()
# ============================


# ====== line plot =============
Npts=pdf_t.shape[0]
for n in range(0,Npts):
    plt.plot(t,pdf_t[n,:],'-')
plt.xlabel('t',fontsize=14)
plt.ylabel('p(t)',fontsize=14)
plt.grid()
plt.title('PDF, p(x,t)',fontsize=14)
plt.show()
# ============================

# ====== line plot =============
for n in range(0,Npts-1):
    plt.plot(t,cdf_t[n,:],'-')
plt.xlabel('t',fontsize=14)
plt.ylabel('p(t)',fontsize=14)
plt.grid()
plt.title('CDF, p(x,t)',fontsize=14)
plt.show()
# ============================