# Direct sampling with Gaussian process regression

#### Import general modules

mpi4py is always required when using these tools. Numpy is always good to have if any manipulation is to be done.

In [1]:
# Import required modules
from mpi4py import MPI #equivalent to the use of MPI_init() in C
import matplotlib.pyplot as plt
import numpy as np

# Get mpi info
comm = MPI.COMM_WORLD

#### Import modules from pynektools

In this case we will import all the data types that we currently support, as well as io functions that are required to populate them.

In [2]:
# Data types
from pynektools.datatypes.msh import Mesh
from pynektools.datatypes.coef import Coef
from pynektools.datatypes.field import Field, FieldRegistry

# Readers
from pynektools.io.ppymech.neksuite import preadnek, pynekread

# Writers
from pynektools.io.ppymech.neksuite import pwritenek, pynekwrite

fname = '/home/adperez/cpc_gaussian_process_compression/data/mixlay/mixlay0.f00001'
#fname = '/home/adperez/Documents/gaussian_process/Gaussian Process_0823/data/turbPipe/turbPipe0.f00001'


## Read the data

In [3]:
msh = Mesh(comm, create_connectivity=False)
fld = FieldRegistry(comm)

pynekread(fname, comm, data_dtype=np.double, msh = msh, fld = fld)

for e in range(msh.nelv):
    if (np.min(msh.x[e]), np.max(msh.x[e])) == (0, 0.25):
        if (np.min(msh.y[e]), np.max(msh.y[e])) == (0, 1.1399999856948853):
            print(e)
    
coef = Coef(msh=msh, comm=comm)

2025-02-22 21:28:18,414 - Mesh - INFO - Initializing empty Mesh object.
2025-02-22 21:28:18,415 - Field - INFO - Initializing empty Field object
2025-02-22 21:28:18,416 - pynekread - INFO - Reading file: /home/adperez/cpc_gaussian_process_compression/data/mixlay/mixlay0.f00001
2025-02-22 21:28:18,423 - Mesh - INFO - Initializing Mesh object from x,y,z ndarrays.
2025-02-22 21:28:18,424 - Mesh - INFO - Initializing common attributes.
2025-02-22 21:28:18,425 - Mesh - INFO - Getting vertices
2025-02-22 21:28:18,427 - Mesh - INFO - Getting edge centers
2025-02-22 21:28:18,432 - Mesh - INFO - Facet centers not available for 2D
2025-02-22 21:28:18,433 - Mesh - INFO - Mesh object initialized.
2025-02-22 21:28:18,434 - Mesh - INFO - Mesh data is of type: float64
2025-02-22 21:28:18,434 - Mesh - INFO - Elapsed time: 0.011101215s
2025-02-22 21:28:18,435 - pynekread - INFO - Reading field data
2025-02-22 21:28:18,441 - pynekread - INFO - File read
2025-02-22 21:28:18,442 - pynekread - INFO - Elaps

## Compress the data with GPR

### Initialize the Direct sampler

In [4]:
from pynektools.compression.gpc_direct_sampling import DirectSampler

ds = DirectSampler(comm=comm, msh=msh)

2025-02-22 21:28:18,521 - DirectSampler - INFO - Initializing the DirectSampler from a Mesh object


### Sample the data

In [5]:
ds.log.tic()

# Select the options
n_samples = 1
bitrate = n_samples/(msh.lx*msh.ly*msh.lz)

# Sample here
ds.sample_field(field=fld.registry["u"], field_name="u", covariance_method="svd", compression_method="fixed_bitrate", bitrate = bitrate, covariance_keep_modes=3)
#ds.sample_field(field=fld.registry["u"], field_name="u", covariance_method="average", covariance_elements_to_average=int(msh.nelv/16), compression_method="fixed_bitrate", bitrate = bitrate)
#ds.sample_field(field=fld.registry["u"], field_name="u", covariance_method="average", covariance_elements_to_average=10, compression_method="fixed_bitrate", bitrate = bitrate)

ds.log.toc()

2025-02-22 21:28:18,536 - DirectSampler - INFO - Sampling the field with options: covariance_method: {covariance_method}, compression_method: {compression_method}
2025-02-22 21:28:18,538 - DirectSampler - INFO - Estimating the covariance matrix
2025-02-22 21:28:18,538 - DirectSampler - INFO - Transforming the field into to legendre space
2025-02-22 21:28:18,543 - DirectSampler - INFO - Estimating the covariance matrix using the SVD method. Keeping 3 modes
2025-02-22 21:28:18,558 - DirectSampler - INFO - U saved in field data_to_compress["u"]["U"]
2025-02-22 21:28:18,560 - DirectSampler - INFO - s saved in field data_to_compress["u"]["s"]
2025-02-22 21:28:18,568 - DirectSampler - INFO - Vt saved in field data_to_compress["u"]["Vt"]
2025-02-22 21:28:18,569 - DirectSampler - INFO - Sampling the field using the fixed bitrate method. using settings: {'method': 'fixed_bitrate', 'bitrate': np.float64(0.015625), 'n_samples': 1}
2025-02-22 21:28:18,570 - DirectSampler - INFO - Proccesing up to 

### Encode it

In [6]:
ds.compress_samples(lossless_compressor="bzip2")
print(ds.compressed_data["u"].keys())

2025-02-22 21:28:18,769 - DirectSampler - INFO - Compressing the data using the lossless compressor: bzip2
2025-02-22 21:28:18,770 - DirectSampler - INFO - Compressing data in data_to_compress
2025-02-22 21:28:18,775 - DirectSampler - INFO - Compressing data for field ["u"]:
2025-02-22 21:28:18,776 - DirectSampler - INFO - Compressing ["U"] for field ["u"]
2025-02-22 21:28:18,787 - DirectSampler - INFO - Compressing ["s"] for field ["u"]
2025-02-22 21:28:18,789 - DirectSampler - INFO - Compressing ["Vt"] for field ["u"]
2025-02-22 21:28:18,790 - DirectSampler - INFO - Compressing ["field"] for field ["u"]
dict_keys(['U', 's', 'Vt', 'field'])


### Write it out

In [7]:
ds.write_compressed_samples(comm=comm, filename="test")

2025-02-22 21:28:19,011 - DirectSampler - INFO - Parallel HDF5 not available; creating folder to store rank files.


In [8]:
print(ds.uncompressed_data["u"]["field"].shape)
#print(ds.data_to_compress["u"]["kw"].shape)
print(ds.uncompressed_data["u"]["U"].shape)
print(ds.uncompressed_data["u"]["s"].shape)
print(ds.uncompressed_data["u"]["Vt"].shape)

(1600, 1, 8, 8)
(1600, 3)
(3,)
(3, 64)


### Read the data in another object

In [9]:
ds_read = DirectSampler(comm, filename="test")



2025-02-22 21:28:19,036 - DirectSampler - INFO - Initializing the DirectSampler from file: test
reading


In [10]:
print(ds.settings)
print(ds_read.settings)

for key in ds_read.settings.keys():
    print("=============================")
    print(key, ds.settings[key])
    print(key, ds_read.settings[key])

{'dtype': 'double', 'mesh_information': {'lx': np.int64(8), 'ly': np.int64(8), 'lz': np.int64(1), 'nelv': np.int64(1600), 'gdim': 2}, 'covariance': {'method': 'svd', 'averages': np.int64(1600), 'elements_to_average': 1, 'keep_modes': 3, 'kw_diag': True}, 'compression': {'method': 'fixed_bitrate', 'bitrate': np.float64(0.015625), 'n_samples': 1}}
{'dtype': 'double', 'compression': {'bitrate': np.float64(0.015625), 'method': 'fixed_bitrate', 'n_samples': np.int64(1)}, 'covariance': {'averages': np.int64(1600), 'elements_to_average': np.int64(1), 'keep_modes': np.int64(3), 'kw_diag': np.True_, 'method': 'svd'}, 'mesh_information': {'gdim': np.int64(2), 'lx': np.int64(8), 'ly': np.int64(8), 'lz': np.int64(1), 'nelv': np.int64(1600)}}
dtype double
dtype double
compression {'method': 'fixed_bitrate', 'bitrate': np.float64(0.015625), 'n_samples': 1}
compression {'bitrate': np.float64(0.015625), 'method': 'fixed_bitrate', 'n_samples': np.int64(1)}
covariance {'method': 'svd', 'averages': np.in

In [11]:
t1 = ds_read.uncompressed_data["u"]["field"] - ds.uncompressed_data["u"]["field"]

#t2 = decompressed_data["u"]["kw"] - ds.data_to_compress["u"]["kw"]
t2 = ds_read.uncompressed_data["u"]["U"] - ds.uncompressed_data["u"]["U"]
#t2 = decompressed_data["u"]["s"] - ds.data_to_compress["u"]["s"]
#t2 = decompressed_data["u"]["Vt"] - ds.data_to_compress["u"]["Vt"]


print(np.max(t1))
print(np.min(t1))
print(np.max(t2))
print(np.min(t2))
print(np.mean(t1))
print(np.mean(t2))

import sys
sys.exit(0)

0.0
0.0
0.0
0.0
0.0
0.0


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
ds_read = DirectSampler(comm=comm, msh=msh)

settings, compressed_data = ds_read.read_compressed_samples(comm=comm, filename="test")
print(compressed_data.keys())
print(compressed_data["u"].keys())
print(compressed_data["u"]["field"])
#print(compressed_data["u"]["kw"])
print(settings)

In [None]:
decompressed_data = ds.decompress_samples(settings, compressed_data)
print(decompressed_data.keys())

#print(decompressed_data["u"].keys())
#print(decompressed_data["u"]["field"])




In [None]:
t1 = decompressed_data["u"]["field"] - ds.uncompressed_data["u"]["field"]

#t2 = decompressed_data["u"]["kw"] - ds.data_to_compress["u"]["kw"]
t2 = decompressed_data["u"]["U"] - ds.uncompressed_data["u"]["U"]
#t2 = decompressed_data["u"]["s"] - ds.data_to_compress["u"]["s"]
#t2 = decompressed_data["u"]["Vt"] - ds.data_to_compress["u"]["Vt"]


print(np.max(t1))
print(np.min(t1))
print(np.max(t2))
print(np.min(t2))
print(np.mean(t1))
print(np.mean(t2))


In [None]:
# Check the difference between settings and ds.settings
print(settings)
print(ds.settings)



import h5py
with h5py.File("test_comp/test_rank_0.h5", "r") as f:
    
    for key in f.keys():
    
            item = f[key]
            print("  " + key)

    print(f.keys())
    print("\n")

    print(f["rank_0"]["u"].keys())



In [None]:
print(ds_read.settings)
print(settings)

if True == np.True_:
    print("True")

In [None]:
import sys
sys.exit(0)

In [None]:
# Hack it a bit to make the rest work

ds.field = fld.registry["u"]
ds.field_sampled = ds.uncompre["u"]["field"]
#ds.kw = ds.data_to_compress["u"]["kw"]

averages = ds.settings["covariance"]["averages"]
elements_to_average = ds.settings["covariance"]["elements_to_average"]

if ds.settings["covariance"]["method"] == "svd":
    # Retrieve the SVD components
    U = ds.uncompre[f"u"]["U"]
    s = ds.uncompre[f"u"]["s"]
    Vt = ds.uncompre[f"u"]["Vt"]

    # Construct the f_hat
    f_hat = np.einsum("ik,k,kj->ij", U, s, Vt)

    # This is the way in which I calculate the covariance here and then get the diagonals
    if ds.kw_diag == True:
        # Get the covariances
        kw_ = np.einsum("eik,ekj->eij", f_hat.reshape(averages*elements_to_average,-1,1), f_hat.reshape(averages*elements_to_average,-1,1).transpose(0,2,1))
        # Extract only the diagonals
        kw_ = np.einsum("...ii->...i", kw_)
        
    else:
        # But I can leave the calculation of the covariance itself for later and store here the average of field_hat
        kw_ = f_hat.reshape(averages*elements_to_average,-1,1)

    ds.kw = kw_

else:
    ds.kw = ds.uncompre[f"u"]["kw"]



print(ds.kw.shape)

ds.elements_to_average = ds.settings["covariance"]["elements_to_average"]
ds.n_samples = ds.settings["compression"]["n_samples"]

In [None]:
ind_50 = np.where(ds.field_sampled[0] == -50)
#print(ds.field[0])
#print(ds.field_sampled[0])
a = abs(ds.field_sampled[0])-abs(ds.field[0])
a[ind_50] = -50
print(a)

for e in range(msh.nelv):
    ind_50 = np.where(ds.field_sampled[e] == -50)
    if len(ind_50[0]) > n_samples:
        print(e, len(ind_50[0]))

In [None]:

print(ds.field_hat[1466].reshape(-1,1, order='F'))

print("======")

print(ds.field_sampled[1466].reshape(-1,1, order='F'))

In [None]:
recon = ds.predict(ds.field_sampled)


AttributeError: 'DirectSampler' object has no attribute 'field_sampled'

In [None]:
fig, ax = plt.subplots(figsize=(5, 2.5), dpi = 200)
c = ax.tricontourf(msh.x.flatten(), msh.y.flatten() ,fld.registry["u"].flatten(), levels=100, cmap="RdBu_r")
fig.colorbar(c)
ax.set_aspect('equal')
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_xlim([1.5,6.5])
ax.set_ylim([5.5,8.5])
plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(5, 2.5), dpi = 200)
#c = ax.tricontourf(msh.x.flatten(), msh.y.flatten() , fld.registry["u"].flatten() - recon.flatten(), levels=100, cmap="RdBu_r")
c = ax.tricontourf(msh.x.flatten(), msh.y.flatten() ,recon.flatten(), levels=100, cmap="RdBu_r")
fig.colorbar(c)
ax.set_aspect('equal')
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_xlim([1.5,6.5])
ax.set_ylim([5.5,8.5])
plt.show()

In [None]:
print(np.mean(fld.registry["u"] - recon))
print(np.max(abs(fld.registry["u"] - recon)))
print(np.min(abs(fld.registry["u"] - recon)))

In [None]:
print(ds.settings)