In [1]:
# %load defaults.ipy
import numpy as np
import matplotlib
matplotlib.rcParams['savefig.dpi'] = 600
%matplotlib inline
import matplotlib.pyplot as plt

import sys
sys.path.append('../python')
from plot_info import showAndSave, savePlot, get_environment
import plot_info
plot_info.set_notebook_name("WassersteinDistancesPerturbationsAll.ipynb")

import netCDF4
from IPython.core.display import display, HTML
import matplotlib2tikz
import os
import h5py
import ot
import sys
import scipy
import scipy.stats

# we had some issues on the euler cluster loading the correct libraries
# Drop every search-path entry that mentions matplotlib or netcdf.
# The list is rebuilt in place (slice assignment keeps the same list object)
# instead of calling remove() while iterating over it: removing during
# iteration skips the entry that follows each removed one, and an entry
# matching both patterns would be removed twice, raising ValueError.
sys.path[:] = [
    entry for entry in sys.path
    if 'matplotlib' not in entry.lower() and 'netcdf' not in entry.lower()
]

from mpl_toolkits.mplot3d import Axes3D

# Names of the conserved variables read for each sample (see load());
# their order fixes the component index along the last axis of loaded arrays.
conserved_variables = ['rho', 'mx', 'my', 'E']
# Number of components stored per grid point (length of conserved_variables).
number_of_variables = len(conserved_variables)

def load(f, sample):
    """
    Read every conserved variable of one sample from a netCDF file.

    f is the path of the file and sample the index used in the variable
    names ``sample_{sample}_{variable}``. For each entry of
    conserved_variables the slice ``[:,:,0]`` of the matching variable is
    copied into the last axis of the result, in that order.

    Returns a float array whose leading shape is that of the density slice
    and whose last axis has length number_of_variables.

    Raises Exception if the path does not contain '.nc'.
    """
    # Only netCDF input is handled; bail out early for anything else.
    if '.nc' not in f:
        raise Exception(f"Unsupported file type: {f}")

    with netCDF4.Dataset(f) as dataset:
        # The density slice is read once up front purely to size the output.
        plane_shape = dataset.variables[f'sample_{sample}_rho'][:,:,0].shape
        data = np.zeros((*plane_shape, number_of_variables))

        for component, variable in enumerate(conserved_variables):
            key = f'sample_{sample}_{variable}'
            data[:,:,component] = dataset.variables[key][:,:,0]

    return data
        

# Echo the two data locations this notebook reads, as resolved by plot_info.
print(f"STATISTICAL_KH_PERTS={plot_info.get_environment('STATISTICAL_KH_PERTS', [])}")
print(f"STATISTICAL_KH_PERTS_NORMAL_UNIFORM={plot_info.get_environment('STATISTICAL_KH_PERTS_NORMAL_UNIFORM', [])}")



ModuleNotFoundError: No module named 'ot'

# Computing Wasserstein distances

In [20]:
def wasserstein_point2_fast(data1, data2, i, j, ip, jp, a, b, xs, xt):
    """
    Wasserstein distance between data1 and data2 for one pair of points,
    (i, j) and (ip, jp), in the spatial domain.

    xs and xt are scratch arrays supplied by the caller and are overwritten:
    the first number_of_variables columns receive the values at (i, j) and
    the remaining columns the values at (ip, jp), from data1 and data2
    respectively. a and b are the weights passed to ot.emd for the rows of
    xs and xt.

    Returns the sum of the transport plan times the Euclidean cost matrix.
    """
    width = number_of_variables

    # Source cloud: both points of data1 side by side in each row.
    xs[:, :width] = data1[i, j, :, :]
    xs[:, width:] = data1[ip, jp, :, :]

    # Target cloud: the same layout, taken from data2.
    xt[:, :width] = data2[i, j, :]
    xt[:, width:] = data2[ip, jp, :]

    cost = ot.dist(xs, xt, metric='euclidean')
    plan = ot.emd(a, b, cost)

    return np.sum(plan * cost)

def wasserstein1pt_fast(data1, data2):
    """
    Approximate the L^1(W_1) distance (||W_1(nu1, nu2)||_{L^1})

    data1 and data2 are indexed as [i, j, sample, variable]: for every grid
    point (i, j) the slice [i, j, :, :] is treated as a point cloud with one
    row per sample, and the optimal transport cost between the two clouds
    (uniform weights, Euclidean ground cost) is computed with ot.emd.

    Returns the mean of these costs over all grid points of data1.
    """
    # Spatial extents and sample counts are read from their own axes. The
    # weights must have one entry per sample (axis 2), which only coincides
    # with the grid size (axis 0) when the array happens to be cubic.
    nx, ny = data1.shape[0], data1.shape[1]
    samples1 = data1.shape[2]
    samples2 = data2.shape[2]

    # Uniform weights over the samples of each ensemble.
    a = np.ones(samples1)/samples1
    b = np.ones(samples2)/samples2

    distance = 0
    for i in range(nx):
        for j in range(ny):
            xs = data1[i,j,:,:]
            xt = data2[i,j,:,:]
            M = ot.dist(xs, xt, metric='euclidean')
            G0 = ot.emd(a,b,M)

            distance += np.sum(G0*M)

    # Average over the grid points visited above.
    return distance / (nx * ny)



def wasserstein2pt_fast(data1, data2):
    """
    Approximate the L^1 norm over pairs of points of the W_1 distance
    between the two-point samples of data1 and data2.

    data1 and data2 are indexed as [i, j, sample, variable]. Instead of
    visiting every pair of grid points, the distance is evaluated on all
    pairs ((i, j), (ip, jp)) drawn from a 10 x 10 lattice of grid points
    (fractions 0.0, 0.1, ..., 0.9 of each spatial extent) and averaged over
    those 10**4 pairs. Each evaluation is delegated to
    wasserstein_point2_fast with uniform weights over the samples.
    """
    # Spatial extents and sample counts come from their own axes; the
    # weights and scratch arrays need one row per sample (axis 2), which
    # equals the grid size (axis 0) only for cubic arrays.
    nx, ny = data1.shape[0], data1.shape[1]
    samples1 = data1.shape[2]
    samples2 = data2.shape[2]

    a = np.ones(samples1)/samples1
    b = np.ones(samples2)/samples2

    # Scratch buffers reused (and overwritten) by every point-pair evaluation.
    xs = np.zeros((samples1, 2*number_of_variables))
    xt = np.zeros((samples2, 2*number_of_variables))

    # Lattice fractions and the grid indices they map to, computed once
    # rather than inside the innermost loop.
    points = 0.1*np.array(range(0,10))
    rows = [int(x*nx) for x in points]
    columns = [int(y*ny) for y in points]

    distance = 0
    for i in rows:
        for j in columns:
            for ip in rows:
                for jp in columns:
                    distance += wasserstein_point2_fast(data1, data2, i, j, ip, jp, a, b, xs, xt)

    return distance / len(points)**4


def plotWassersteinConvergence(name, basename, r, perturbations):
    """
    Plot the Wasserstein error of each perturbation size against a reference.

    name          -- label used in the plot titles and output file names.
    basename      -- path template containing ``{perturbation}``.
    r             -- used both as the number of samples read per file and as
                     the extent of the two spatial axes of each sample.
    perturbations -- perturbation sizes; the last entry is the reference the
                     others are compared against.

    For every perturbation except the last, the two-point and one-point
    Wasserstein errors against the reference are computed and printed as
    they accumulate. Two log-log plots (two-point first, then one-point) are
    then produced and saved with showAndSave.
    """
    def load_ensemble(filename):
        # Stack samples 0..r-1 of one file along axis 2.
        ensemble = np.zeros((r, r, r, number_of_variables))
        for k in range(r):
            ensemble[:,:,k,:] = load(filename, k)
        return ensemble

    reference_perturbation = perturbations[-1]
    compared_perturbations = perturbations[:-1]

    # The reference does not depend on the loop variable, so it is read once.
    reference = load_ensemble(basename.format(perturbation=reference_perturbation))

    wasserstein2pterrors = []
    wasserstein1pterrors = []
    for p in compared_perturbations:
        # Each ensemble is read once and used for both measures.
        data = load_ensemble(basename.format(perturbation=p))

        wasserstein2pterrors.append(wasserstein2pt_fast(data, reference))
        print("wasserstein2pterrors=%s" % wasserstein2pterrors)

        wasserstein1pterrors.append(wasserstein1pt_fast(data, reference))
        print("wasserstein1pterrors=%s" % wasserstein1pterrors)

    # The errors belong to perturbations[:-1] (everything but the reference),
    # so that is what goes on the x-axis.
    # NOTE(review): newer matplotlib releases renamed basex/basey to `base`;
    # kept as-is for the matplotlib version this notebook targets.
    plt.loglog(compared_perturbations, wasserstein2pterrors, '-o', basex=2, basey=2)
    plt.xlabel("Perturbation $\\epsilon$")
    plt.ylabel('$||W_1(\\nu^{2, \\Delta x, \\epsilon}, \\nu^{2,\\Delta x, \\epsilon_0})||_{L^1(D\\times D)}$')
    plt.title("Wasserstein convergence for %s\nfor second correlation measure,\nwith respect to perturbation size\nagainst a reference solution with $\\epsilon_0=%.4f$"%(name,reference_perturbation))
    showAndSave('%s_wasserstein_perturbation_convergence_2pt_all_components' % name)

    # one point
    plt.loglog(compared_perturbations, wasserstein1pterrors, '-o', basex=2, basey=2)
    plt.xlabel("Perturbation $\\epsilon$")
    plt.ylabel('$||W_1(\\nu^{1, \\Delta x, \\epsilon}, \\nu^{1,\\Delta x, \\epsilon_0})||_{L^1(D)}$')
    plt.title("Wasserstein convergence for %s\nfor first correlation measure,\nwith respect to perturbation size\nagainst a reference solution with $\\epsilon_0=%.4f$"%(name,reference_perturbation))
    showAndSave('%s_wasserstein_perturbation_convergence_1pt_all_components' % name)
    
    

def plotWassersteinConvergenceDifferentTypes(name, filenames, r, perturbations_inverse):
    """
    Plot the Wasserstein distance between two perturbation types.

    name                  -- label used in the plot titles and file names.
    filenames             -- dict with exactly two keys (the perturbation
                             types), each mapping to a list of file paths;
                             the two lists are walked in lockstep.
    r                     -- used both as the number of samples read per
                             file and as the extent of the two spatial axes.
    perturbations_inverse -- inverse perturbation sizes; their reciprocals
                             form the x-axis of both plots.

    Raises Exception unless filenames has exactly two keys.
    """
    wasserstein2pterrors = []
    types = list(filenames)

    if len(types) != 2:
        raise Exception("Only support two perturbation types")
    first_type, second_type = types

    def load_pair(path_a, path_b):
        # Read samples 0..r-1 of both files, alternating between them.
        ensemble_a = np.zeros((r, r, r, number_of_variables))
        ensemble_b = np.zeros((r, r, r, number_of_variables))
        for k in range(r):
            sample_a = load(path_a, k)
            sample_b = load(path_b, k)
            ensemble_a[:,:,k,:] = sample_a
            ensemble_b[:,:,k,:] = sample_b
        return ensemble_a, ensemble_b

    # two point
    for path_a, path_b in zip(filenames[first_type], filenames[second_type]):
        ensemble_a, ensemble_b = load_pair(path_a, path_b)
        wasserstein2pterrors.append(wasserstein2pt_fast(ensemble_a, ensemble_b))
        print("wasserstein2pterrors=%s" % wasserstein2pterrors)

    epsilons = 1.0/np.array(perturbations_inverse,dtype=np.float64)
    plt.loglog(epsilons, wasserstein2pterrors, '-o', basex=2, basey=2)
    plt.xlabel("Perturbation $\\epsilon$")
    plt.ylabel('$||W_1(\\nu^{2, \\Delta x, \\epsilon}_{\\mathrm{%s}}, \\nu^{2,\\Delta x, \\epsilon}_{\\mathrm{%s}})||_{L^1(D\\times D)}$' % (first_type, second_type))
    plt.title("Wasserstein convergence for %s\nfor second correlation measure"%(name))
    showAndSave('%s_type_comparison_wasserstein_perturbation_convergence_2pt_all_components' % name)

    # one point
    wasserstein1pterrors = []
    for path_a, path_b in zip(filenames[first_type], filenames[second_type]):
        ensemble_a, ensemble_b = load_pair(path_a, path_b)
        wasserstein1pterrors.append(wasserstein1pt_fast(ensemble_a, ensemble_b))
        print("wasserstein1pterrors=%s" % wasserstein1pterrors)

    epsilons = 1.0/np.array(perturbations_inverse,dtype=np.float64)
    plt.loglog(epsilons, wasserstein1pterrors, '-o', basex=2, basey=2)
    plt.xlabel("Perturbation $\\epsilon$")
    plt.ylabel('$||W_1(\\nu^{1, \\Delta x, \\epsilon}_{\\mathrm{%s}}, \\nu^{1,\\Delta x, \\epsilon}_{\\mathrm{%s}})||_{L^1(D)}$' % (first_type, second_type))
    plt.title("Wasserstein convergence for %s\nfor first correlation measure,\nwith respect to perturbation size"%(name))
    showAndSave('%s_type_comparison_wasserstein_perturbation_convergence_1pt_all_components' % name)

# Kelvin-Helmholtz

## Convergence as we refine the perturbation

In the cell below, we look at the convergence of the error

$$\mathrm{Error}(\epsilon)=\|W_1(\mu^{\epsilon}, \mu^{\epsilon_{\mathrm{ref}}})\|_{L^1}$$

where $\mu^{\epsilon_{\mathrm{ref}}}$ is a reference solution with small perturbation size ($\epsilon_{\mathrm{ref}}=0.0025$). We keep the number of samples and resolution fixed ($1024$ samples at $1024\times 1024$ resolution).

In [None]:

# Experiment parameters. `samples` is set here but not read by this cell.
name = 'Kelvin-Helmholtz'
resolution = 1024
samples = 1024
# The last entry is the reference that all other sizes are compared against.
perturbations = [0.09, 0.075, 0.06, 0.05, 0.025, 0.01, 0.0075, 0.005,0.0025]

# Resolve the data location; the second argument lists the relative paths of
# the files used below (presumably for verification by plot_info -- confirm).
basepath_perts = get_environment(
    "STATISTICAL_KH_PERTS",
    [f"kh_perts/q{p}/kh_1.nc" for p in perturbations])

plot_info.console_log(f"Using basepath_perts={basepath_perts}")

# Path template; {perturbation} is filled in by plotWassersteinConvergence.
basename = os.path.join(basepath_perts, 'kh_perts/q{perturbation}/kh_1.nc')

plotWassersteinConvergence(name, basename, resolution, perturbations)


# Convergence for different perturbation types

In this experiment, we use two types of perturbation: one drawn from a normal distribution and one drawn from a uniform distribution. For each perturbation size $\epsilon$, we measure

$$\mathrm{Error}(\epsilon)=\|W_1(\mu^{\epsilon}_{\mathrm{normal}}, \mu^{\epsilon}_{\mathrm{uniform}})\|_{L^1}$$

We plot the error as a function of $\epsilon$. If the statistical solution is invariant under the choice of perturbation type, the error should converge to zero.

In [None]:

# Experiment parameters. `samples` is set here but not read by this cell.
name = 'Kelvin-Helmholtz Perturbation comparison'
resolution = 1024
samples = 1024
pert_inverses = [8, 16, 32, 64, 128, 256, 512]
types = ['normal', 'uniform']

# Relative path template: {t} is the perturbation type, {inv} the inverse size.
normal_uniform_base = 'dist_{t}/pertinv_{inv}/kh_1.nc'

# all_filenames is just used for verification
all_filenames = [
    normal_uniform_base.format(t=t, inv=p)
    for t in types
    for p in pert_inverses
]

basepath_perts_normal_uniform = get_environment(
    "STATISTICAL_KH_PERTS_NORMAL_UNIFORM",
    all_filenames)

plot_info.console_log(f"Using basepath_perts_normal_uniform={basepath_perts_normal_uniform}")

# One list of full paths per perturbation type, ordered like pert_inverses.
filenames_per_type = {kind: [] for kind in types}
for t in types:
    for p in pert_inverses:
        relative_path = normal_uniform_base.format(t=t,inv=p)
        filenames_per_type[t].append(
            os.path.join(basepath_perts_normal_uniform, relative_path))

plotWassersteinConvergenceDifferentTypes(name, filenames_per_type, resolution, pert_inverses)
