# Influence of the number of mesh elements

- /Users/hkromer/02_PhD/02_Data/01_COMSOL/01_IonOptics/02.current_chamber/02.MR/temporal_refinement/data/mesh_resolution/02.MR.102.temporal_refinement.003.hmesh_save1e8.001.qxqyqz.3813903.csv

- 02.MR.102.temporal_refinement.003.hmesh_save1e8.001.qxqyqz.3813903.csv: 3813903 mesh elements
- 02.MR.102.temporal_refinement.003.hmesh_save1e8.002.qxqyqz.833076.csv: 833076 mesh elements
- 02.MR.102.temporal_refinement.003.hmesh_save1e8.003.qxqyqz.669145.csv: 669145 mesh elements
- 02.MR.102.temporal_refinement.003.hmesh_save1e8.004.qxqyqz.5548774.csv: 5548774 mesh elements

In [None]:
# Destination of the mesh-refinement figure inside the thesis folder
savefolder = '/Users/hkromer/polybox/Thesis/Chapters/DetailedNeutronGeneratorCharacterization/Figures/'
figname = 'COMSOL_mesh_refinement'
# Rebind figname from the bare name to the full path <savefolder>/<name>/<name>.pdf
figname = '/'.join([savefolder, figname, figname + '.pdf'])

In [None]:
import pandas as pd
import re
import numpy as np
import matplotlib.pyplot as plt
from scipy import interpolate
import os

In [None]:
# import data
path = '/Users/hkromer/02_PhD/02_Data/01_COMSOL/01_IonOptics/02.current_chamber/02.MR/temporal_refinement/data/mesh_resolution/'

# Keep only the raw qx/qy/qz exports. The '...ecdf.csv' files written at the
# end of this cell also end in '.csv' and must not be read back in.
# os.listdir() returns names in arbitrary order, so sort them: the cells below
# address the meshes by position (e.g. d[3] is used as the reference mesh).
files = sorted(
    f for f in os.listdir(path)
    if f.endswith('.csv') and 'ecdf' not in f and 'qxqyqz' in f
)
assert len(files) == 4


def compute_qr(df):
    """
    Append the radial distance qr = sqrt(qy**2 + qz**2) for every time.

    df must have two-level columns (Time, Coordinate) that contain 'qy' and
    'qz'. Returns df with one additional (time, 'qr') column per time and the
    columns sorted.
    """
    # select the second column level; the remaining columns are the times
    qy = df.xs('qy', axis=1, level=1)
    qz = df.xs('qz', axis=1, level=1)

    df_r = pd.DataFrame(np.sqrt(qy.values**2 + qz.values**2), columns=qy.columns)

    # create MultiIndex in columns
    df_r.columns = pd.MultiIndex.from_product([df_r.columns, ['qr']])
    # join to original
    return pd.concat([df, df_r], axis=1).sort_index(axis=1)


d = {}
for ii, file in enumerate(files):
    fname = f'{path}{file}'
    # the number of mesh elements is the last number in the file name
    num_mesh_elements = re.findall(r'(\d+)\.csv', file)[0]

    data = pd.read_csv(fname, skiprows=7)

    # remove the index
    data = data.iloc[:, 1:]

    # multicolumn indexing: (time, coordinate) parsed from every column header
    cols = data.columns.tolist()
    first_cols = []   # time
    second_cols = []  # qx, qy, or qz
    for c in cols:
        time_match = re.findall(r't=(.+)', c)
        if time_match:
            first_cols.append(time_match[0])
            second_cols.append(re.findall(r'(.+) \(', c)[0])
        else:
            first_cols.append(None)
            second_cols.append(None)

    tuples = list(zip(first_cols, second_cols))
    data.columns = pd.MultiIndex.from_tuples(tuples, names=['Time', 'Coordinate'])

    # for each time, compute the beam radius
    data = compute_qr(data)

    # select second level of columns, one as series
    data_qr = data.xs('qr', axis=1, level=1)

    # x values of the ECDF: the radii of each time sorted in ascending order
    data_x_ecdf = pd.DataFrame(np.sort(data_qr.T)).T
    data_x_ecdf.columns = data_qr.columns
    data_x_ecdf.columns = pd.MultiIndex.from_product([data_x_ecdf.columns, ['x_ecdf']])
    data = pd.concat([data, data_x_ecdf], axis=1).sort_index(axis=1)

    # y values of the ECDF: 1/n, 2/n, ..., 1, identical for every time
    n = data_qr.shape[0]
    data_y_ecdf = pd.DataFrame([np.arange(1, n+1) / n for _ in range(data_qr.shape[1])], index=data_qr.columns).T
    data_y_ecdf.columns = pd.MultiIndex.from_product([data_y_ecdf.columns, ['y_ecdf']])
    data = pd.concat([data, data_y_ecdf], axis=1).sort_index(axis=1)

    data['num_mesh_elements'] = num_mesh_elements
    # save data
    fname = f'{fname}ecdf.csv'
    data.to_csv(fname)
    d[ii] = data
data

In [None]:
# plot a random cdf
# NOTE: `data` and `data_qr` are the leftovers of the import loop, i.e. they
# belong to the last file that was processed there
ii = np.random.randint(0,len(data_qr.columns))
time = data_qr.columns[ii]

print(time, data[time, 'x_ecdf'].name)
x = data[time, 'x_ecdf'].values
y = data[time, 'y_ecdf'].values

plt.plot(x, y)
plt.title(time)
plt.show()
# keep all columns of the chosen time; the 'qr' column is used for the cross-check below
t=data[time]
def ecdf(data):
    """Return the sorted sample and the matching cumulative fractions 1/n ... 1."""
    sorted_sample = np.sort(data)
    count = len(data)
    cumulative_fraction = np.arange(1, count + 1) / count
    return sorted_sample, cumulative_fraction

# cross-check: recompute the ECDF of the same time directly from the radii
# with ecdf() and plot it for comparison with the stored x_ecdf / y_ecdf columns
r = t['qr'].values
x_ecdf, y_ecdf = ecdf(r)

plt.scatter(x_ecdf, y_ecdf)
plt.show()

# Compare the CDFs

- for each timestep, interpolate the CDFs of each of the 4 cases
- compare the CDFs with the Kolmogorov–Smirnov statistic (the largest absolute difference between two CDFs); plot the overall largest deviation, or one example?

### Get a dictionary of the interpolations for each mesh

In [None]:
# one entry per mesh: a Series of interpolation functions, one per time
d_interp = {}
meshes = range(0,4)

def interpolate_CDF(col_x, df_y):
    """
    Build a piecewise-linear interpolation of one empirical CDF.

    col_x: Series with the sorted x values of the ECDF; its name is the time.
    df_y: DataFrame holding the ECDF y values, one column per time.
    Returns a scipy interp1d object that extrapolates outside the sampled range.
    """
    cdf_values = df_y.loc[:, col_x.name].values
    return interpolate.interp1d(
        col_x.values,
        cdf_values,
        kind='slinear',
        fill_value="extrapolate",
    )

# Build, for every mesh, one interpolation function per time and store the
# resulting Series in d_interp under the mesh index.
for ii in meshes:
    data = d[ii]

    # x and y values of the ecdf
    data_x_ecdf = data.xs('x_ecdf', axis=1, level=1)
    data_y_ecdf = data.xs('y_ecdf', axis=1, level=1)

    # minimal and maximal x value (beam radius) per time
    min_x = data_x_ecdf.min()
    max_x = data_x_ecdf.max()

    # 100000 equally spaced query points per time:
    # rows are the different times, cols are the query points.
    # (The former np.zeros([26, 100000]) pre-allocations were dropped: they
    # hard-coded the number of times and were overwritten or never used.)
    data_interp_x = pd.DataFrame(np.linspace(min_x, max_x, 100000).T, index=data_x_ecdf.columns, columns=np.arange(0,100000,1))

    # interpolation for each time (apply works column by column)
    df_interp = data_x_ecdf.apply(lambda x: interpolate_CDF(x, data_y_ecdf))

    d_interp[ii] = df_interp



### Compute the absolute differences in the CDFs for each mesh and timestamp

In [None]:
# reference interpolated CDF
df_reference = d_interp[3]

# get also the query points from the reference CDF
data_ref = d[3]

# x and y values of the ecdf
data_x_ecdf = data_ref.xs('x_ecdf', axis=1, level=1)
data_y_ecdf = data_ref.xs('y_ecdf', axis=1, level=1)

# minimal and maximal x value for the interpolation
min_x = data_x_ecdf.min()
max_x = data_x_ecdf.max()

# get the query points for the interpolation: 100000 equally spaced values
# between the smallest and largest reference radius of each time
# rows are query points, columns are times
data_interp_x = pd.DataFrame(np.linspace(min_x, max_x, 100000).T, index=data_x_ecdf.columns, columns=np.arange(0,100000,1)).T

# dataframe to store the maximum differences
data_differences = pd.DataFrame(index=df_reference.index)


def compute_absolute_difference_in_CDF(interpolation, reference, querypoints):
    """
    Absolute difference between one interpolated CDF and the reference CDF.

    interpolation: row with the entries 'Time' and 0 (the interpolation
        function of that time), as produced by reset_index() on a Series of
        interpolation functions.
    reference: Series of reference interpolation functions indexed by time.
    querypoints: DataFrame of query points with one column per time.
    Returns a Series holding the absolute difference at every query point plus
    a final entry 'Time' with the time label.
    """
    time = interpolation.loc['Time']
    interp = interpolation.loc[0]

    # reference interpolation
    interp_ref = reference.loc[time]

    # get the querypoints for this timestamp
    x = querypoints.loc[:,time].values

    # absolute difference between the interpolated CDF and the reference
    diff_abs = np.abs(interp(x) - interp_ref(x))

    # replace ridiculously large values at singularities (t=0)
    diff_abs[diff_abs > 1e6] = 0.001

    # Series.append was removed in pandas 2.0; pd.concat gives the same result
    return pd.concat([pd.Series(diff_abs), pd.Series([time], index=['Time'])])


# compare meshes 0..2 with the reference mesh 3
for ii in range(0,3):
    # get the interpolation
    data = d_interp[ii]
    data = data.reset_index()

    data_difference = data.apply(lambda x: compute_absolute_difference_in_CDF(x, df_reference, data_interp_x), axis=1).set_index('Time', drop=True)

    # maximum difference per timestep
    max_diffs = data_difference.max(axis=1)

    data_differences[ii] = max_diffs

In [None]:
# show the maximum absolute CDF difference: one row per time, one column per compared mesh
data_differences

## Plot

In [None]:
# ii vs mesh
# prints which number of mesh elements is stored under each index of d
for ii in range(0,4):
    print(ii, d[ii]['num_mesh_elements'].unique())

In [None]:
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.ticker import AutoMinorLocator
import os

# make the TeX Live binaries visible so that matplotlib can render text with LaTeX
os.environ["PATH"] += os.pathsep + '/usr/local/texlive/2018/bin/x86_64-darwin'
plt.rc('text', usetex=True)
plt.rc('font', weight='bold')
matplotlib.rcParams['font.weight']= 'bold'
matplotlib.rcParams['mathtext.fontset'] = 'custom'
matplotlib.rcParams['mathtext.rm'] = 'Arial'
matplotlib.rcParams['mathtext.it'] = 'Arial:italic'
matplotlib.rcParams['mathtext.bf'] = 'Arial:bold'
matplotlib.rcParams['mathtext.tt'] = 'Arial'
matplotlib.rcParams['mathtext.cal'] = 'Arial'
# the preamble is given as a plain string: newer matplotlib releases reject a list here
matplotlib.rcParams['text.latex.preamble'] = r'\usepackage{sfmath} \boldmath'
plt.rcParams["axes.labelweight"] = "heavy"


# thesis folder
savefolder = '/Users/hkromer/polybox/Thesis/Chapters/DetailedNeutronGeneratorCharacterization/Figures/'
figname = 'COMSOL_mesh_refinement'
figname = f'{savefolder}/{figname}/{figname}.pdf'


# index the differences by the numeric time so that the curves are plotted in time order
data_plot = data_differences.copy()

data_plot['timestamp'] = [float(t) for t in data_plot.index.values]
data_plot = data_plot.sort_values(by='timestamp')
data_plot = data_plot.set_index('timestamp', drop=True)

cols = data_plot.columns
colors = ['darkred', 'darkblue',  'darkorange']
linestyles = ["dotted",  '-','--']

# legend names of the meshes stored under the columns 0, 1, 2
mesh_elements = ['A', 'C', 'B']

# plot for each time
fig, ax1 = plt.subplots(figsize=(8, 6))
fs = 20

x = data_plot.index.values*1e6 # convert to mus
for mesh, color, ls, ms in zip(cols, colors, linestyles, mesh_elements):
    y = data_plot.loc[:, mesh].values
    ax1.plot(x, y, color=color, linestyle=ls, linewidth=2.5, label=ms)

# remember the autoscaled limits before the tall marker lines are added
ylims = ax1.get_ylim()

# vertical marker lines; 'k--' already sets the colour, so no extra color keyword
ax1.plot((0.0, 0.0), (-100, 100), 'k--', linewidth=2)  # Plasma meniscus
ax1.plot((0.08, 0.08), (-100, 100), 'k--', linewidth=2)  # Target surface
ax1.set_xlabel(r"\textbf{Simulation time [$\mu$s]}", fontsize=fs)
ax1.set_ylabel(r"\textbf{Maximum absolute difference in}" + '\n'+ r"\textbf{empirical CDF with respect to mesh D}", fontsize=fs)
ax1.tick_params('x', colors='black', labelsize=fs)
ax1.tick_params(axis='y', labelsize=fs-2)
ax1.set_ylim(ylims[0],ylims[1])
ax1.text(-0.0035, 0.225, s=r'\textbf{Plasma meniscus}', color='black',  fontsize=fs-4, rotation=90) # PLASMA
ax1.text(0.076, 0.2, s=r'\textbf{Target surface}', color='black',  fontsize=fs-4, rotation=90) # target

# minor ticks x
minor_locator = AutoMinorLocator(2)
ax1.xaxis.set_minor_locator(minor_locator)
# minor ticks y
minor_locator = AutoMinorLocator(2)
ax1.yaxis.set_minor_locator(minor_locator)

hs, ls = ax1.get_legend_handles_labels()

# reorder the legend entries so that they read A, B, C
leg1 = plt.legend([hs[0],hs[2],hs[1]],[ls[0],ls[2],ls[1]], loc=(0.53,0.73), fontsize=fs-6)
leg1.set_title(r"\textbf{Mesh}", prop = {'size': fs-4})
# the visibility flag is passed positionally: the keyword `b` is not accepted
# by current matplotlib releases
ax1.grid(True, which='major', linestyle='-')
ax1.grid(True, which='minor', linestyle='--')

plt.ylim(0.0, 0.40)
plt.xlim(-0.005, 0.1)

plt.tight_layout()
plt.savefig(figname)
plt.show()


## Check and plot the interpolation

In [None]:
# pick one time at random
data_qr = d[1].xs('qr', axis=1, level=1)
ii = np.random.randint(0,len(data_qr.columns))
time = data_qr.columns[ii]

# fresh interpolation of the ECDF of mesh 1 at that time
data = d[1][time]
interp = interpolate.interp1d(data['x_ecdf'].values, data['y_ecdf'].values, kind='slinear')

x = np.linspace(data['x_ecdf'].min(), data['x_ecdf'].max(), 100000)

fig, ax = plt.subplots(figsize=(10,5))

plt.title(time)
plt.plot(x, interp(x), color='red')
plt.plot(data['x_ecdf'].values, data['y_ecdf'].values)
plt.show()

fig, ax = plt.subplots(figsize=(10,5))

plt.title("interpolated df")
# Use the stored interpolation of the same mesh (d_interp[1]). The global
# df_interp is only the leftover of the last loop iteration and therefore
# belongs to a different mesh.
plt.plot(x, d_interp[1].loc[time](x), color='red')

plt.show()

# fig, ax = plt.subplots(figsize=(20,10))

# plt.title(time)
# plt.plot(x, interp(x), '-o', color='red')
# plt.plot(data['x_ecdf'].values, data['y_ecdf'].values)

# plt.xlim(1,1.025)
# plt.ylim(0.42,0.445)
# plt.show()


# Plot two ECDFs to show the KS Test

In [None]:
import numpy as np
def find_nearest(array, value):
    """Return the entry of *array* closest to *value* together with its index."""
    values = np.asarray(array)
    nearest_idx = np.argmin(np.abs(values - value))
    return values[nearest_idx], nearest_idx

# look up the sample of the reference ECDF that is closest to 0.12
# NOTE(review): x_ecdf_ref / y_ecdf_ref are only assigned in the plotting cell
# below, so that cell has to be executed before this one
find_nearest(x_ecdf_ref, 0.12)
# NOTE(review): 2534 is presumably the index returned above for one particular data set — confirm
find_nearest(x_ecdf_ref, 0.12), x_ecdf_ref[2534],y_ecdf_ref[2534]

In [None]:
# thesis folder
savefolder = '/Users/hkromer/polybox/Thesis/Chapters/DetailedNeutronGeneratorCharacterization/Figures/'
figname = 'COMSOL_mesh_refinement_two_ECDFs'
figname = f'{savefolder}/{figname}/{figname}.pdf'

# select time
time = '3E-8'

# NOTE(review): the import cell fills d with the keys 0..3, so the keys used
# here before (d[3] and d[4]) ran one past the end and raised a KeyError. They
# are shifted down by one: d[2] is the third file, d[3] the reference mesh.
# Confirm that the third file is the mesh that should carry the label 'A'.
x_ecdf3 = d[2].xs('x_ecdf', axis=1, level=1).loc[:, time].values
x_ecdf_ref = d[3].xs('x_ecdf', axis=1, level=1).loc[:, time].values

y_ecdf3 = d[2].xs('y_ecdf', axis=1, level=1).loc[:, time].values
y_ecdf_ref = d[3].xs('y_ecdf', axis=1, level=1).loc[:, time].values


# plot
fig, ax1 = plt.subplots(figsize=(8, 6))
fs = 20

plt.plot(x_ecdf3, y_ecdf3, color='darkblue', linewidth=2.5, label='A')
plt.plot(x_ecdf_ref, y_ecdf_ref, '--', color='darkorange', linewidth=2.5, label='D')

ylims = ax1.get_ylim()

# the plotted quantity is the radius sqrt(y^2 + z^2); the label used a minus sign before
ax1.set_xlabel(r"$\sqrt{\textbf{y}^{\textrm{\textbf{2}}}+\textbf{z}^{\textrm{\textbf{2}}}}$"+ r" \textbf{[mm]}", fontsize=fs)
ax1.set_ylabel(r"\textbf{Empirical CDF [-]}", fontsize=fs)
ax1.tick_params('x', colors='black', labelsize=fs)
ax1.tick_params(axis='y', labelsize=fs-2)
ax1.set_ylim(ylims[0],ylims[1])

# minor ticks x
minor_locator = AutoMinorLocator(2)
ax1.xaxis.set_minor_locator(minor_locator)
# minor ticks y
minor_locator = AutoMinorLocator(2)
ax1.yaxis.set_minor_locator(minor_locator)

leg1 = plt.legend(loc='best', fontsize=fs-6)
leg1.set_title(r"\textbf{Mesh}", prop = {'size': fs-4})
# the visibility flag is passed positionally: the keyword `b` is not accepted
# by current matplotlib releases
ax1.grid(True, which='major', linestyle='-')
ax1.grid(True, which='minor', linestyle='--')

# bracket marking the largest vertical distance between the two curves
# NOTE(review): position and height are hard-coded values — re-check them
# whenever the data or the selected time change
ax1.plot([0.12, 0.12], [0.845, 0.9316666666666666], color='darkred', linewidth=3.5)
offset = 0.00125
ax1.plot([0.12-offset, 0.12+offset], [0.845, 0.845], color='darkred', linewidth=3.5)
ax1.plot([0.12-offset, 0.12+offset], [0.9316666666666666, 0.9316666666666666], color='darkred', linewidth=3.5)
ax1.text(0.074, 0.865, r'\textbf{Maximum difference}', color='darkred', fontsize=16)
plt.ylim(0.0, 1.0)
plt.xlim(0, 0.14)

plt.tight_layout()
plt.savefig(figname)

plt.show()

# 2D plot of some particles

In [None]:
time = '2.5E-7'
# draw 300 random row indices; np.random.randint may return the same index more than once
number_of_particles = 300
# d only has the keys 0..3 (see the import cell); the former d[4] raised a
# KeyError. d[3] is the mesh used as reference in the other cells.
rand_numbers = np.random.randint(0, high=d[3].shape[0], size=number_of_particles)

In [None]:
# compute weight centre of the beam
# d only has the keys 0..3 (see the import cell); the former d[4] raised a
# KeyError. d[3] is the mesh used as reference in the other cells.
data_y = d[3].xs('qy', axis=1, level=1).loc[:, time].values.astype(np.float64)
data_z = d[3].xs('qz', axis=1, level=1).loc[:, time].values.astype(np.float64)

# centre of the particle positions, each position weighted with its radius
r = np.sqrt(data_y**2+data_z**2)
y_0 = np.sum(data_y*r)/np.sum(r)
z_0 = np.sum(data_z*r)/np.sum(r)

In [None]:
savefolder = '/Users/hkromer/polybox/Thesis/Chapters/DetailedNeutronGeneratorCharacterization/Figures/'
figname = 'COMSOL_mesh_refinement_beam_radius'
figname = f'{savefolder}/{figname}/{figname}.pdf'

# positions of the randomly selected particles at the chosen time
# d only has the keys 0..3 (see the import cell); the former d[4] raised a
# KeyError. d[3] is the mesh used as reference in the other cells.
data_y = d[3].xs('qy', axis=1, level=1).loc[rand_numbers, time].values.astype(np.float64)
data_z = d[3].xs('qz', axis=1, level=1).loc[rand_numbers, time].values.astype(np.float64)

fig, ax = plt.subplots(figsize=(10,10))
s0 = 280
# particles as red crosses, the origin as a large green dot
ax.scatter(data_y, data_z, marker='x', color='red', s=s0)
ax.scatter(0, 0, marker='o', color='darkgreen', s=s0+3*s0)
ax.set_xticks([ ])
ax.set_yticks([ ])
ax.set_facecolor('#FFFFEB')
plt.axis('equal')
plt.tight_layout()
plt.savefig(figname)
plt.show()

In [None]:
import numpy as np

def ecdf(data):
    """Empirical CDF of a one-dimensional sample.

    Returns the sample sorted in ascending order and, for each sorted value,
    the fraction k/n of the sample up to and including that position.
    """
    n_points = len(data)
    steps = (np.arange(n_points) + 1) / n_points
    return np.sort(data), steps


# quick check with a repeated value
ecdf([3, 3, 1, 4])

In [None]:
# second quick check with unsorted floats; x comes back sorted, y is 0.25, 0.5, 0.75, 1.0
ecdf([3, 55, 0.5, 1.5])