In [None]:
# notebook setup: imports, helper functions and global plot styling
import os, sys, re, io, pathlib
import pandas as pd
import hiplot as hip
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import itertools
from sklearn.preprocessing import normalize

# shorthand for multi-index slicing and a scratch text buffer
idx = pd.IndexSlice
buffer = io.StringIO()

# the directory two levels above the working directory goes on sys.path
# (notebooks in lab_utils) so that `autoscan` can be imported
labutilspath = str(pathlib.Path.cwd().parents[1])
sys.path.append(labutilspath)

# import the autoscan routines
from autoscan import autoscan

# `pp` provides the column groups used below (pp.grid_cols, pp.mech_cols, pp.ftir_cols, ...)
pp = autoscan.basics(material_info = True)

def hip_visualize(df, pcols = None, index = ['family', 'code']):
    """Display selected columns of ``df`` as a HiPlot experiment.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table. Its index is reset first, so index levels can be
        selected like ordinary columns.
    pcols : str or list of str, optional
        Columns to show after the ``index`` columns. ``None`` (default)
        shows the ``index`` columns only.
    index : str or list of str
        Identifier columns placed first.

    Returns
    -------
    The ``hip.Experiment`` built from the selection, after ``display()``
    has been called on it.
    """
    def _as_list(labels):
        # a bare string is one label, not a sequence of characters
        if labels is None:
            return []
        return [labels] if isinstance(labels, str) else list(labels)

    # plain list concatenation: np.append(index, None) would add a `None`
    # label and make the column selection fail with a KeyError
    selected = _as_list(index) + _as_list(pcols)
    dp = df.reset_index().loc[:, selected]
    s = hip.Experiment.from_dataframe(dp)
    s.colormap = 'interpolateViridis'
    s.display()
    return s

def ix_before_and_after(ds, index = ['tag', 'subtag'], columns = 'experiment', values = 'ix', mask = None, subset = None, aggfunc = lambda x: [*x],
                        dropna_tresh = 2, chain = True):
    """Collect the row labels of entries that appear under several ``columns`` values.

    The row labels of ``ds`` are exposed as an ``'ix'`` column on a copy of the
    table, the copy is pivoted (``index`` x ``columns``) with ``aggfunc``, and
    pivot rows with fewer than ``dropna_tresh`` non-missing cells are dropped.

    Parameters
    ----------
    ds : pandas.DataFrame
        Input table; it is not modified.
    index, columns, values, aggfunc
        Forwarded to ``DataFrame.pivot_table``. The default ``aggfunc``
        gathers the values of each group into a list.
    mask : boolean indexer, optional
        Row selection applied to ``ds`` before pivoting.
    subset : list, optional
        Pivot columns to keep before the missing-value filter.
    dropna_tresh : int
        Minimum number of non-missing cells a pivot row needs to be kept.
    chain : bool
        If True return one flat list built from all non-missing cells;
        if False return the filtered pivot table itself.

    Returns
    -------
    list or pandas.DataFrame
    """
    # assign() works on a new frame, so the caller's table does not silently
    # gain (or get overwritten in) an 'ix' column
    ds = ds.assign(ix = ds.index.values)
    if mask is not None:
        ds = ds.loc[mask, :]
    dx = ds.pivot_table(index = index,
                        columns = columns,
                        values = values,
                        aggfunc = aggfunc)
    if subset is not None:
        dx = dx.loc[:, subset]
    # re-bind instead of inplace=True: dx may be a slice of the pivot table
    dx = dx.dropna(thresh = dropna_tresh)

    if not chain:
        return dx

    # flatten the per-cell collections row by row; missing cells are float NaN
    ix = []
    for row in dx.to_numpy():
        for cell in row:
            if isinstance(cell, float):
                continue
            ix.extend(cell)
    return ix

def set_spe_style(ax, title = '', xlabel = '', ylabel=''):
    """Give ``ax`` a bold title, bold axis labels and bold tick labels.

    ``ax`` is also made the current pyplot axes. The same ``ax`` is returned.
    """
    bold = {'fontweight': 'bold'}
    plt.sca(ax)
    ax.set_title(title, **bold)
    ax.set_xlabel(xlabel, **bold)
    ax.set_ylabel(ylabel, **bold)
    # called with text properties only, the pyplot tick helpers restyle the existing tick labels
    plt.xticks(**bold)
    plt.yticks(**bold)
    return ax

# matplotlib rcParams applied to every figure: 12 pt bold text and a framed legend
rc_dict = {
    "font.size":12,
    'font.weight':'bold',
    "axes.titlesize":12,
    "axes.titleweight":'bold',
    "axes.labelsize":12,
    'axes.labelweight':'bold',
    'xtick.labelsize':12,
#     'xtick.labelweight':'bold',
    'ytick.labelsize':12,
#     'ytick.labelweight':'bold',
    'legend.frameon':True,
    'legend.fontsize':12,
    'legend.title_fontsize':12,
#     'legend.fontweight':'bold',
#     'legeld.title_fontweight':'bold',
#     'title.fontsize':12
}

sns.set_context("paper", 
                rc = rc_dict)

# NOTE(review): sns.set() is called without a context argument, so it presumably
# re-applies seaborn's default context and supersedes the "paper" context set above.
# Confirm which of the two is intended.
sns.set(rc = rc_dict)

# NOTE(review): 'darkgrid' looks like the style sns.set() already applies by default - likely redundant; verify
sns.set_style('darkgrid')

In [None]:
# locations of the input data file and of the generated figures
datafname = 'autoscan_corrected.h5'
datapath, vispath = '/sandbox/data/autoscan/', '/sandbox/vis/autoscan/'
savepath = datapath

figspath = os.path.join(vispath, 'rock_multiphysics_display')
datafile = os.path.join(datapath, datafname)

In [None]:
# load the description table ('desc') and the measurement table ('data')
# from the same HDF5 file and join them on their index
ds = pd.read_hdf(datafile, key = 'desc')
dd = pd.read_hdf(datafile, key = 'data')
df = ds.join(dd)

In [None]:
# per family, 'before' permeabilities above the family threshold are replaced
# by the mean of the remaining 'before' permeabilities of that family
perm_limits = {'sandstone':1e3, "shale":1e2, 'carbonate':1e3}
for k, v in perm_limits.items():
    ix = df.loc[(df.family == k) & (df.instance == 'before'), 'perm'] > v
    fill_val = df.loc[ix.index[~ix], 'perm'].mean()
    df.loc[ix.index[ix], 'perm'] = fill_val

In [None]:
# drop the 'metal' and 'gemstones' families and order rows by 'instance', descending
df_rocks = (df.query("family != 'metal' & family != 'gemstones'")
              .sort_values(by = 'instance', ascending = False))

In [None]:
# column groups: the numerical columns, and the short list of measurements shown in the hiplots
# NOTE: `subset_cols` is re-bound in a later cell (groupby_cols + probes)
col_numerical = pp.grid_cols + pp.probe_cols + ['l_max_peak']
subset_cols = ['l_max_peak', 'perm'] + pp.mech_cols[::2] + ['e_star']
col_meassmall = subset_cols

In [None]:
# rows of the codes that appear in at least two of the listed experiments
experiments = ['before', 'heat_treatment', 'perf']
ix = ix_before_and_after(ds, index = 'code', subset = experiments)
df_bna = df.loc[ix, :]

In [None]:
# same selection, restricted to rows with a permeability value
mask = df.perm.notna()
ix = ix_before_and_after(ds, mask = mask, subset = ['before', 'heat_treatment', 'perf'])
perm_bna_cols = pp.meta_cols + pp.grid_cols + ['perm']
df_perm_bna = df.loc[ix, perm_bna_cols]

In [None]:
# same selection, restricted to rows where every second pp.mech_cols column is present
mask = df.loc[:, pp.mech_cols[::2]].notna().all(axis = 1)
ix = ix_before_and_after(ds, mask = mask, subset = ['before', 'heat_treatment'])
mech_bna_cols = pp.meta_cols + pp.grid_cols + pp.mech_cols + pp.vel_cols
df_mech_bna = df.loc[ix, mech_bna_cols]

In [None]:
# same selection, restricted to rows with no missing value in pp.ftir_cols
mask = df.loc[:, pp.ftir_cols].notna().all(axis = 1)
ix = ix_before_and_after(ds, mask = mask, subset = ['before', 'heat_treatment', 'perf'])
ftir_bna_cols = pp.meta_cols + pp.grid_cols + pp.ftir_cols + ['l_max_peak']
df_ftir_bna = df.loc[ix, ftir_bna_cols]

# visualization
1. distribution of measurements
2. hip-plot
3. distributions

## distribution of samples and measurements 

In [None]:
# measurement column -> label used for the rings of the polar overview
cols = {'perm':'perm', 'vp0':'vel_0', 'vp90':'vel_90', 'e_star':'impulse', 'l_max_peak':'ftir'}
nsamples = len(df_rocks)

# one row per sample with a running number 'ix' and its angle 'theta' on the circle
df_temp = (df_rocks.loc[:, list(cols)]
                   .join(ds.loc[:, ['family']])
                   .reset_index(drop = True))
df_temp['ix'] = df_temp.index.values
df_temp['theta'] = (2 * np.pi / nsamples) * df_temp['ix'].values

In [None]:
# number of rows per family
df_temp.pivot_table(values = 'ix', index = 'family', aggfunc = len)

In [None]:
# One ring per measurement type: radius r_<label> (5, 10, 15, ...) and angle
# theta_<label>, both defined only for the samples that have that measurement.
# Columns are assigned directly, so the cell can be re-run without a suffix join
# producing duplicate theta_<label> columns.
m = 5
for k, v in cols.items():
    mask = df_temp[k].notna()
    df_temp.loc[mask, 'r_' + v] = m
    # NaN where the measurement is missing, so the plotted ring shows a gap there
    df_temp['theta_' + v] = df_temp['theta'].where(mask)
    m += 5
# number of available values per measurement column
nmeasurements = df_temp.loc[:, cols.keys()].count()

In [None]:
# Polar overview figure: an outer ring for all samples, one ring per measurement
# type, and a ring coloured by rock family. Saved as 'sample_distribution.png'.
from matplotlib.projections import PolarAxes
# NOTE(review): `_color_data` is a private matplotlib module; the same table is presumably
# available as matplotlib.colors.XKCD_COLORS - confirm before switching
import matplotlib._color_data as mcd

pi = np.pi
ones = np.ones
# NOTE(review): `theta` is not referenced again in this cell
theta = np.arange(1000)*2*pi/1000.
# radius of the outer "samples" ring
r = 30 * ones(nsamples)

fig = plt.figure(figsize = (15, 15))
ax = fig.add_subplot(111, projection = 'polar')

ax.plot(df_temp.theta.values, r, 'c', linewidth = 10, label = 'samples')

# one ring per measurement; `t` collects the tick labels "<label>\n<count>"
t = []
for k, v in cols.items():
    ax.plot(df_temp.loc[:, 'theta_' + v].values, df_temp.loc[:, 'r_' + v].values, linewidth = 10, label = v)
    t.append(v + '\n' + str(nmeasurements[k]))

# family ring at radius 27.5: angles of other families are blanked with NaN
# NOTE(review): only three colours are listed, so more than three families would fail on colors[n]
colors = [mcd.XKCD_COLORS["xkcd:" + v] for v in ['beige', 'khaki', 'orange']]
for n, f in enumerate(df_temp.family.unique()):
    mask = df_temp.family == f
    dt = df_temp.loc[:, ['theta']]
    dt.loc[mask == False, :] = np.nan
    ax.plot(dt.theta.values, 27.5 * ones(nsamples), linewidth = 10, label = f, color = colors[n])

ax.set_theta_zero_location("W")
ax.set_theta_direction('clockwise')
ax.tick_params(labelleft=True, labelright=False,
               labeltop=False, labelbottom=True)

# radial tick labels sit just inside the measurement rings; this assumes exactly five entries in `t`
plt.yticks([4, 9, 14, 19, 23.9], t, rotation = 60)
# angular ticks are labelled with sample numbers instead of angles
plt.xticks([0, pi/2, pi, 3 * pi / 2], ['0\n'+str(nsamples), nsamples//4, nsamples // 2, 3 * nsamples // 4], rotation = 60)

# annotations are positioned with the transform of the angular tick labels
trans, _ , _ = ax.get_xaxis_text1_transform(10)
ax.text(np.deg2rad(45), -0.05, "sample #", transform = trans,
        rotation = 45,
        ha="center", va="center")

ax.text(np.deg2rad(67.5), 0.08, 'rock type', transform = trans, ha = 'center', va = 'center', rotation = 22.5)

plt.legend(ncol = 6, markerscale = 0.01, loc = 'lower center', bbox_to_anchor=(0.5, -0.11))

plt.savefig(os.path.join(figspath, 'sample_distribution.png'), dpi = 300, bbox_inches = 'tight')
plt.show()

In [None]:
# split the full table by characterisation stage
df_before = df.loc[df.instance == 'before']
df_after  = df.loc[df.instance == 'after']

## hiplot: hierarchical data relations

In [None]:
# show which columns the hiplots below display
print('subset cols for hip: %s' % (subset_cols,))

### ftir, perm, mech, e_star

In [None]:
# 'before' rows with every subset column present, exported to HTML
html_name = 'hip_before_ftirmaxloc-perm-and-mechx0-estar.html'
s = hip_visualize(df_before.dropna(subset = subset_cols), pcols = subset_cols, index = ['code'])
s.to_html(os.path.join(figspath, html_name));

In [None]:
# 'after' rows with every subset column present, exported to HTML
html_name = 'hip_after_ftirmaxloc-perm-and-mechx0-estar.html'
s = hip_visualize(df_after.dropna(subset = subset_cols), pcols = subset_cols, index = ['code'])
s.to_html(os.path.join(figspath, html_name));

### ftir, perm, and mech

In [None]:
# 'before' rows complete in all subset columns except the last one
required = subset_cols[:-1]
s = hip_visualize(df_before.dropna(subset = required), pcols = subset_cols, index = ['code'])
s.to_html(os.path.join(figspath, 'hip_before_ftirmaxloc-perm-and-mechx0.html'));

In [None]:
# 'after' rows complete in all subset columns except the last one
required = subset_cols[:-1]
s = hip_visualize(df_after.dropna(subset = required), pcols = subset_cols, index = ['code'])
s.to_html(os.path.join(figspath, 'hip_after_ftirmaxloc-perm-and-mechx0.html'));

### perm and mech

In [None]:
# 'before' rows complete in the subset columns without the first and the last one
required = subset_cols[1:-1]
s = hip_visualize(df_before.dropna(subset = required), pcols = subset_cols, index = ['code'])
s.to_html(os.path.join(figspath, 'hip_before_permvel-mechx0.html'));

In [None]:
# 'after' rows complete in the subset columns without the first and the last one
required = subset_cols[1:-1]
s = hip_visualize(df_after.dropna(subset = required), pcols = subset_cols, index = ['code'])
s.to_html(os.path.join(figspath, 'hip_after_permvel-mechx0.html'));

### ftir and mech

In [None]:
# required columns: the first subset column plus everything from the third one on
sc = subset_cols[:1] + subset_cols[2:]
html_name = 'hip_before_ftirmaxloc-mechx0.html'
s = hip_visualize(df_before.dropna(subset = sc), pcols = subset_cols, index = ['code'])
s.to_html(os.path.join(figspath, html_name));

In [None]:
# 'after' rows complete in the columns listed in `sc`
html_name = 'hip_after_ftirmaxloc-mechx0.html'
s = hip_visualize(df_after.dropna(subset = sc), pcols = subset_cols, index = ['code'])
s.to_html(os.path.join(figspath, html_name));

In [None]:
# NOTE: this re-binds `subset_cols`, which the hiplot cells above read
probes = ['perm']
groupby_cols = ['code', 'instance']
subset_cols = groupby_cols + probes

In [None]:
# # df_bna.loc[:, subset_cols].plot(kind = 'kde')
# fig, ax = plt.subplots(figsize = (12, 12))
# sns.kdeplot(x = 'perm',  hue = 'code', data = df_bna, clip = [0, 500], vertical = True,
#             palette = 'viridis', shade = 'fill', ax = ax)
# sns.set_style('darkgrid')
# plt.title('permeability before');

In [None]:
# ix_perm = df_before.perm.isna() == False
# df_perm_before = df_before.loc[ix_perm, ['family', 'code', 'perm']]

## distributions

### permeability

In [None]:
# create a temporary store for permeability data and calculate log
# .copy() / .assign() make both frames independent objects, so adding the
# column never writes into a slice of df_rocks or df
df_temp = df_rocks.loc[:, ['family', 'instance', 'perm']].copy()
df_temp['log_perm'] = np.log10(df_temp['perm'])

df_perm_bna = df_perm_bna.assign(log_perm = np.log10(df_perm_bna['perm']))

#### scatter

In [None]:
# permeability per family on a log axis, one strip per instance
fig, ax = plt.subplots(figsize = (12, 12))
sns.stripplot(data = df_rocks, x = 'family', y = 'perm', hue = 'instance',
              dodge = True, palette = 'viridis', ax = ax)
ax.set_yscale('log')
ax = set_spe_style(ax, title = 'Permeability before & after', xlabel = '', ylabel = 'Permeability (mD)')
fig.savefig(os.path.join(figspath, 'perm_stripplot.png'), dpi = 300, bbox_inches = 'tight')

#### boxplot

In [None]:
# box plot of permeability per family and instance on a log axis (not saved)
fig, ax = plt.subplots(figsize = (12, 12))
sns.boxplot(data = df_temp, x = 'family', y = 'perm', hue = 'instance',
            dodge = True, width = 0.5, palette = 'pastel', ax = ax)
ax.set_yscale('log')
ax = set_spe_style(ax, title = 'Permeability before & after', xlabel = '', ylabel = 'Permeability (mD)')

#### violin

In [None]:
# split violins of log10 permeability per family (title typo "distibution" fixed)
fig, ax = plt.subplots(figsize = (12, 12))
sns.violinplot(data = df_temp, x = 'family', y = 'log_perm', hue = 'instance', split = True,
               scale = 'width', bw = 'scott', inner = 'quartile', palette = 'pastel', ax = ax)
ax = set_spe_style(ax, title = 'Permeability distribution before & after', xlabel = '', ylabel = 'Log$_{10}$ of Permeability (mD)')
fig.savefig(os.path.join(figspath, 'perm_violin.png'), dpi = 300, bbox_inches = 'tight')
# plt.yscale('log')

In [None]:
# point plot of log10 permeability per instance, one line per family
fig, ax = plt.subplots(figsize = (12, 12))
sns.pointplot(data = df_temp, x = "instance", y = "log_perm", hue = "family",
              errwidth = 1, capsize = 0.1, ax = ax)
ax = set_spe_style(ax, title = 'Mean of permeability before & after', xlabel = '', ylabel = 'Log$_{10}$ of Permeability (mD)')
fig.savefig(os.path.join(figspath, 'perm_pointplot.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# same violin plot for the rows in df_perm_bna (title typo "distibution" fixed; not saved)
fig, ax = plt.subplots(figsize = (12, 12))
sns.violinplot(data = df_perm_bna, x = 'family', y = 'log_perm', hue = 'instance', split = True,
               scale = 'width', bw = 'scott', inner = 'quartile', palette = 'pastel', ax = ax)
ax = set_spe_style(ax, title = 'Permeability distribution before & after', xlabel = '', ylabel = 'Log$_{10}$ of Permeability (mD)')

### vels

In [None]:
# df_temp = df_rocks.loc[:, ['family', 'instance'] +  ]
# df_temp.loc[:, 'log_perm'] = df_temp.perm.apply(np.log10)
# marker per rock family, used by the pair plots below
markers = dict(shale = "o", sandstone = "v", carbonate = "s")

#### pairplot: vels 

In [None]:
# pairwise relations of the velocity columns for the 'before' rows, coloured by family
pair_data = df_rocks.query("instance == 'before'").loc[:, ['family', 'code'] + pp.vel_cols]
g = sns.pairplot(pair_data, hue = "family", markers = markers,
                 diag_kind = 'kde', corner = True, dropna = True, height = 3)
# g.map_lower(sns.kdeplot, levels=4, color=".2")
plt.savefig(os.path.join(figspath, 'vels_pairplot-before.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# # https://catherineh.github.io/programming/2016/05/24/seaborn-pairgrid-tips

# replacements = {'sepal_length': r'$\alpha$', 'sepal_width': 'sepal',
#                 'petal_length': r'$\beta$', 'petal_width': 'petal',
#                 'versicolor': 'bloop'}

# for i in range(4):
#     for j in range(4):
#         xlabel = g.axes[i][j].get_xlabel()
#         ylabel = g.axes[i][j].get_ylabel()
#         if xlabel in replacements.keys():
#             g.axes[i][j].set_xlabel(replacements[xlabel])
#         if ylabel in replacements.keys():
#             g.axes[i][j].set_ylabel(replacements[ylabel])

In [None]:
# pairwise relations of the velocity columns for the 'after' rows, coloured by family
pair_data = df_rocks.query("instance == 'after'").loc[:, ['family', 'code'] + pp.vel_cols]
g = sns.pairplot(pair_data, hue = "family", markers = markers,
                 diag_kind = 'kde', corner = True, dropna = True, height = 3)
# g.map_lower(sns.kdeplot, levels=4, color=".2")
plt.savefig(os.path.join(figspath, 'vels_pairplot-after.png'), dpi = 300, bbox_inches = 'tight')

#### scatter: velocities

In [None]:
# fig, ax = plt.subplots(figsize = (12, 12))
# sns.scatterplot(x = 'vp0', y = 'vs0', hue = 'code', style = 'family', data = df_rocks.query("instance == 'before'"), 
#                 ax = ax, palette = 'deep', markers = markers, legend = 'full')
# plt.xscale('log')
# plt.yscale('log')

In [None]:
# fig, ax = plt.subplots(figsize = (12, 12))
# joint plot of vp0 against vs0 for the 'before' rows, with rug marks on the marginal axes
joint_data = df_rocks.query("instance == 'before'")
g = sns.jointplot(data = joint_data, x = 'vp0', y = 'vs0', hue = 'family',
                  palette = 'deep', height = 15, space = 0.1)
# g.plot_joint(sns.kdeplot, color = 'code', zorder=0, levels=6)
g.set_axis_labels('vp$_0$ (m/s)', 'vs$_0$ (m/s)', fontsize = 12, fontweight = 'bold')
g.plot_marginals(sns.rugplot, height = .05, clip_on = False)
plt.savefig(os.path.join(figspath, 'vels_jointplot-before.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# fig, ax = plt.subplots(figsize = (12, 12))
# joint plot of vp0 against vs0 for the 'after' rows, with rug marks on the marginal axes
joint_data = df_rocks.query("instance == 'after'")
g = sns.jointplot(data = joint_data, x = 'vp0', y = 'vs0', hue = 'family',
                  palette = 'deep', height = 15, space = 0.1)
# g.plot_joint(sns.kdeplot, color = 'code', zorder=0, levels=6)
g.set_axis_labels('vp$_0$ (m/s)', 'vs$_0$ (m/s)', fontsize = 12, fontweight = 'bold')
g.plot_marginals(sns.rugplot, height = .05, clip_on = False)
plt.savefig(os.path.join(figspath, 'vels_jointplot-after.png'), dpi = 300, bbox_inches = 'tight')

#### violin: velocities

In [None]:
# split violins of vp0 per family and instance
fig, ax = plt.subplots(figsize = (12, 12))
sns.violinplot(data = df_rocks, x = 'family', y = 'vp0', hue = 'instance', split = True,
               scale = 'width', bw = 'scott', inner = 'quartile', palette = 'pastel', ax = ax)
ax = set_spe_style(ax, title = 'P-wave velocity, $\\theta = 0$, before & after', xlabel = '', ylabel = 'v$_p$(0) (m/s)')
fig.savefig(os.path.join(figspath, 'velp_violin.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# point plot of vp0 per instance, one line per family
fig, ax = plt.subplots(figsize = (12, 12))
sns.pointplot(data = df_rocks, x = "instance", y = "vp0", hue = "family",
              errwidth = 1, capsize = 0.1, ax = ax)
ax = set_spe_style(ax, title = 'Mean of P-wave velocity, $\\theta = 0$, before & after', xlabel = '', ylabel = 'v$_p$(0) (m/s)')
fig.savefig(os.path.join(figspath, 'velp_pointplot.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# split violins of vs0 per family and instance
fig, ax = plt.subplots(figsize = (12, 12))
sns.violinplot(data = df_rocks, x = 'family', y = 'vs0', hue = 'instance', split = True,
               scale = 'width', bw = 'scott', inner = 'quartile', palette = 'pastel', ax = ax)
ax = set_spe_style(ax, title = 'S-wave velocity, $\\theta = 0$, before & after', xlabel = '', ylabel = 'v$_s$(0) (m/s)')
fig.savefig(os.path.join(figspath, 'vels_violin.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# point plot of vs0 per instance, one line per family
fig, ax = plt.subplots(figsize = (12, 12))
sns.pointplot(data = df_rocks, x = "instance", y = "vs0", hue = "family",
              errwidth = 1, capsize = 0.1, ax = ax)
ax = set_spe_style(ax, title = 'Mean of S-wave velocity, $\\theta = 0$, before & after', xlabel = '', ylabel = 'v$_s$(0) (m/s)')
fig.savefig(os.path.join(figspath, 'vels_pointplot.png'), dpi = 300, bbox_inches = 'tight')

### mech

In [None]:
# every second entry of pp.mech_cols, starting with the first
mech_cols = pp.mech_cols[::2]

#### pairplot

In [None]:
# pairwise relations of the selected mechanical columns for the 'before' rows
pair_data = df_rocks.query("instance == 'before'").loc[:, ['family', 'code'] + mech_cols]
g = sns.pairplot(pair_data, hue = "family", markers = markers,
                 diag_kind = 'kde', corner = True, dropna = True, height = 3)
plt.savefig(os.path.join(figspath, 'mech_pairplot-before.png'), dpi = 300, bbox_inches = 'tight')
# g.map_lower(sns.kdeplot, levels=4, color=".2")

In [None]:
# pairwise relations of the selected mechanical columns for the 'after' rows
pair_data = df_rocks.query("instance == 'after'").loc[:, ['family', 'code'] + mech_cols]
g = sns.pairplot(pair_data, hue = "family", markers = markers,
                 diag_kind = 'kde', corner = True, dropna = True, height = 3)
# g.map_lower(sns.kdeplot, levels=4, color=".2")
plt.savefig(os.path.join(figspath, 'mech_pairplot-after.png'), dpi = 300, bbox_inches = 'tight')


#### scatter

In [None]:
# joint plot of mech_e0 against mech_k0 for the 'before' rows, with rug marks on the marginal axes
joint_data = df_rocks.query("instance == 'before'")
g = sns.jointplot(data = joint_data, x = 'mech_e0', y = 'mech_k0', hue = 'family',
                  palette = 'deep', height = 15, space = 0.1)
# g.plot_joint(sns.kdeplot, color = 'code', zorder=0, levels=6)
g.set_axis_labels('E (GPa)', 'K (GPa)', fontsize = 12, fontweight = 'bold')
g.plot_marginals(sns.rugplot, height = .05, clip_on = False)
plt.savefig(os.path.join(figspath, 'mech_jointplot-before.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# joint plot of mech_e0 against mech_k0 for the 'after' rows, with rug marks on the marginal axes
joint_data = df_rocks.query("instance == 'after'")
g = sns.jointplot(data = joint_data, x = 'mech_e0', y = 'mech_k0', hue = 'family',
                  palette = 'deep', height = 15, space = 0.1)
# g.plot_joint(sns.kdeplot, color = 'code', zorder=0, levels=6)
g.set_axis_labels('E (GPa)', 'K (GPa)', fontsize = 12, fontweight = 'bold')
g.plot_marginals(sns.rugplot, height = .05, clip_on = False)
plt.savefig(os.path.join(figspath, 'mech_jointplot-after.png'), dpi = 300, bbox_inches = 'tight')

#### violin

In [None]:
# split violins of mech_e0 per family and instance
fig, ax = plt.subplots(figsize = (12, 12))
sns.violinplot(data = df_rocks, x = 'family', y = 'mech_e0', hue = 'instance', split = True,
               scale = 'width', bw = 'scott', inner = 'quartile', palette = 'pastel', ax = ax)
ax = set_spe_style(ax, title = 'Young\'s modulus, $\\theta = 0$, before & after', xlabel = '', ylabel = 'E(0) (GPa)')
fig.savefig(os.path.join(figspath, 'meche_violin.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# point plot of mech_e0 per instance, one line per family
fig, ax = plt.subplots(figsize = (12, 12))
sns.pointplot(data = df_rocks, x = "instance", y = "mech_e0", hue = "family",
              errwidth = 1, capsize = 0.1, ax = ax)
ax = set_spe_style(ax, title = 'Mean of Young\'s modulus, $\\theta = 0$, before & after', xlabel = '', ylabel = 'E(0) (GPa)')
fig.savefig(os.path.join(figspath, 'meche_pointplot.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# split violins of mech_k0 per family and instance
fig, ax = plt.subplots(figsize = (12, 12))
sns.violinplot(data = df_rocks, x = 'family', y = 'mech_k0', hue = 'instance', split = True,
               scale = 'width', bw = 'scott', inner = 'quartile', palette = 'pastel', ax = ax)
ax = set_spe_style(ax, title = 'Bulk modulus, $\\theta = 0$, before & after', xlabel = '', ylabel = '$\\kappa$(0) (GPa)')
fig.savefig(os.path.join(figspath, 'mechk_violin.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# point plot of mech_k0 per instance, one line per family
fig, ax = plt.subplots(figsize = (12, 12))
sns.pointplot(data = df_rocks, x = "instance", y = "mech_k0", hue = "family",
              errwidth = 1, capsize = 0.1, ax = ax)
ax = set_spe_style(ax, title = 'Mean of bulk modulus, $\\theta = 0$, before & after', xlabel = '', ylabel = '$\\kappa$(0) (GPa)')
fig.savefig(os.path.join(figspath, 'mechk_pointplot.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# split violins of mech_n0 per family and instance
fig, ax = plt.subplots(figsize = (12, 12))
sns.violinplot(data = df_rocks, x = 'family', y = 'mech_n0', hue = 'instance', split = True,
               scale = 'width', bw = 'scott', inner = 'quartile', palette = 'pastel', ax = ax)
ax = set_spe_style(ax, title = 'Poisson ratio, $\\theta = 0$, before & after', xlabel = '', ylabel = '$\\nu$(0)')
fig.savefig(os.path.join(figspath, 'mechn_violin.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# point plot of mech_n0 per instance, one line per family
fig, ax = plt.subplots(figsize = (12, 12))
sns.pointplot(data = df_rocks, x = "instance", y = "mech_n0", hue = "family",
              errwidth = 1, capsize = 0.1, ax = ax)
ax = set_spe_style(ax, title = 'Mean of Poisson ratio, $\\theta = 0$, before & after', xlabel = '', ylabel = '$\\nu$(0)')
fig.savefig(os.path.join(figspath, 'mechn_pointplot.png'), dpi = 300, bbox_inches = 'tight')

### ftir (max peak)

In [None]:
# l_max_peak per family, one strip per instance
fig, ax = plt.subplots(figsize = (12, 12))
sns.stripplot(data = df_rocks, x = 'family', y = 'l_max_peak', hue = 'instance',
              dodge = True, palette = 'viridis', ax = ax)
# plt.yscale('log')
ax = set_spe_style(ax, title = 'Wavelength of maximum spectral peak', xlabel = '', ylabel = r'Wavenumber (cm$^{-1}$)')
fig.savefig(os.path.join(figspath, 'ftirmaxloc_stripplot.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# split violins of l_max_peak per family and instance (title capitalisation "DIstribution" fixed)
# NOTE(review): the title says "wavelength" while the axis label says "Wavenumber" - confirm which is meant
fig, ax = plt.subplots(figsize = (12, 12))
sns.violinplot(data = df_rocks, x = 'family', y = 'l_max_peak', hue = 'instance', split = True,
               scale = 'width', bw = 'scott', inner = 'quartile', palette = 'pastel', ax = ax)
ax = set_spe_style(ax, title = 'Distribution of wavelength at maximum spectral peak', xlabel = '', ylabel = r'Wavenumber (cm$^{-1}$)')
fig.savefig(os.path.join(figspath, 'ftirmaxloc_violin.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# point plot of l_max_peak per instance, one line per family
fig, ax = plt.subplots(figsize = (12, 12))
sns.pointplot(data = df_rocks, x = "instance", y = "l_max_peak", hue = "family",
              errwidth = 1, capsize = 0.1, ax = ax)
ax = set_spe_style(ax, title = 'Mean of wavelength at maximum spectral peak, before & after', xlabel = '', ylabel = r'Wavenumber (cm$^{-1}$)')
fig.savefig(os.path.join(figspath, 'ftirmaxloc_pointplot.png'), dpi = 300, bbox_inches = 'tight')

In [None]:
# rows with complete FTIR data; the spectral columns are relabelled with pp.ftir_lambdas
ftir = df_rocks.loc[:, ['family', 'instance'] + pp.ftir_cols].dropna()
newcolumns = [*ftir.columns[:2], *pp.ftir_lambdas]
ftir.columns = newcolumns
# normalize the input for each sample (row-wise, axis = 1)
ftir.iloc[:, 2:] = normalize(ftir.iloc[:, 2:], axis = 1)
ftir = ftir.sort_values(by = ['family', 'instance'], ascending = [True, False])

In [None]:
# Mean FTIR spectrum (+/- one standard deviation) per (family, instance) group.
# Curves are offset vertically: every second group starts a new offset level
# (+0.03) and the odd group of each pair is shifted by a further 0.01.
fig, ax = plt.subplots(figsize = (12, 12))
sep = 0
for n, (key, group) in enumerate(ftir.groupby(['family', 'instance'], sort = True)):
    label = ' '.join(key)
    odd = np.mod(n, 2)
    if odd == 0:
        sep += 0.03

    # statistics over the spectral columns only: the first two columns
    # (family, instance) hold strings and cannot be averaged
    spectra = group.iloc[:, 2:]
    mu = spectra.mean() + odd * 0.01 + sep
    sd = spectra.std()
    ax.plot(mu, label = label)
    ax.fill_between(pp.ftir_lambdas, mu - sd, mu + sd, alpha = 0.5)
# NOTE(review): tick positions and names are hard-coded for three families - confirm against the data
plt.yticks([0.06, 0.09, 0.12], ['carbonate', 'sandstone', 'shale'], rotation = 90)
plt.xlabel(r'Wavenumber (cm$^{-1}$)')
plt.ylabel('FTIR reflectance (a.u.)')
plt.legend(ncol = 3, markerscale = 0.01, loc = 'lower center', bbox_to_anchor=(0.5, -0.17))

In [None]:
# f = ftir.melt(id_vars = ['family', 'instance'], value_vars = pp.ftir_lambdas)

In [None]:
# lambda_dict = {k:v for k, v in zip(pp.ftir_cols, pp.ftir_lambdas)}
# f.loc[:, 'variable'] = f.variable.apply(lambda x: lambda_dict[x])

In [None]:
# sns.lineplot(x="variable", y="value",
#              hue="family", style="instance",
#              data=f)
# image of the normalized spectra of the 'before' rows (rows = samples, columns = spectral columns)
fig, ax = plt.subplots(figsize = (12, 12))
before_spectra = ftir.query("instance == 'before'").iloc[:, 2:]
ax.pcolorfast(before_spectra, cmap = 'viridis')

In [None]:
# fig, ax = plt.subplots(figsize = (12, 12))
# sns.stripplot(y = 'perm', x = 'family', hue = 'instance', dodge = True, data = df_perm_bna, palette = 'viridis', ax = ax)
# plt.yscale('log')
# sns.set_style('darkgrid')
# plt.title('permeability before & after');
# plt.xlabel('')
# plt.ylabel('Permeability (mD)')

In [None]:
# fig, ax = plt.subplots(figsize = (12, 12))
# sns.boxplot(x = 'family', y = 'perm', hue = 'instance', data = df_perm_bna, dodge = True, width = 0.5)
# plt.yscale('log')

In [None]:
# df_perm_before_clipped = df_perm_before.copy()
# df_perm_before_clipped.loc[:, 'perm'] = df_perm_before_clipped.perm.clip(lower = 0, upper = 500)

In [None]:
# fig, ax = plt.subplots(figsize = (12, 12))
# sns.violinplot(y = 'perm', x = 'code', hue = 'family', data = df_perm_before_clipped, palette = 'viridis', ax = ax)
# sns.set_style('darkgrid')
# plt.title('permeability before');

In [None]:
# fig, ax = plt.subplots(figsize = (12, 12))
# sns.boxplot(y = 'perm', x = 'code', hue = 'family', data = df_perm_before_clipped, palette = 'viridis', ax = ax)
# sns.set_style('darkgrid')
# plt.title('permeability before');

In [None]:
# fig, ax = plt.subplots(figsize = (12, 12))
# sns.kdeplot(x = 'perm',  hue = 'code', data = df_perm_before_clipped, 
#             palette = 'viridis', shade = 'fill', ax = ax)
# sns.set_style('darkgrid')
# plt.title('permeability before');

In [None]:
# tags = ds.tag.str.split('_', expand = True)#.apply(lambda x: pp.get_material_density(x))
# tags[1] = 0.0
# unique_tags = tags[0].unique()
# # tags.set_index([0, tags.index], inplace = True)