In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import scipy.io
import scipy.optimize

In [2]:
# Input directory holding the *.mat prediction files, and output directory
# for the figures written by this notebook.
in_dir = "../../figures/2D/PIPNet2D_model/eval_experimental/"
fig_dir = "../../figures/2D/PIPNet2D_model/compare_linewidths/"

# makedirs(..., exist_ok=True) also creates missing parent directories and
# does not fail when the directory already exists.
os.makedirs(fig_dir, exist_ok=True)

# Intensity threshold passed to find_peaks for both spectra.
peak_thresh = 1e-2

In [3]:
# Load every ".mat" file in in_dir whose name contains "sheared" or "exsy".
data = []
names = []
# os.listdir returns entries in arbitrary order; sort them so the datasets
# are loaded (and later reported) in the same order on every run.
for f in sorted(os.listdir(in_dir)):
    if f.endswith(".mat") and ("sheared" in f or "exsy" in f):
        print(f)
        data.append(sp.io.loadmat(os.path.join(in_dir, f)))
        # Dataset name is the file name without its ".mat" extension.
        names.append(os.path.splitext(f)[0])

20220215_tyrosine_baba_vmas_9_sheared_preds.mat
20230118_tyrosine_exsy_vmas_07mm_preds.mat
20220216_ampicillin_baba_vmas_9_sheared_preds.mat


In [4]:
def find_peaks(spec, thresh=1e-3, extent=5):
    """Locate local maxima of a 2D array.

    A point (i, j) is reported when ``spec[i, j] > thresh`` and its value is
    the maximum of the square neighbourhood reaching ``extent`` points in
    every direction. Points closer than ``extent`` to any edge are never
    reported.

    Parameters
    ----------
    spec : 2D array
        Array to search.
    thresh : float
        Values must be strictly greater than this to be considered.
    extent : int
        Half-width of the neighbourhood, in points.

    Returns
    -------
    ndarray of int, shape (2, n_peaks)
        Row 0 holds the first-axis indices, row 1 the second-axis indices,
        in row-major order. The shape is (2, 0) when no peak is found.
    """

    spec = np.asarray(spec)
    nx, ny = spec.shape

    peaks = []

    # Only points above the threshold can be peaks, so visit just those.
    # np.argwhere yields them in row-major order.
    for i, j in np.argwhere(spec > thresh):

        # Skip points whose neighbourhood would run off the array.
        if i < extent or i >= nx - extent or j < extent or j >= ny - extent:
            continue

        # Symmetric window: the "+ 1" includes the points at +extent, so the
        # candidate sits in the centre of the neighbourhood.
        window = spec[i-extent:i+extent+1, j-extent:j+extent+1]
        if spec[i, j] >= np.max(window):
            peaks.append([int(i), int(j)])

    # Keep the (2, n) layout even when nothing was found, so callers can
    # still index peaks[0] / peaks[1] and iterate over peaks.T.
    if not peaks:
        return np.empty((2, 0), dtype=int)

    return np.array(peaks, dtype=int).T

In [5]:
def gauss(x, mu, sig, h):
    """Gaussian of height ``h``, centre ``mu`` and standard deviation ``sig``, evaluated at ``x``."""
    exponent = -np.square(x-mu) / (2 * sig**2)
    return h * np.exp(exponent)

In [6]:
def fwhm(sig):
    """Convert a Gaussian standard deviation into a full width at half maximum."""
    sigma_to_fwhm = 2 * np.sqrt(2 * np.log(2))
    return sigma_to_fwhm * sig

In [7]:
def _fit_fwhm_pair(coords, x_trace, y_trace):
    """Fit a Gaussian to each of two 1D traces sharing the same coordinates.

    Returns ``(x_fwhm, y_fwhm)``, or ``None`` when the window is empty or
    either fit fails.
    """

    if len(coords) == 0:
        return None

    # Same starting width and bounds for both traces: centre inside the
    # window, non-negative width, height between 0 and 1.
    span = np.abs(coords[-1] - coords[0])
    lower = [np.min(coords), 0., 0.]
    upper = [np.max(coords), np.inf, 1.]

    widths = []
    for trace in (x_trace, y_trace):
        guess = [coords[np.argmax(trace)], span, np.max(trace)]
        try:
            popt, _ = sp.optimize.curve_fit(gauss, coords, trace, p0=guess, bounds=(lower, upper))
        except Exception:
            # Best-effort: a failed fit (e.g. no convergence, or a starting
            # guess outside the bounds) drops this peak. Unlike a bare
            # "except:", KeyboardInterrupt and SystemExit still propagate.
            return None
        widths.append(fwhm(popt[1]))

    return tuple(widths)


def get_linewidths(x, y, x_peaks, y_peaks, xvals, yvals, tol=3, fit_ext=10, require_match=False):
    """Fit Gaussian linewidths of two 2D arrays at the peaks of the second.

    For every peak (i, j) in ``y_peaks`` a 1D Gaussian is fitted to both
    ``x`` and ``y`` along the first axis (reported as "row") and along the
    second axis (reported as "col"), inside a window of ``fit_ext`` points
    before and ``fit_ext - 1`` points after the peak.

    Parameters
    ----------
    x, y : 2D arrays
        Arrays to fit, indexed as ``[i, j]`` like the peak indices.
    x_peaks, y_peaks : int arrays of shape (2, n)
        Peak indices as returned by ``find_peaks``. ``x_peaks`` is only used
        when ``require_match`` is true.
    xvals, yvals : 1D arrays
        Coordinates along the first and second axis, respectively.
    tol : int
        Index tolerance, in every direction, for matching a ``y`` peak to an
        ``x`` peak. Only used when ``require_match`` is true.
    fit_ext : int
        Half-width, in points, of the fitting window.
    require_match : bool
        When true, peaks of ``y`` without a peak of ``x`` within ``tol``
        points are skipped. The default (false) keeps the historical
        behaviour, where the match result was computed but never used.

    Returns
    -------
    x_row_lws, y_row_lws, x_col_lws, y_col_lws : ndarray
        FWHMs of the successful fits, in the units of ``xvals`` (row) and
        ``yvals`` (col).
    row_freq, col_freq : list of [xvals[i], yvals[j]]
        Peak coordinates belonging to the row and col entries.
    """

    x_row_lws = []
    y_row_lws = []
    x_col_lws = []
    y_col_lws = []
    row_freq = []
    col_freq = []

    # "[i, j] in array" tests whether ANY element matches, not whether the
    # pair is a row of the array, so use a set of index pairs instead.
    x_peak_set = None
    if require_match:
        x_peak_set = {(int(i), int(j)) for i, j in np.asarray(x_peaks).reshape(2, -1).T}

    for yi, yj in np.asarray(y_peaks).reshape(2, -1).T:

        yi, yj = int(yi), int(yj)

        if x_peak_set is not None:
            matched = any(
                (i, j) in x_peak_set
                for i in range(yi-tol, yi+tol+1)
                for j in range(yj-tol, yj+tol+1)
            )
            if not matched:
                continue

        # Get row linewidth. Clamp the start of the window: a negative start
        # index would wrap around to the other end of the array.
        lo, hi = max(yi-fit_ext, 0), yi+fit_ext
        row_fit = _fit_fwhm_pair(xvals[lo:hi], x[lo:hi, yj], y[lo:hi, yj])

        if row_fit is None:
            print(f"Unable to fit row {yi}")
        else:
            x_row_lws.append(row_fit[0])
            y_row_lws.append(row_fit[1])
            row_freq.append([xvals[yi], yvals[yj]])

        # Get col linewidth
        lo, hi = max(yj-fit_ext, 0), yj+fit_ext
        col_fit = _fit_fwhm_pair(yvals[lo:hi], x[yi, lo:hi], y[yi, lo:hi])

        if col_fit is None:
            print(f"Unable to fit col {yj}")
        else:
            x_col_lws.append(col_fit[0])
            y_col_lws.append(col_fit[1])
            col_freq.append([xvals[yi], yvals[yj]])

    return np.array(x_row_lws), np.array(y_row_lws), np.array(x_col_lws), np.array(y_col_lws), row_freq, col_freq

In [8]:
def _save_peak_overview(spec, peaks, path):
    """Save a contour plot of ``spec`` with the detected peaks marked."""

    # Accept an empty peak list of any shape; peaks[0] / peaks[1] hold the
    # first- and second-axis indices.
    peaks = np.asarray(peaks).reshape(2, -1)

    fig, ax = plt.subplots(figsize=(4, 3))
    ax.contour(spec, levels=np.logspace(-2., -0.1, 10), colors="C0", linewidths=0.5, zorder=0)
    ax.scatter(peaks[1], peaks[0], color="C1", s=10, marker="x", linewidth=0.5, zorder=1)
    fig.tight_layout()
    fig.savefig(path)
    # Close this specific figure so figures do not pile up across datasets.
    plt.close(fig)


# Per-dataset results, one entry per loaded file (same order as `names`).
all_x_row_lws = []
all_y_row_lws = []
all_x_col_lws = []
all_y_col_lws = []

all_row_freqs = []
all_col_freqs = []

for mat, name in zip(data, names):

    ppm_x = mat["ppm_x"]
    ppm_y = mat["ppm_y"]
    X = mat["X"]
    wr = mat["wr"]
    y = mat["pred"].T

    # Select the input spectrum with the largest `wr` (presumably the
    # highest MAS rate).
    # NOTE(review): np.argmax returns an index into the flattened `wr`; this
    # assumes `wr` has one entry per spectrum in X - TODO confirm.
    iwr = np.argmax(wr)
    x = X[iwr].T

    # Find the peaks of the input spectrum and save a figure marking them
    x_peaks = find_peaks(x, thresh=peak_thresh, extent=10)
    _save_peak_overview(x, x_peaks, f"{fig_dir}{name}_x.pdf")

    # Same for the prediction
    y_peaks = find_peaks(y, thresh=peak_thresh, extent=10)
    _save_peak_overview(y, y_peaks, f"{fig_dir}{name}_y.pdf")

    # Fit the linewidths of both spectra at the peaks of the prediction
    x_row_lws, y_row_lws, x_col_lws, y_col_lws, row_freq, col_freq = get_linewidths(x, y, x_peaks, y_peaks, ppm_x[0], ppm_y[0])

    all_x_row_lws.append(x_row_lws)
    all_y_row_lws.append(y_row_lws)
    all_x_col_lws.append(x_col_lws)
    all_y_col_lws.append(y_col_lws)

    all_row_freqs.append(row_freq)
    all_col_freqs.append(col_freq)

Unable to fit row 340
Unable to fit col 215
Unable to fit row 498
Unable to fit col 395
Unable to fit row 643
Unable to fit col 565
Unable to fit row 658
Unable to fit col 573
Unable to fit row 675
Unable to fit col 598
Unable to fit row 718
Unable to fit col 648
Unable to fit row 808
Unable to fit col 747
Unable to fit row 820
Unable to fit col 760
Unable to fit row 861
Unable to fit col 807
Unable to fit col 956


In [21]:
# Per-dataset offset, keyed by dataset name; it is added to both peak
# coordinates when the linewidths are reported.
offsets = {
    **dict.fromkeys(
        (
            "20220215_tyrosine_baba_vmas_9_sheared_preds",
            "20220216_ampicillin_baba_vmas_9_sheared_preds",
        ),
        1.7,
    ),
    "20230118_tyrosine_exsy_vmas_07mm_preds": 2.7,
}

In [23]:
# For each dataset, list the fitted FWHM pairs (first / second spectrum)
# together with the offset-corrected peak coordinates: rows first, then an
# empty line, then columns.
per_dataset = zip(names, all_x_row_lws, all_y_row_lws, all_x_col_lws, all_y_col_lws, all_row_freqs, all_col_freqs)

for name, x_row_lws, y_row_lws, x_col_lws, y_col_lws, row_freq, col_freq in per_dataset:

    print(name)

    for x_lw, y_lw, freq in zip(x_row_lws, y_row_lws, row_freq):
        shift = offsets[name]
        print(f"{freq[0]+shift:6.2f} / {freq[1]+shift:6.2f} ppm: {x_lw:8.4f} / {y_lw:8.4f} ppm row FWHM")

    print("")

    for x_lw, y_lw, freq in zip(x_col_lws, y_col_lws, col_freq):
        shift = offsets[name]
        print(f"{freq[0]+shift:6.2f} / {freq[1]+shift:6.2f} ppm: {x_lw:8.4f} / {y_lw:8.4f} ppm col FWHM")

20220215_tyrosine_baba_vmas_9_sheared_preds
 12.54 /   2.51 ppm:   0.3580 /   0.1374 ppm row FWHM
 12.47 /   6.59 ppm:   0.2690 /   0.1001 ppm row FWHM
 12.42 /   9.98 ppm:   0.2750 /   0.1826 ppm row FWHM
 12.37 /   5.07 ppm:   0.2811 /   0.2395 ppm row FWHM
 12.32 /   7.57 ppm:   0.2829 /   0.2126 ppm row FWHM
 12.27 /   4.38 ppm:   0.2889 /   0.0888 ppm row FWHM
 12.25 /   2.36 ppm:   0.3115 /   0.1153 ppm row FWHM
  9.93 /   6.55 ppm:   0.2865 /   0.1923 ppm row FWHM
  9.93 /   5.09 ppm:   0.3001 /   0.1854 ppm row FWHM
  9.91 /   4.36 ppm:   0.3066 /   0.1664 ppm row FWHM
  9.88 /   7.50 ppm:   0.3153 /   0.2873 ppm row FWHM
  9.88 /   2.38 ppm:   0.1813 /   0.0624 ppm row FWHM
  9.86 /  12.30 ppm:   0.2849 /   0.1749 ppm row FWHM
  9.83 /   9.91 ppm:   0.2644 /   0.0492 ppm row FWHM
  7.61 /  12.34 ppm:   0.4511 /   0.0958 ppm row FWHM
  7.61 /   7.59 ppm:   0.4593 /   0.3295 ppm row FWHM
  7.59 /   4.44 ppm:   0.5136 /   0.3615 ppm row FWHM
  7.56 /   2.43 ppm:   0.4666 /   0.17

In [24]:
def _pool(chunks):
    """Concatenate per-dataset linewidth arrays into one flat array."""
    # np.atleast_1d keeps this cell re-runnable: on a second execution the
    # inputs are already flat arrays, and their scalar elements cannot be
    # concatenated directly.
    chunks = [np.atleast_1d(chunk) for chunk in chunks]
    return np.concatenate(chunks) if chunks else np.empty(0)


# Pool the linewidths of all datasets. The per-dataset lists are replaced
# by flat arrays under the same names.
all_x_row_lws = _pool(all_x_row_lws)
all_y_row_lws = _pool(all_y_row_lws)
all_x_col_lws = _pool(all_x_col_lws)
all_y_col_lws = _pool(all_y_col_lws)

In [11]:
# Mean of the x / y linewidth ratio and its std / sqrt(n), for rows then cols
for x_lws, y_lws in ((all_x_row_lws, all_y_row_lws), (all_x_col_lws, all_y_col_lws)):
    ratio = x_lws / y_lws
    print(np.mean(ratio), np.std(ratio)/np.sqrt(x_lws.shape[0]))

3.971935259345726 0.9724415741652381
3.591523525644185 0.13937467814221313


In [12]:
# Mean of the y / x linewidth ratio and its std / sqrt(n), for rows then cols
for x_lws, y_lws in ((all_x_row_lws, all_y_row_lws), (all_x_col_lws, all_y_col_lws)):
    ratio = y_lws / x_lws
    print(np.mean(ratio), np.std(ratio)/np.sqrt(x_lws.shape[0]))

0.39394307981338 0.015503592226853316
0.3260841806358969 0.012080601248510667


In [13]:
# Same statistics with row and col linewidths pooled together:
# x / y first, then y / x.
all_x_lws = np.concatenate([all_x_row_lws, all_x_col_lws])
all_y_lws = np.concatenate([all_y_row_lws, all_y_col_lws])
n_lws = all_y_lws.shape[0]
for ratio in (all_x_lws / all_y_lws, all_y_lws / all_x_lws):
    print(np.mean(ratio), np.std(ratio) / np.sqrt(n_lws))

3.7825673038026677 0.49342796837406905
0.360163099165558 0.010092409218865915


In [39]:
# Tab-separated table pasted as text. Only the "a/b" tokens are consumed by
# the parsing cell; the other numeric columns are ignored there.
# NOTE(review): the first column group appears to hold rounded copies of the
# FWHM pairs printed earlier in this notebook; the provenance of the other
# groups is not stated here - TODO confirm and document the source.
pp = """2.5	12.5	0.56/0.09	0.36/0.14	1.8	10.2	0.52/0.11	0.48/0.09	10.0	12.5	0.19/0.09	0.18/0.10
6.6	12.5	0.34/0.08	0.27/0.10	10.1	10.1	0.79/0.43	0.58/0.30	12.4	9.9	0.17/0.09	0.18/0.07
10.0	12.4	0.35/0.19	0.28/0.18	4.8	10.1	0.61/0.27	0.52/0.24	7.6	9.9	0.35/0.17	0.28/0.14
5.1	12.4	0.47/0.20	0.28/0.24	7.5	10.0	0.57/0.29	0.44/0.21	7.3	9.9	0.33/0.10	0.26/0.08
4.4	12.3	0.43/0.16	0.29/0.09	10.1	7.5	0.67/0.16	0.48/0.14	6.5	9.9	0.25/0.06	0.23/0.07
2.4	12.3	0.36/0.08	0.31/0.12	4.6	7.4	0.54/0.24	0.50/0.31	10.0	7.7	0.26/0.12	0.37/0.14
6.6	9.9	0.49/0.11	0.29/0.19	6.9	7.1	0.76/0.23	1.22/0.35	5.4	7.6	0.31/0.12	0.37/0.14
5.1	9.9	0.39/0.11	0.30/0.19	0.6	7.1	0.86/0.17	0.72/0.17	6.6	7.6	0.38/0.14	0.53/0.17
4.4	9.9	0.44/0.13	0.31/0.17	5.2	7.0	0.74/0.31	0.64/0.26	5.0	7.6	0.50/0.11	0.48/0.15
7.5	9.9	0.80/0.10	0.32/0.29	4.0	6.5	0.47/0.14	0.44/0.13	4.6	7.6	0.39/0.06	0.47/0.12
2.4	9.9	0.76/0.30	0.18/0.06	5.2	6.5	0.50/0.23	0.61/0.25	5.1	7.3	0.31/0.11	0.35/0.10
12.3	9.9	0.40/0.07	0.28/0.17	1.7	6.5	0.42/0.12	0.39/0.13	6.6	7.3	0.29/0.12	0.34/0.12
9.9	9.9	0.36/0.15	0.26/0.05	7.0	5.2	0.75/0.29	0.51/0.26	4.3	7.3	0.35/0.08	0.33/0.07
12.3	7.6	0.25/0.06	0.45/0.10	0.6	5.2	0.49/0.17	0.41/0.21	9.9	7.3	0.23/0.07	0.40/0.07
7.6	7.6	0.51/0.17	0.46/0.33	1.5	5.0	0.40/0.15	0.38/0.12	7.6	6.6	0.56/0.12	0.39/0.14
4.4	7.6	0.73/0.41	0.51/0.36	10.1	4.7	0.54/0.19	0.43/0.16	4.4	6.6	0.58/0.17	0.34/0.12
2.4	7.6	0.59/0.11	0.47/0.17	7.4	4.6	0.61/0.38	0.45/0.17	7.3	6.6	0.29/0.12	0.28/0.13
10.0	7.5	0.52/0.12	0.60/0.24	1.8	4.2	0.34/0.19	0.30/0.14	9.9	6.5	0.20/0.05	0.23/0.05
6.5	7.2	0.53/0.11	0.46/0.17	10.1	4.1	0.45/0.18	0.54/0.12	2.6	5.5	0.34/0.08	0.32/0.05
4.9	7.2	1.23/0.22	0.55/0.19	6.4	4.1	0.38/0.14	0.42/0.14	7.6	5.4	0.35/0.15	0.29/0.10
4.4	7.2	0.70/0.19	0.64/0.34	0.6	4.0	0.32/0.11	0.25/0.10	4.6	5.4	0.48/0.12	0.45/0.12
10.0	6.7	0.45/0.12	0.35/0.10	1.4	3.8	0.33/0.22	0.29/0.21	2.5	5.1	0.42/0.10	0.77/0.11
12.3	6.6	0.39/0.09	0.29/0.07	7.1	1.7	0.49/0.18	0.51/0.08	7.3	5.1	0.29/0.09	0.36/0.09
7.2	6.6	0.48/0.12	0.38/0.11	10.0	1.7	0.53/0.15	0.38/0.11	4.5	5.1	0.83/0.35	0.61/0.20
4.3	6.6	0.54/0.09	0.39/0.15	5.2	1.7	0.41/0.22	0.34/0.17	7.6	4.6	0.39/0.06	0.29/0.05
2.4	6.6	0.59/0.07	0.34/0.08	4.0	1.7	0.34/0.22	0.31/0.14	5.3	4.6	0.41/0.18	0.43/0.16
5.2	6.6	0.59/0.16	0.35/0.18	1.6	1.7	0.46/0.28	0.40/0.25	2.5	4.5	0.45/0.13	0.53/0.14
5.1	5.4	0.50/0.13	0.35/0.12	0.6	1.7	0.52/0.25	0.39/0.22	5.1	4.4	0.40/0.16	0.58/0.22
2.5	5.4	0.50/0.15	0.36/0.14	3.8	1.4	0.33/0.19	0.31/0.18	6.6	4.3	0.30/0.12	0.44/0.17
10.0	5.3	0.57/0.11	0.52/0.13	1.6	0.7	0.60/0.25	0.49/0.27	7.3	4.3	0.28/0.06	0.30/0.07
4.3	5.3	0.60/0.07	0.49/0.16	0.6	0.7	0.61/0.35	0.48/0.29	5.4	2.5	0.29/0.07	0.32/0.09
6.5	5.2	0.53/0.13	0.47/0.23	2.7	0.6	0.77/0.16	0.45/0.14	5.1	2.5	0.38/0.08	0.35/0.10
12.4	5.2	0.50/0.13	0.76/0.14	7.0	0.6	0.66/0.14	0.44/0.17	4.5	2.5	0.52/0.12	0.42/0.14
5.3	5.1	0.70/0.15	0.62/0.15	5.1	0.6	0.52/0.27	0.41/0.21				
7.3	5.1	0.53/0.13	0.55/0.19								
4.3	4.5	0.67/0.11	0.56/0.29								
7.5	4.5	0.90/0.30	0.56/0.31								
12.2	4.4	0.53/0.08	0.84/0.08								
2.4	4.4	0.68/0.22	0.60/0.33								
6.5	4.4	0.71/0.13	0.55/0.22								
12.4	2.5	0.45/0.10	0.51/0.07								
4.4	2.5	0.67/0.22	0.45/0.23								
7.5	2.4	0.72/0.12	0.40/0.17								
5.3	2.4	0.57/0.14	0.38/0.17								
"""

In [40]:
# Collect every "a/b" token of the table: a goes to xs, b goes to ys.
# Tokens without a "/" are skipped.
pairs = [token.split("/") for token in pp.split() if "/" in token]

xs = np.array([float(pair[0]) for pair in pairs])
ys = np.array([float(pair[1]) for pair in pairs])

In [44]:
# Mean of xs / ys and its std / sqrt(n) over all parsed pairs
x_over_y = xs / ys
np.mean(x_over_y), np.std(x_over_y) / np.sqrt(xs.shape[0])

(3.326172974128364, 0.10004582908499104)

In [45]:
# Mean of ys / xs and its std / sqrt(n) over all parsed pairs
y_over_x = ys / xs
np.mean(y_over_x), np.std(y_over_x) / np.sqrt(xs.shape[0])

(0.3574354259750803, 0.009794168120234533)