In [1]:
import numpy as np
import os
import scipy as sp
import matplotlib.pyplot as plt

In [2]:
in_dir = "../../figures/2D/PIPNet2D_model/eval_experimental/"
fig_dir = "../../figures/2D/PIPNet2D_model/compare_linewidths/"

if not os.path.exists(fig_dir):
    os.mkdir(fig_dir)
    
peak_thresh = 1e-2

In [3]:
data = []
names = []
for f in os.listdir(in_dir):
    if f.endswith(".mat") and ("sheared" in f or "exsy" in f):
        print(f)
        data.append(sp.io.loadmat(in_dir + f))
        names.append(f.split(".mat")[0])

20220215_tyrosine_baba_vmas_9_sheared_preds.mat
20220216_ampicillin_baba_vmas_9_sheared_preds.mat
20211029_tyrosine_exsy_clean_9_preds.mat


In [4]:
def find_peaks(spec, thresh=1e-3, extent=5):
    
    nx, ny = spec.shape
    
    peaks = []
    
    for i in range(extent, nx-extent):
        for j in range(extent, ny-extent):
            
            # Check for maximum:
            if (spec[i, j] > thresh and spec[i, j] >= np.max(spec[i-extent:i+extent, j-extent:j+extent])):
                peaks.append([i, j])
    
    
    return np.array(peaks).T

In [5]:
def gauss(x, mu, sig, h):
    
    return h * np.exp(-np.square(x-mu) / (2 * sig**2))

In [6]:
def fwhm(sig):
    
    return 2 * np.sqrt(2 * np.log(2)) * sig

In [61]:
def get_linewidths(x, y, x_peaks, y_peaks, xvals, yvals, tol=3, fit_ext=10):
    
    x_row_lws = []
    y_row_lws = []
    x_col_lws = []
    y_col_lws = []
    row_freq = []
    col_freq = []
    
    for yi, yj in y_peaks.T:
        
        found = False
        for i in range(yi-tol, yi+tol+1):
            for j in range(yj-tol, yj+tol+1):
                if [i, j] in x_peaks.T:
                    found = True
                    break
            
            if found:
                break
        
        # Get row linewidth
        xs = xvals[yi-fit_ext:yi+fit_ext]
        x_row = x[yi-fit_ext:yi+fit_ext, yj]
        y_row = y[yi-fit_ext:yi+fit_ext, yj]
        
        x_guess = [xs[np.argmax(x_row)], np.abs(xs[-1]-xs[0]), np.max(x_row)]
        y_guess = [xs[np.argmax(y_row)], np.abs(xs[-1]-xs[0]), np.max(y_row)]
        
        valid = False
        try:
            x_popt, _ = sp.optimize.curve_fit(gauss, xs, x_row, p0=x_guess, bounds=([np.min(xs), 0., 0.], [np.max(xs), np.inf, 1.]))
            y_popt, _ = sp.optimize.curve_fit(gauss, xs, y_row, p0=y_guess, bounds=([np.min(xs), 0., 0.], [np.max(xs), np.inf, 1.]))
            valid = True
        except:
            print(f"Unable to fit row {yi}")
        
        if valid:
            x_row_lws.append(fwhm(x_popt[1]))
            y_row_lws.append(fwhm(y_popt[1]))
            row_freq.append([xvals[yi], yvals[yj]])
        
        # Get col linewidth
        xs = yvals[yj-fit_ext:yj+fit_ext]
        x_col = x[yi, yj-fit_ext:yj+fit_ext]
        y_col = y[yi, yj-fit_ext:yj+fit_ext]
        
        x_guess = [xs[np.argmax(x_col)], np.abs(xs[-1]-xs[0]), np.max(x_col)]
        y_guess = [xs[np.argmax(y_col)], np.abs(xs[-1]-xs[0]), np.max(y_col)]
        
        valid = False
        try:
            x_popt, _ = sp.optimize.curve_fit(gauss, xs, x_col, p0=x_guess, bounds=([np.min(xs), 0., 0.], [np.max(xs), np.inf, 1.]))
            y_popt, _ = sp.optimize.curve_fit(gauss, xs, y_col, p0=y_guess, bounds=([np.min(xs), 0., 0.], [np.max(xs), np.inf, 1.]))
            valid = True
        except:
            print(f"Unable to fit col {yj}")
        
        if valid:
            x_col_lws.append(fwhm(x_popt[1]))
            y_col_lws.append(fwhm(y_popt[1]))
            col_freq.append([xvals[yi], yvals[yj]])
                
    return np.array(x_row_lws), np.array(y_row_lws), np.array(x_col_lws), np.array(y_col_lws), row_freq, col_freq

In [62]:
all_x_row_lws = []
all_y_row_lws = []
all_x_col_lws = []
all_y_col_lws = []

all_row_freqs = []
all_col_freqs = []

for mat, name in zip(data, names):
    
    ppm_x = mat["ppm_x"]
    ppm_y = mat["ppm_y"]
    X = mat["X"]
    wr = mat["wr"]
    y = mat["pred"].T
    
    # Get highest MAS spectrum
    iwr = np.argmax(wr)
    x = X[iwr].T
    
    # Print found peaks
    
    x_peaks = find_peaks(x, thresh=peak_thresh, extent=10)
    
    fig = plt.figure(figsize=(4,3))
    ax = fig.add_subplot(1,1,1)
    ax.contour(x, levels=np.logspace(-2., -0.1, 10), colors="C0", linewidths=0.5, zorder=0)
    ax.scatter(x_peaks[1], x_peaks[0], color="C1", s=10, marker="x", linewidth=0.5, zorder=1)
    fig.tight_layout()
    fig.savefig(f"{fig_dir}{name}_x.pdf")
    plt.close()
    
    y_peaks = find_peaks(y, thresh=peak_thresh, extent=10)
    
    fig = plt.figure(figsize=(4,3))
    ax = fig.add_subplot(1,1,1)
    ax.contour(y, levels=np.logspace(-2., -0.1, 10), colors="C0", linewidths=0.5, zorder=0)
    ax.scatter(y_peaks[1], y_peaks[0], color="C1", s=10, marker="x", linewidth=0.5, zorder=1)
    fig.tight_layout()
    fig.savefig(f"{fig_dir}{name}_y.pdf")
    plt.close()
    
    x_row_lws, y_row_lws, x_col_lws, y_col_lws, row_freq, col_freq = get_linewidths(x, y, x_peaks, y_peaks, ppm_x[0], ppm_y[0])
    
    all_x_row_lws.append(x_row_lws)
    all_y_row_lws.append(y_row_lws)
    all_x_col_lws.append(x_col_lws)
    all_y_col_lws.append(y_col_lws)
    
    all_row_freqs.append(row_freq)
    all_col_freqs.append(col_freq)

Unable to fit row 127
Unable to fit col 31
Unable to fit row 137
Unable to fit col 80
Unable to fit row 211
Unable to fit col 79
Unable to fit row 216
Unable to fit col 35
Unable to fit row 286
Unable to fit col 122
Unable to fit row 297
Unable to fit col 166
Unable to fit row 320
Unable to fit row 323
Unable to fit col 128
Unable to fit row 372
Unable to fit col 128
Unable to fit row 372
Unable to fit col 168
Unable to fit row 388
Unable to fit col 177
Unable to fit col 138
Unable to fit row 459
Unable to fit col 216


In [63]:
for name, x_row_lws, y_row_lws, x_col_lws, y_col_lws, row_freq, col_freq in zip(names, all_x_row_lws, all_y_row_lws, all_x_col_lws, all_y_col_lws, all_row_freqs, all_col_freqs):
    
    print(name)
    
    for xrl, yrl, rf in zip(x_row_lws, y_row_lws, row_freq):
        
        print(f"{rf[0]:6.2f} / {rf[1]:6.2f} ppm: {xrl:8.4f} / {yrl:8.4f} ppm row FWHM")
    
    for xcl, ycl, cf in zip(x_col_lws, y_col_lws, col_freq):
        
        print(f"{cf[0]:6.2f} / {cf[1]:6.2f} ppm: {xcl:8.4f} / {ycl:8.4f} ppm col FWHM")

20220215_tyrosine_baba_vmas_9_sheared_preds
 10.84 /   0.81 ppm:   0.3580 /   0.1374 ppm row FWHM
 10.77 /   4.89 ppm:   0.2690 /   0.1001 ppm row FWHM
 10.72 /   8.28 ppm:   0.2750 /   0.1826 ppm row FWHM
 10.67 /   3.37 ppm:   0.2811 /   0.2395 ppm row FWHM
 10.62 /   5.87 ppm:   0.2829 /   0.2126 ppm row FWHM
 10.57 /   2.68 ppm:   0.2889 /   0.0888 ppm row FWHM
 10.55 /   0.66 ppm:   0.3115 /   0.1153 ppm row FWHM
  8.23 /   4.85 ppm:   0.2865 /   0.1923 ppm row FWHM
  8.23 /   3.39 ppm:   0.3001 /   0.1854 ppm row FWHM
  8.21 /   2.66 ppm:   0.3066 /   0.1664 ppm row FWHM
  8.18 /   5.80 ppm:   0.3153 /   0.2873 ppm row FWHM
  8.18 /   0.68 ppm:   0.1813 /   0.0624 ppm row FWHM
  8.16 /  10.60 ppm:   0.2849 /   0.1749 ppm row FWHM
  8.13 /   8.21 ppm:   0.2644 /   0.0492 ppm row FWHM
  5.91 /  10.64 ppm:   0.4511 /   0.0958 ppm row FWHM
  5.91 /   5.89 ppm:   0.4593 /   0.3295 ppm row FWHM
  5.89 /   2.74 ppm:   0.5136 /   0.3615 ppm row FWHM
  5.86 /   0.73 ppm:   0.4666 /   0.17

In [67]:
all_x_row_lws = np.concatenate(all_x_row_lws)
all_y_row_lws = np.concatenate(all_y_row_lws)
all_x_col_lws = np.concatenate(all_x_col_lws)
all_y_col_lws = np.concatenate(all_y_col_lws)

ValueError: zero-dimensional arrays cannot be concatenated

In [68]:
print(np.mean(all_x_row_lws / all_y_row_lws), np.std(all_x_row_lws / all_y_row_lws)/np.sqrt(all_x_row_lws.shape[0]))
print(np.mean(all_x_col_lws / all_y_col_lws), np.std(all_x_col_lws / all_y_col_lws)/np.sqrt(all_x_col_lws.shape[0]))

3.9130597628054185 1.2312093747167863
3.396352734004645 0.1747838215075943


In [69]:
print(np.mean(all_y_row_lws / all_x_row_lws), np.std(all_y_row_lws / all_x_row_lws)/np.sqrt(all_x_row_lws.shape[0]))
print(np.mean(all_y_col_lws / all_x_col_lws), np.std(all_y_col_lws / all_x_col_lws)/np.sqrt(all_x_col_lws.shape[0]))

0.4442723331607497 0.01905655048928996
0.37796245455235566 0.02130895276368668


In [71]:
all_x_lws = np.concatenate([all_x_row_lws, all_x_col_lws])
all_y_lws = np.concatenate([all_y_row_lws, all_y_col_lws])
print(np.mean(all_x_lws / all_y_lws), np.std(all_x_lws / all_y_lws) / np.sqrt(all_y_lws.shape[0]))
print(np.mean(all_y_lws / all_x_lws), np.std(all_y_lws / all_x_lws) / np.sqrt(all_y_lws.shape[0]))

3.654706248405031 0.6220749884580342
0.41111739385655266 0.014505614104714369
