# Analyzing nitrate data from xylem sap

In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy.stats import t

In [2]:
#Load the pickled calibration curve that we'll use to analyze our samples
#(only unpickle files you produced yourself: pickle can execute code on load)
with open('../02-calibration_curve/calibration_curve.pkl','rb') as pkl_file:
    cal_curve=pickle.load(pkl_file)

In [3]:
#Re-define this function so we can use it again
def uncertainty(df, cal, sample_col='sample_id',rf_col='rf'):
    """
    Summarize replicate response factors as one concentration per sample,
    with its calibration-curve uncertainty.

    Uses the uncertainty in a calibration curve measurement found in Harris'
    Quantitative Chemical Analysis Eighth Edition (Equation 4-27):

        s_x = (s_y/|m|) * sqrt(1/k + 1/n + (y_u_bar - y_bar)^2 / (m^2 * Sxx))

    Parameters
    ----------
    df : pandas.DataFrame
        Multiplicate (e.g. triplicate) measurements. Must already contain a
        response factor column (`rf_col`) and a sample identifier column
        (`sample_col`).
    cal : mapping
        Calibration curve (saved as a pickle) providing the keys 'slope',
        'intercept', 's_yx' (used as s_y in Equation 4-27), 'x_mean',
        'Sxx' and 'n'.
    sample_col : str
        Column used to group replicates of the same sample.
    rf_col : str
        Column holding the response factors.

    Returns
    -------
    pandas.DataFrame
        One row per sample (in `groupby` order) with the columns 'sample',
        'n_rep', 'rf_mean', 'conc_mean', 's_x', 'CI_low' and 'CI_high'.
        The columns are present even when `df` has no rows.

    Raises
    ------
    ValueError
        If the calibration slope is zero (the curve cannot be inverted).
    """
    m = cal['slope']
    if m == 0:
        raise ValueError("calibration slope is zero; cannot convert response factors to concentrations")
    a = cal['intercept']
    s_y = cal['s_yx']
    x_bar = cal['x_mean']
    Sxx = cal['Sxx']
    n_cal = cal['n']
    y_bar = a + m*x_bar  # the least-squares line passes through (x_bar, y_bar)
    t_val = t.ppf(0.975, n_cal - 2)  # Two-sided 95% confidence interval

    columns = ["sample", "n_rep", "rf_mean", "conc_mean", "s_x", "CI_low", "CI_high"]
    results = []
    for sid, grp in df.groupby(sample_col):
        y_u = grp[rf_col].to_numpy()
        k = len(y_u)
        y_u_bar = np.mean(y_u)
        x_hat = (y_u_bar - a)/m
        # Equation 4-27 divides by |m|: a standard deviation must stay
        # non-negative even when the calibration slope is negative.
        s_x = (s_y/abs(m))*np.sqrt((1/k) + (1/n_cal) + ((y_u_bar - y_bar)**2)/(m**2*Sxx))
        results.append({
            "sample": sid,
            "n_rep": k,
            "rf_mean": y_u_bar,
            "conc_mean": x_hat,
            "s_x": s_x,
            "CI_low": x_hat - t_val*s_x,
            "CI_high": x_hat + t_val*s_x,
        })
    # Passing the columns explicitly keeps the schema stable for empty input,
    # so a downstream merge on 'sample' still works.
    return pd.DataFrame(results, columns=columns)

In [4]:
#Load the three nitrate datasets (fruit set, veraison-n, veraison-u), in that order
fs_data,ver_data_n,ver_data_u=(
    pd.read_csv(csv_path)
    for csv_path in (
        "../01-input_data/fruit_set.csv",
        "../01-input_data/veraison-n.csv",
        "../01-input_data/veraison-u.csv",
    )
)

In [5]:
#Preview the first five rows of the fruit set data (head() defaults to 5 rows)
fs_data.head()

Unnamed: 0,sample,rep,no3_area,istd_area,analysis_day,n_add
0,FS-O-1,1,2.22,10.16,1,0.0
1,FS-O-1,2,2.09,10.71,1,0.0
2,FS-O-1,3,2.24,10.71,1,0.0
3,FS-O-2,1,1.27,13.22,1,0.0
4,FS-O-2,2,1.68,15.58,1,0.0


In [6]:
#Response factor = nitrate peak area divided by internal standard peak area
fs_data['rf']=fs_data['no3_area'].div(fs_data['istd_area'])
ver_data_n['rf']=ver_data_n['no3_area'].div(ver_data_n['istd_area'])
ver_data_u['rf']=ver_data_u['no3_area'].div(ver_data_u['istd_area'])

In [7]:
#Check that the rf column was added to the fruit set data
fs_data.head()

Unnamed: 0,sample,rep,no3_area,istd_area,analysis_day,n_add,rf
0,FS-O-1,1,2.22,10.16,1,0.0,0.218504
1,FS-O-1,2,2.09,10.71,1,0.0,0.195145
2,FS-O-1,3,2.24,10.71,1,0.0,0.20915
3,FS-O-2,1,1.27,13.22,1,0.0,0.096067
4,FS-O-2,2,1.68,15.58,1,0.0,0.107831


In [8]:
#Preview the first five rows of the veraison-n data
ver_data_n.head()

Unnamed: 0,sample,rep,no3_area,istd_area,n_add,rf
0,V-N-6,1,1.44,8.74,0.25,0.16476
1,V-N-6,2,1.83,10.65,0.25,0.171831
2,V-N-6,3,1.61,9.6,0.25,0.167708
3,V-5N-2,1,1.17,10.55,1.25,0.1109
4,V-5N-2,2,1.14,8.84,1.25,0.128959


In [9]:
#Preview the first five rows of the veraison-u data
ver_data_u.head()

Unnamed: 0,sample,rep,no3_area,istd_area,n_add,rf
0,V-15NU-1,1,11.04,10.85,3.75,1.017512
1,V-15NU-1,2,11.47,11.52,3.75,0.99566
2,V-15NU-1,3,11.94,11.45,3.75,1.042795
3,V-15NU-2,1,16.15,10.27,3.75,1.572541
4,V-15NU-2,2,17.72,11.52,3.75,1.538194


In [10]:
#Collapse the analytical reps into one mean concentration (plus uncertainty) per sample,
#since the individual reps are less important for what we're looking at
#The summary does not carry n_add, so it has to be merged back in for downstream analysis
fs_summary=uncertainty(df=fs_data,cal=cal_curve,sample_col="sample",rf_col="rf")
fs_summary.head()

Unnamed: 0,sample,n_rep,rf_mean,conc_mean,s_x,CI_low,CI_high
0,FS-15N-1,3,1.894745,0.429943,0.032616,0.362302,0.497584
1,FS-15N-2,3,1.496906,0.338468,0.032737,0.270575,0.406361
2,FS-15N-3,3,3.035858,0.69232,0.032417,0.625091,0.759548
3,FS-15N-4,3,8.154848,1.86933,0.034238,1.798324,1.940336
4,FS-15N-5,3,3.852337,0.880053,0.032412,0.812834,0.947272


In [11]:
#Attach each sample's n_add value to its summary row
fs_summary_merged=pd.merge(
    fs_summary,
    fs_data.loc[:,['sample','n_add']].drop_duplicates(),
    how='left',
    on='sample',
)

In [12]:
#Same per-sample summary for the veraison-n data
vern_summary=uncertainty(df=ver_data_n,cal=cal_curve,sample_col="sample",rf_col="rf")
vern_summary.head()

Unnamed: 0,sample,n_rep,rf_mean,conc_mean,s_x,CI_low,CI_high
0,V-15N-1,3,1.347076,0.304018,0.03279,0.236016,0.372019
1,V-15N-2,3,2.111994,0.479895,0.032561,0.412369,0.547422
2,V-15N-3,3,3.729934,0.851909,0.032406,0.784704,0.919114
3,V-15N-4,3,0.58665,0.129173,0.033114,0.060498,0.197847
4,V-15N-5,3,5.387625,1.233062,0.032714,1.165217,1.300906


In [13]:
#Attach each veraison-n sample's n_add value to its summary row
vern_summary_merged=pd.merge(
    vern_summary,
    ver_data_n.loc[:,['sample','n_add']].drop_duplicates(),
    how='left',
    on='sample',
)

In [14]:
#Same per-sample summary for the veraison-u data
veru_summary=uncertainty(df=ver_data_u,cal=cal_curve,sample_col="sample",rf_col="rf")
veru_summary.head()

Unnamed: 0,sample,n_rep,rf_mean,conc_mean,s_x,CI_low,CI_high
0,V-15NU-1,3,1.018655,0.228504,0.032918,0.160236,0.296772
1,V-15NU-2,3,1.559,0.352745,0.032716,0.284896,0.420595
2,V-15NU-3,3,1.152898,0.25937,0.032863,0.191216,0.327525
3,V-15NU-4,3,1.298515,0.292852,0.032808,0.224813,0.360891
4,V-15NU-5,3,0.99592,0.223276,0.032928,0.154989,0.291564


In [15]:
#Attach each veraison-u sample's n_add value to its summary row
veru_summary_merged=pd.merge(
    veru_summary,
    ver_data_u.loc[:,['sample','n_add']].drop_duplicates(),
    how='left',
    on='sample',
)

In [16]:
#Check that n_add was merged onto the fruit set summary
fs_summary_merged.head()

Unnamed: 0,sample,n_rep,rf_mean,conc_mean,s_x,CI_low,CI_high,n_add
0,FS-15N-1,3,1.894745,0.429943,0.032616,0.362302,0.497584,3.75
1,FS-15N-2,3,1.496906,0.338468,0.032737,0.270575,0.406361,3.75
2,FS-15N-3,3,3.035858,0.69232,0.032417,0.625091,0.759548,3.75
3,FS-15N-4,3,8.154848,1.86933,0.034238,1.798324,1.940336,3.75
4,FS-15N-5,3,3.852337,0.880053,0.032412,0.812834,0.947272,3.75


In [17]:
#Check that n_add was merged onto the veraison-n summary
vern_summary_merged.head()

Unnamed: 0,sample,n_rep,rf_mean,conc_mean,s_x,CI_low,CI_high,n_add
0,V-15N-1,3,1.347076,0.304018,0.03279,0.236016,0.372019,3.75
1,V-15N-2,3,2.111994,0.479895,0.032561,0.412369,0.547422,3.75
2,V-15N-3,3,3.729934,0.851909,0.032406,0.784704,0.919114,3.75
3,V-15N-4,3,0.58665,0.129173,0.033114,0.060498,0.197847,3.75
4,V-15N-5,3,5.387625,1.233062,0.032714,1.165217,1.300906,3.75


In [18]:
#Check that n_add was merged onto the veraison-u summary
veru_summary_merged.head()

Unnamed: 0,sample,n_rep,rf_mean,conc_mean,s_x,CI_low,CI_high,n_add
0,V-15NU-1,3,1.018655,0.228504,0.032918,0.160236,0.296772,3.75
1,V-15NU-2,3,1.559,0.352745,0.032716,0.284896,0.420595,3.75
2,V-15NU-3,3,1.152898,0.25937,0.032863,0.191216,0.327525,3.75
3,V-15NU-4,3,1.298515,0.292852,0.032808,0.224813,0.360891,3.75
4,V-15NU-5,3,0.99592,0.223276,0.032928,0.154989,0.291564,3.75


In [19]:
#Write the merged summaries to CSV in the working directory, without the index column
fs_summary_merged.to_csv(path_or_buf="fs_summary.csv",index=False)
vern_summary_merged.to_csv(path_or_buf="vern_summary.csv",index=False)
veru_summary_merged.to_csv(path_or_buf="veru_summary.csv",index=False)