In [14]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
from scipy.optimize import curve_fit
import sys
sys.path.append('/Users/lkbayne/Desktop/python/tools')
import chem_calcs as cc

In [15]:
#importing pFI files
files = ['DAY0.txt', 'DAY7.txt', 'DAY14.txt', 'DAY21.txt', 'DAY35.txt', 'DAY96.txt', 'DAY180.txt']

#pick the single export this run analyses. `file` used to be read without ever
#being assigned, so the cell only worked with a leftover kernel variable and
#raised NameError after Restart & Run All.
#NOTE(review): files[0] is a placeholder choice - set this to the day you want.
file = files[0]

#the export is read as tab-separated text
df = pd.read_csv(file, sep="\t")

#filtering for only the sample name, absorbances, and sequence name
#(.copy() so that later edits to filtered_df do not touch df)
filtered_df = df[['Name', 'AVE810_1050', 'AVE880_1050', 'Sequence Name']].copy()

#correcting different sample names- I label manually
#each map goes from the label as it appears in the file to the corrected name

#silicate: the corrected name is the raw label with "_Si" appended
_SI_RAW_LABELS = ("A2", "A3", "A4",
                  "B2", "B3", "B4",
                  "C2", "C3", "C4",
                  "D1", "D2")
Si_fixes = {label: label + "_Si" for label in _SI_RAW_LABELS}

#phosphate: the corrected name is the two-character position code plus "_PO4"
#NOTE(review): "C4-P" is hyphenated while every other key uses "_P" - confirm
#this matches the label in the file and is not a typo
_PO4_RAW_LABELS = ("A1_P", "A3_P", "A4_P",
                   "B1_P", "B3_P", "B4_P",
                   "C1_P", "C3_P", "C4-P",
                   "D1_P", "D2_P")
P_fixes = {label: label[:2] + "_PO4" for label in _PO4_RAW_LABELS}

#combine (silicate entries first, then phosphate)
name_fixes = dict(Si_fixes)
name_fixes.update(P_fixes)

#update df with correct sample names (exact-value replacement on the Name column)
corrected_names = filtered_df['Name'].replace(name_fixes)
filtered_df['Name'] = corrected_names

#sample names
Si_samples = [
    "A2_Si", "A3_Si", "A4_Si",
    "B2_Si", "B3_Si", "B4_Si",
    "C2_Si", "C3_Si", "C4_Si",
    "D1_Si", "D2_Si",
]

P_samples = [
    "A1_PO4", "A3_PO4", "A4_PO4",
    "B1_PO4", "B3_PO4", "B4_PO4",
    "C1_PO4", "C3_PO4", "C4_PO4",
    "D1_PO4", "D2_PO4",
]

#average absorbance of each sample: mean AVE810_1050 over all rows whose Name
#equals the sample name (plain loop, so `sample` stays defined afterwards)
Si_averages = {}
for sample in Si_samples:
    rows_for_sample = filtered_df['Name'] == sample
    Si_averages[sample] = filtered_df.loc[rows_for_sample, 'AVE810_1050'].mean()

In [16]:
#calculating standard concentrations
#both standards share the same last argument (50*1000); the printed results
#equal first_arg * second_arg / last_arg
#NOTE(review): presumably (stock conc, aliquot volume, None = solve for, final
#volume) - confirm argument meaning and units against chem_calcs.conc_to_dil
final_volume = 50*1000

PO4_conc = cc.conc_to_dil(4997.89, 40, None, final_volume)
Si_conc = cc.conc_to_dil(10255, 220, None, final_volume)

#phosphate first, then silicate
for std_conc in (PO4_conc, Si_conc):
    print(std_conc)

3.9983120000000003
45.122


In [17]:
#calibration curve pFI dilution values
#fraction of standard at each calibration level: (600 - x) / 600
#NOTE(review): 600 and x look like volumes - confirm units
complement_amounts = np.array([600, 450, 300, 150, 50])
dilutions = (600 - complement_amounts) / 600

#each dilution 3 times to match triplicate calibration points
dil_series = dilutions.repeat(3)

#calibration concentrations (dilution fraction times the standard concentration)
Si_cali_concs = Si_conc * dil_series
PO4_cali_concs = PO4_conc * dil_series

In [23]:
#Calculating Si concentrations and uncertainty

#get absorbances of the silicate auto-calibration runs
#NOTE(review): assumes these rows appear in the same order as Si_cali_concs
#(triplicates, lowest dilution fraction first) - confirm against the export
is_si_calibration = filtered_df['Sequence Name'] == "AutoCalibration_Silicate"
Si_cali_abs = filtered_df.loc[is_si_calibration, 'AVE810_1050'].to_numpy()

#the stray per-sample average that used to sit here was removed: it read the
#loop variable `sample` left over from an earlier cell and its result was
#always overwritten inside the loop below before being used

#fail early with a readable message if the calibration set is incomplete
if len(Si_cali_abs) != len(Si_cali_concs):
    raise ValueError(
        f"expected {len(Si_cali_concs)} silicate calibration absorbances, "
        f"found {len(Si_cali_abs)}"
    )

#linear regression (absorbance vs concentration) to get slope and intercept
fit = stats.linregress(Si_cali_concs, Si_cali_abs)
Si_slope = fit.slope
Si_yint = fit.intercept
Si_slope_err = fit.stderr               #standard error of the slope
Si_yint_err = fit.intercept_stderr      #standard error of the intercept
print('Si Slope=', Si_slope)
print('Si y-intercept=', Si_yint)

#calculate uncertainty variables
Si_predicted_abs = Si_slope * Si_cali_concs + Si_yint
Si_residuals = Si_cali_abs - Si_predicted_abs
n_Si = len(Si_cali_concs)  #number of calibration points
#residual standard deviation of the fit
Sxo_Si = np.sqrt(np.sum(Si_residuals**2) / (n_Si - 2))  #for linear n - 2
print('Sxo_Si =', Sxo_Si)

#concentration and combined uncertainty for every Si sample
results = []
for sample in Si_samples:
    #mean absorbance over all rows with exactly this sample name
    avg_abs = filtered_df.loc[filtered_df['Name'] == sample, 'AVE810_1050'].mean()

    conc, u_combined = cc.calc_Si_conc_and_uncert(avg_abs, Si_yint, Si_slope, Sxo_Si, Si_slope_err, Si_yint_err)

    results.append({'Sample': sample, '[Si] (uM)': conc, 'Uncertainty': u_combined})

# Convert results to a DataFrame and print
results_df = pd.DataFrame(results)
print(results_df)

Si Slope= 0.025511181304965196
Si y-intercept= 0.0011247639593910952
Sxo_Si = 0.013902124672591347
   Sample  [Si] (uM)  Uncertainty
0   A2_Si  40.835306     0.712159
1   A3_Si  40.559898     0.710760
2   A4_Si  40.938986     0.712688
3   B2_Si  40.672633     0.711332
4   B3_Si  41.131294     0.713671
5   B4_Si  40.630991     0.711120
6   C2_Si  40.957423     0.712782
7   C3_Si  41.073934     0.713378
8   C4_Si  41.187884     0.713961
9   D1_Si  39.987051     0.707870
10  D2_Si  40.554175     0.710731
