In [28]:
import pandas as pd
import numpy as np

In [11]:
# Load the raw export; the file is tab-separated text.
df = pd.read_csv('GC.txt', delimiter='\t')

In [12]:
# Summarise column dtypes and non-null counts to see which columns are populated.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 118 entries, 0 to 117
Data columns (total 20 columns):
DOY                   1 non-null float64
tray position         118 non-null int64
chamber               90 non-null object
sample interval       90 non-null float64
sample id             118 non-null object
sample date           118 non-null object
sample time           118 non-null object
process time          118 non-null object
CH4                   118 non-null object
CH4 retention time    118 non-null float64
CH4 area              118 non-null float64
CO2                   118 non-null object
CO2 retention time    118 non-null float64
CO2 area              118 non-null float64
N2O                   118 non-null object
N2O retention time    118 non-null float64
N2O area              118 non-null float64
CH4 cal ppm           2 non-null object
CO2 cal ppm           2 non-null object
N2O cal ppm           2 non-null object
dtypes: float64(8), int64(1), object(11)
memory usage: 18.

In [13]:
# Preview the first ten rows of the raw table.
df.head(10)

Unnamed: 0,DOY,tray position,chamber,sample interval,sample id,sample date,sample time,process time,CH4,CH4 retention time,CH4 area,CO2,CO2 retention time,CO2 area,N2O,N2O retention time,N2O area,CH4 cal ppm,CO2 cal ppm,N2O cal ppm
0,300.0,1,,,STD,04.11.2016,23:22:33,00:05:22,CH4,2.133,39.219,CO2,3.663,3800.9564,N2O,3.393,723.1031,4.183+- 0.084 ppm,401.7+-8 ppm,403+-40 ppm
1,,2,,,STD,04.11.2016,23:27:55,00:05:21,CH4,2.14,39.3738,CO2,3.66,3798.167,N2O,3.393,721.6689,4.183,401.7,403
2,,3,,,STD,04.11.2016,23:33:16,00:05:22,CH4,2.14,38.693,CO2,3.686,3790.841,N2O,3.403,721.6617,,,
3,,4,,,STD,04.11.2016,23:38:38,00:05:22,CH4,2.14,38.6434,CO2,3.676,3798.7983,N2O,3.403,721.7388,,,
4,,5,B1,0.0,B1-300-0,04.11.2016,23:44:00,00:05:21,CH4,2.146,19.7896,CO2,3.68,5809.8816,N2O,3.41,571.8044,,,
5,,6,B1,5.0,B1-300-5,04.11.2016,23:49:21,00:05:22,CH4,2.14,20.3922,CO2,3.67,6533.1388,N2O,3.4,589.2644,,,
6,,7,B1,10.0,B1-300-10,04.11.2016,23:54:43,###############,CH4,2.143,21.1437,CO2,3.676,7290.8798,N2O,3.403,609.9897,,,
7,,8,B2,0.0,B2-300-0,05.11.2016,00:00:05,00:05:21,CH4,2.143,19.502,CO2,3.673,5585.4023,N2O,3.403,572.3217,,,
8,,9,B2,5.0,B2-300-5,05.11.2016,00:05:26,00:05:21,CH4,2.146,20.429,CO2,3.693,5756.92,N2O,3.41,583.078,,,
9,,10,B2,10.0,B2-300-10,05.11.2016,00:10:47,00:05:22,CH4,2.146,19.7458,CO2,3.683,5613.1656,N2O,3.406,590.4564,,,


In [34]:
# Read the CO2 calibration concentration from the table.
# Row 1 is used because it holds the bare number; row 0 carries the same
# value with an uncertainty and unit suffix that float() cannot parse.
co2_cal_ppm = float(df.loc[1, 'CO2 cal ppm'])

In [48]:
def calculate_standard_averages(std_values_1, std_values_2):
    """Return the mean of all values in two groups of standard readings.

    The two groups are pooled and a single mean is taken over every value,
    so a group with more readings carries proportionally more weight.

    Parameters
    ----------
    std_values_1, std_values_2 : sequence of numbers
        Any mix of lists, tuples, NumPy arrays or pandas Series. Either
        group may be empty.

    Returns
    -------
    numpy.float64
        Mean of the pooled values, or NaN when both groups are empty.
    """
    # Concatenate explicitly instead of using `+`: for arrays and Series `+`
    # is element-wise addition, which would silently yield the wrong mean.
    pooled = np.concatenate([
        np.asarray(std_values_1, dtype=float).ravel(),
        np.asarray(std_values_2, dtype=float).ravel(),
    ])
    if pooled.size == 0:
        # Same result np.mean gives for empty input, without the RuntimeWarning.
        return np.float64('nan')
    return np.mean(pooled)

def calculate_concentrations(measurement_values, co2_cal_ppm, std_average):
    """Convert raw measurement values into calibrated concentrations.

    Each value is scaled as ``co2_cal_ppm / std_average * value``.

    Parameters
    ----------
    measurement_values : iterable of numbers
        Raw readings to convert.
    co2_cal_ppm : number
        Calibration concentration used as the numerator of the scale factor.
    std_average : number
        Average standard reading used as the denominator of the scale factor.

    Returns
    -------
    list
        One converted value per input value, in input order.
    """
    return [co2_cal_ppm / std_average * raw_value
            for raw_value in measurement_values]
    
def process_block(std_values_1, std_values_2,
                  measurement_values, co2_cal_ppm, sample_ids):
    """Calibrate one block of measurements and print a line per sample.

    The two groups of standard readings are averaged together, the
    measurements are scaled against that average, and each result is
    printed as ``<sample id> <concentration>``.

    Parameters
    ----------
    std_values_1, std_values_2 : list of numbers
        Standard readings passed on to calculate_standard_averages.
    measurement_values : list of numbers
        Raw readings of the samples in this block.
    co2_cal_ppm : number
        Calibration concentration of the standard.
    sample_ids : list
        Labels paired positionally with ``measurement_values``; pairing
        stops at the shorter of the two.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    reference_area = calculate_standard_averages(std_values_1, std_values_2)
    concentrations = calculate_concentrations(
        measurement_values, co2_cal_ppm, reference_area)

    for value, label in zip(concentrations, sample_ids):
        print(label, value)

In [49]:
# Walk the table row by row with a small state machine:
#
#   'start'        -> nothing collected yet, waiting for the first 'STD' row
#   'in_std_1'     -> collecting the standards that precede the samples
#   'measurements' -> collecting sample rows
#   'in_std_2'     -> collecting the standards that follow the samples
#
# When a sample row follows 'in_std_2', the finished block is processed and
# its trailing standards become the leading standards of the next block.
std_values_1, std_values_2 = [], []
measurement_values, sample_ids = [], []
state = 'start'

for index, row in df.iterrows():
    sample_id = row['sample id']

    # Step 1: state transition, decided by whether this row is a standard.
    if sample_id == 'STD':
        if state == 'start':
            state = 'in_std_1'
        elif state == 'measurements':
            state = 'in_std_2'
    else:
        if state == 'in_std_1':
            state = 'measurements'
        elif state == 'in_std_2':
            state = 'measurements'
            # The block is now bracketed by standards on both sides: report it.
            process_block(std_values_1, std_values_2,
                          measurement_values, co2_cal_ppm, sample_ids)
            # Reuse the trailing standards as the next block's leading ones.
            std_values_1, std_values_2 = std_values_2, []
            measurement_values, sample_ids = [], []

    # Step 2: file the current row under the (possibly updated) state.
    # Rows seen while still in 'start' are ignored.
    if state == 'measurements':
        measurement_values.append(row['CO2 area'])
        sample_ids.append(sample_id)
    elif state == 'in_std_1':
        std_values_1.append(row['CO2 area'])
    elif state == 'in_std_2':
        std_values_2.append(row['CO2 area'])

# Flush whatever is still pending once the table is exhausted.
process_block(std_values_1, std_values_2,
              measurement_values, co2_cal_ppm, sample_ids)

B1-300-0 616.244294588
B1-300-5 692.958959998
B1-300-10 773.331263631
B2-300-0 592.434155689
B2-300-5 610.626747436
B2-300-10 595.378961867
B3-300-0 581.407368931
B3-300-5 594.328174988
B3-300-10 594.343353362
B4-300-0 566.296188732
B4-300-5 586.842563452
B4-300-10 599.106456674
B5-300-0 602.304946417
B5-300-5 606.929503309
B5-300-10 633.300544761
B6-300-0 984.258072048
B6-300-5 955.945485511
B6-300-10 453.745681969
B7-300-0 584.826033626
B7-300-5 589.9716525
B7-300-10 586.186363285
B8-300-0 591.985426874
B8-300-5 608.618756766
B8-300-10 587.975258448
B9-300-0 614.249256795
B9-300-5 606.578890611
B9-300-10 605.651535435
B10-300-0 568.473855996
B10-300-5 577.469737947
B10-300-10 573.220771144
B11-300-0 616.53815148
B11-300-5 651.836894501
B11-300-10 694.409154336
B12-300-0 601.633990555
B12-300-5 599.482664772
B12-300-10 596.465925557
B13-300-0 601.204993685
B13-300-5 594.890431776
B13-300-10 604.021977762
B14-300-0 588.967383957
B14-300-5 591.647899116
B14-300-10 592.776823639
B15-300-

In [39]:
# Hard-coded inputs for checking a single concentration calculation by hand.
# measurement_value equals the 'CO2 area' of sample B1-300-0 in the df.head(10) preview.
# NOTE(review): std_average is a literal; presumably the mean 'CO2 area' of the
# standards bracketing the first measurement block -- confirm against the data.
std_average = 3787.182225
measurement_value = 5809.8816

# co2_cal_ppm is not defined here; it has to exist in the kernel already.
print(std_average, co2_cal_ppm, measurement_value)



3787.182225 401.7 5809.8816
