In [1]:
import pandas as pd
import numpy as np
from numpy import array
import matplotlib.pyplot as plt
import math
import openpyxl
from statistics import * 

In [5]:
# Temporary convenience cell. Copy the external standard data, including its row and column
# headings (i.e. L3:P24), to the clipboard, then run this cell. It prints the peak area and
# concentration data as lists that can be copied and pasted into the user data entry cell below.
clipboard = pd.read_clipboard()
manual = np.array(clipboard).tolist()
rowlabels = clipboard.index.values
for row in range(len(manual)):
    try:
        float(rowlabels[row])
    except (TypeError, ValueError):
        # The label is not a number, so it is treated as a chain name and the row as its peak areas.
        # Only conversion failures are caught; a bare except would also hide KeyboardInterrupt.
        print(rowlabels[row] + "_area: ", end = "")
        print(manual[row])
        print("")
    else:
        # The label parses as a number: this is a concentration row whose label is printed as the
        # first list entry, and the list is named after the label of the NEXT row.
        # The final cell of the row is left out (presumably empty because the numeric label
        # occupies the heading column -- confirm against the spreadsheet layout).
        values = manual[row]
        last = max(len(values) - 2, 0)
        entries = [rowlabels[row]] + [str(value) for value in values[:last + 1]]
        print(rowlabels[row+1] + "_conc: [" + ", ".join(entries) + "]")

C7_area: [1582248.0, 486073.0, 65713.0, 15283.0]

C8_area: [1632886.0, 342093.0, 153473.0, 24268.0]

C9_area: [1808591.0, 545347.0, 74484.0, 22175.0]

C10_conc: [872.8, 436.4, 87.28, 21.82]
C10_area: [2067861.0, 451230.0, 171337.0, 26421.0]

C11_area: [2066882.0, 580299.0, 78273.0, 20075.0]

C12_conc: [843.2, 421.6, 84.32, 21.08]
C12_area: [2170879.0, 497569.0, 159667.0, 27508.0]

C13_area: [2369550.0, 641935.0, 84914.0, 22909.0]

C14_conc: [816, 408.0, 81.6, 20.4]
C14_area: [2197805.0, 532182.0, 139582.0, 28508.0]

C15_area: [2552762.0, 683592.0, 90513.0, 25262.0]

C16_conc: [1361.6, 680.8, 136.16, 34.04]
C16_area: [4017150.0, 962416.0, 197837.0, 47836.0]

C17_area: [2048413.0, 545963.0, 76858.0, 23865.0]



In [10]:
# ---------------- User data entry ----------------
# NOTE: getData must already be defined in the kernel before this cell is run.

# External standards: measured peak areas and the concentrations they correspond to.
# Chains without their own concentration list share default_conc; replace individually if needed.
default_conc = [1000, 500, 100, 25]

C7_area = [1582248, 486073, 65713, 15283]
C7_conc = default_conc

C8_area = [1632886, 342093, 153473, 24268]
C8_conc = default_conc

C9_area = [1808591, 545347, 74484, 22175]
C9_conc = default_conc

C10_area = [2067861, 451230, 171337, 26421]
C10_conc = [872.8, 436.4, 87.28, 21.82]

C11_area = [2066882, 580299, 78273, 20075]
C11_conc = default_conc

C12_area = [2170879, 497569, 159667, 27508]
C12_conc = [843.2, 421.6, 84.32, 21.08]

C13_area = [2369550, 641935, 84914, 22909]
C13_conc = default_conc

C14_area = [2197805, 532182, 139582, 28508]
C14_conc = [816, 408, 81.6, 20.4]

C15_area = [2552762, 683592, 90513, 25262]
C15_conc = default_conc

C16_area = [4017150, 962416, 197837, 47836]
C16_conc = [1361.6, 680.8, 136.16, 34.04]

C17_area = [2048413, 545963, 76858, 23865]
C17_conc = default_conc

# Slope of a degree-1 fit of peak area against concentration, keyed by chain.
# Add a (chain, concentrations, areas) entry to the list to extend the dictionary.
area_to_conc_scale = {
    chain: np.polyfit(conc, area, 1)[0]
    for chain, conc, area in [
        ('C7', C7_conc, C7_area),
        ('C8', C8_conc, C8_area),
        ('C9', C9_conc, C9_area),
        ('C10', C10_conc, C10_area),
        ('C11', C11_conc, C11_area),
        ('C12', C12_conc, C12_area),
        ('C13', C13_conc, C13_area),
        ('C14', C14_conc, C14_area),
        ('C15', C15_conc, C15_area),
        ('C16', C16_conc, C16_area),
        ('C17', C17_conc, C17_area),
    ]
}

# Internal standard concentrations, keyed by chain.
internalConc = {
    'C7': 100,
    'C9': 100,
    'C11': 100,
    'C13': 100,
    'C15': 20,
    'C17': 20,
}

# Input workbook and the sheet holding peak area and chain identification data.
file = 'GCData (4).xlsx'
sheet = 'Quantification w IS,ES'

# Zero-based column positions (1st column is 0, 2nd column is 1, etc.).
area_col = 3   # peak area data
chain_col = 5  # chain identification data

# Dictionaries handed to getData.
scale = area_to_conc_scale   # scale factors that turn area data into concentration data
int_sdt_conc = internalConc  # internal standard concentrations

# Name of the new file; it receives an extra worksheet called "Corrected Concentration".
saveAs = 'GCData (4) w Conc Data.xlsx'

getData(file=file, sheetname=sheet, area_col=area_col, chain_col=chain_col,
        scale=scale, int_std_conc=int_sdt_conc, saveAs=saveAs)

Unnamed: 0,H204A-1,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Uncorrected Concentration,Scaling Factor,Averaged Scaling Factor,Corrected Concentration
0,2.41,2.353,2.572,153579.0,51369.0,C7,96.1542285198,0.961542285198,,
1,3.054,2.978,3.113,2380.0,551.0,C8,1.51174136082,,0.904967890871,1.67049171144
2,3.995,3.925,4.121,154529.0,57008.0,C9,84.8393496544,0.848393496544,,
3,5.26,5.213,5.358,1559.0,598.0,C10,0.676621862616,,0.79640402673,0.849596234959
4,6.867,6.772,6.963,154964.0,55544.0,C11,74.4414556916,0.744414556916,,
5,9.142,9.026,9.245,2559.0,699.0,C12,1.01523221107,,0.729588541951,1.39151337047
6,10.882,10.795,10.963,170500.0,85412.0,C13,71.4762526986,0.714762526986,,
7,12.051,12.031,12.134,4892.0,1734.0,C14,1.84210867103,,1.22844606695,1.49954379
8,12.871,12.829,12.932,89501.0,44651.0,C15,34.8425921381,1.74212960691,,
9,13.575,13.525,13.613,27807.0,16360.0,C16,9.48639901913,,1.36663184432,6.94144444138


In [9]:
def _chainIsOdd(chain):
    """Return True when the last character of the chain label is an odd digit (e.g. 'C7')."""
    return int(chain[-1]) % 2 == 1


def _concentrationColumns(areas, chains, scale, int_std_conc):
    """Compute the four result columns from parallel lists of peak areas and chain labels.

    Returns a tuple of four equally long lists (uncorrected concentration, scaling factor,
    averaged scaling factor, corrected concentration); entries that do not apply are NaN.
    """
    blank = float('nan')
    count = len(areas)
    uncorrected = [blank] * count
    scaling = [blank] * count
    averaged = [blank] * count
    corrected = [blank] * count

    # Pass 1: uncorrected concentration for every row, scaling factor for odd chains.
    for row, (area, chain) in enumerate(zip(areas, chains)):
        if pd.isna(area):
            if pd.isna(chain):
                continue  # completely empty row stays blank
            uncorrected[row] = 0  # chain identified but no peak area recorded
        else:
            if chain not in scale:
                raise ValueError("row %d: no scale factor for chain %r" % (row, chain))
            uncorrected[row] = area / scale[chain]
        if _chainIsOdd(chain):
            scaling[row] = uncorrected[row] / int_std_conc[chain]

    # Pass 2: even chains are corrected with the mean scaling factor of the rows directly
    # above and below. Both neighbours must exist and carry a factor; otherwise the row is
    # left blank instead of wrapping around to the last row or indexing past the end.
    for row, chain in enumerate(chains):
        conc = uncorrected[row]
        if pd.isna(conc) or _chainIsOdd(chain):
            continue
        before = scaling[row - 1] if row > 0 else blank
        after = scaling[row + 1] if row + 1 < count else blank
        if pd.isna(before) or pd.isna(after):
            continue
        averaged[row] = mean([before, after])
        if averaged[row] != 0:  # a zero factor cannot be divided by; leave the result blank
            corrected[row] = conc / averaged[row]

    return uncorrected, scaling, averaged, corrected


def getData(file, sheetname, area_col, chain_col, scale, int_std_conc, saveAs):
    """Calculate corrected concentrations and save them to a new workbook.

    The workbook `file` is loaded, a worksheet named "Corrected Concentration" holding the
    original sheet data plus four calculated columns is added, and the result is saved
    under `saveAs`. Nothing is written to `file` unless `saveAs` names the same path.

    Parameters
    ----------
    file : file name of the input workbook
    sheetname : sheet with data organized by sample with peak areas and chain lengths
        identified already
    area_col : column number of peak area data (1st column is column 0, 2nd is 1, etc.)
    chain_col : column number of chain identification data (same numbering)
    scale : dictionary of scale factors to turn area data into concentration data
    int_std_conc : dictionary of internal standard concentrations
    saveAs : file name for the new workbook

    Returns
    -------
    pandas.DataFrame with the original columns followed by 'Uncorrected Concentration',
    'Scaling Factor', 'Averaged Scaling Factor' and 'Corrected Concentration'. The four
    added columns are numeric; entries that do not apply are NaN (empty cells in Excel).

    Raises
    ------
    ValueError : a row has a peak area but its chain has no entry in `scale`
    KeyError : an odd chain has no entry in `int_std_conc`
    """
    wb = openpyxl.load_workbook(file)
    orig = pd.read_excel(file, sheet_name=sheetname)

    # Select by position from the single read, so area and chain can never be swapped by
    # the order in which a usecols-restricted read returns its columns.
    areas = orig.iloc[:, area_col].tolist()
    chains = orig.iloc[:, chain_col].tolist()

    order = ['Uncorrected Concentration','Scaling Factor','Averaged Scaling Factor','Corrected Concentration']
    columns = _concentrationColumns(areas, chains, scale, int_std_conc)
    # Built straight from the lists: routing mixed text/number rows through np.array would
    # turn every number into a string.
    concData = pd.DataFrame(dict(zip(order, columns)), columns = order)
    excelData = pd.concat([orig, concData], axis = 1)

    # Write through the loaded workbook itself. Opening a pandas ExcelWriter on `file`
    # targets the input path in write mode and relies on assigning writer.book, which
    # newer pandas versions no longer allow.
    ws = wb.create_sheet("Corrected Concentration")
    ws.append(list(excelData.columns))
    for record in excelData.itertuples(index = False, name = None):
        ws.append([None if pd.isna(value) else value for value in record])
    wb.save(filename = saveAs)
    return excelData