In [14]:
import os
import xlrd
import pprint
import sys
import copy
pp = pprint.PrettyPrinter()

In [2]:
# Root folder under which the git checkouts and the data directory are
# expected. Resolved from the current user's home directory instead of a
# hard-coded user name, so the notebook is not tied to a single account.
home_folder = os.path.expanduser('~')

In [4]:
# Add the local repository checkouts to the module search path, in this order.
sys.path.extend(
    os.path.join(home_folder, repo_dir)
    for repo_dir in ('git/lipid_prototype', 'git/mass-spec-utils', 'git/pymzm')
)

In [5]:
# Folder holding the Excel input workbooks; joined with a workbook file name
# when the workbook is opened.
excel_folder = os.path.join(home_folder,'git/lipid_prototype/excel_input_files')
# Folder holding the .mzML files; joined with each file name from the files sheet.
# NOTE(review): this points at Condition_2/Pos, while the workbook opened later
# in the notebook is 'Test_input_Condition1Neg.xlsx' - confirm they belong together.
data_path = os.path.join(home_folder,'data/lipid_files/lipid_files/Condition_2/Pos')

In [6]:
# Column headings accepted in the lipids sheet. read_heads() prints a warning
# for any first-row heading that is not in this set and leaves it out.
PERMITTED_HEADS = {
    'name',
    'formula',
    'ion',
    'ion_mz (optional)',
    'mz_tolerance_ppm (optional)',
    'ion rt (seconds)',
    'ion rt tol (seconds)',
    'files to exclude',
}

# Default analysis parameters; load_data() takes a deep copy of this dict.
DEFAULT_PARAMS = dict(
    mz_tolerance=0.01,
    mz_tolerance_units='abs',  # or ppm
    scan_delta=2,
    max_iso_n=5,
)

In [18]:
def read_heads(sheet_object,permitted_vals = PERMITTED_HEADS):
    """Map each accepted heading in the first row of a sheet to its column index.

    Parameters
    ----------
    sheet_object
        Object exposing ``ncols`` and ``cell_value(row, col)``.
    permitted_vals
        Container of accepted heading values.

    Returns
    -------
    dict
        ``{heading: column_index}`` for every first-row heading found in
        ``permitted_vals``. Other headings are reported with a printed warning
        and left out. If a heading occurs more than once, the last column wins.
    """
    header_row = 0
    head_dict = {}
    for col_idx in range(sheet_object.ncols):
        heading = sheet_object.cell_value(header_row, col_idx)
        if heading not in permitted_vals:
            print("Warning: head {} not permitted".format(heading))
            continue
        head_dict[heading] = col_idx
    return head_dict

def load_lipids(sheet_object,permitted_vals = PERMITTED_HEADS):
    """Read the lipids sheet into a dict keyed by lipid name.

    The first row is treated as headings (see ``read_heads``); every later row
    becomes one entry.

    Parameters
    ----------
    sheet_object
        Object exposing ``nrows``, ``ncols`` and ``cell_value(row, col)``.
    permitted_vals
        Container of accepted heading values, forwarded to ``read_heads``.

    Returns
    -------
    dict
        ``{lipid_name: {heading: cell_value, ..., 'n_iso': int}}``.

    Raises
    ------
    KeyError
        If the sheet has data rows but no accepted column headed ``name``.
    """
    head_dict = read_heads(sheet_object,permitted_vals = permitted_vals)

    # Only complain when there are data rows to read, so a sheet without data
    # still yields an empty dict as before.
    if sheet_object.nrows > 1 and 'name' not in head_dict:
        raise KeyError(
            "lipids sheet has no column headed 'name'; recognised headings: {}".format(
                list(head_dict)))

    lipids = {}
    for row_pos in range(1, sheet_object.nrows):  # row 0 holds the headings
        lipid_name = sheet_object.cell_value(row_pos, head_dict['name'])
        if lipid_name in lipids:
            # Later rows still replace earlier ones, but no longer silently.
            print("Warning: duplicate lipid name {}, keeping the later row".format(lipid_name))
        entry = {key: sheet_object.cell_value(row_pos, col)
                 for key, col in head_dict.items()}
        if 'n_iso' not in entry:
            # Default number of isotopes when the sheet has no 'n_iso' column.
            # NOTE(review): same value as DEFAULT_PARAMS['max_iso_n'] - confirm
            # whether the two are meant to be linked.
            entry['n_iso'] = 5
        lipids[lipid_name] = entry
    return lipids
        
    
def load_files(sheet_object):
    """Read the files sheet into a list of (filename, timepoint) pairs.

    The first row must carry the headings ``filename`` and ``timepoint`` in the
    first and second columns; every later row contributes one pair.

    Parameters
    ----------
    sheet_object
        Object exposing ``nrows`` and ``cell_value(row, col)``.

    Returns
    -------
    list of (filename, float)
        Pairs sorted by ascending timepoint; rows with equal timepoints keep
        their sheet order.

    Raises
    ------
    AssertionError
        If either heading is wrong. The message names the offending column.
    ValueError
        If a timepoint cell cannot be converted to ``float``.
    """
    # The message must be a string: print(...) returns None, which would leave
    # the AssertionError without any message.
    assert sheet_object.cell_value(0,0) == 'filename', "First column in files sheet must be headed filename"
    assert sheet_object.cell_value(0,1) == 'timepoint', "Second column in files sheet must be headed timepoint"
    time_points = [
        (sheet_object.cell_value(row_pos,0), float(sheet_object.cell_value(row_pos,1)))
        for row_pos in range(1,sheet_object.nrows)
    ]
    time_points.sort(key = lambda x: x[1])
    return time_points

def load_data(workbook_object,permitted_vals = PERMITTED_HEADS):
    """Load the lipid definitions and the file/timepoint list from a workbook.

    Parameters
    ----------
    workbook_object
        Workbook exposing ``sheet_by_name``; it must contain sheets named
        ``lipids`` and ``files``.
    permitted_vals
        Container of accepted lipid-sheet headings, forwarded to ``load_lipids``.

    Returns
    -------
    tuple
        ``(lipids, time_points)`` as produced by ``load_lipids`` and
        ``load_files``.
    """
    lipids = load_lipids(workbook_object.sheet_by_name('lipids'),
                         permitted_vals = permitted_vals)
    time_points = load_files(workbook_object.sheet_by_name('files'))

    # NOTE(review): the defaults are copied and the optional 'parameters' sheet
    # is looked up, but neither is read further nor returned - parameter
    # loading looks unfinished.
    parameters = copy.deepcopy(DEFAULT_PARAMS)
    try:
        parameter_sheet = workbook_object.sheet_by_name('parameters')
    except xlrd.biffh.XLRDError:
        # The workbook has no 'parameters' sheet: stay with the defaults.
        pass

    return lipids,time_points

In [20]:
# Open the Excel input workbook from excel_folder.
# NOTE(review): the workbook is named Condition1Neg while data_path points at
# Condition_2/Pos - confirm this is the intended input file.
# NOTE(review): xlrd 2.x dropped .xlsx support; this call presumably needs
# xlrd < 2.0 - confirm the pinned version.
excel_file = os.path.join(excel_folder,'Test_input_Condition1Neg.xlsx')
wb = xlrd.open_workbook(excel_file) 

# load_data returns the lipid definitions and the (filename, timepoint) list;
# both are pretty-printed for inspection.
lipids,time_points = load_data(wb)
pp.pprint(lipids)
pp.pprint(time_points)



KeyError: 'name'

In [100]:
# One MZMLFile object per entry of time_points, keyed by the file name exactly
# as it appears in time_points.
mzml_file_objs = {}

from ms2_matching import MZMLFile
for filename, _timepoint in time_points:
    # Append the extension only when the name does not already contain it.
    load_filename = filename if '.mzML' in filename else filename + '.mzML'
    mzml_file_objs[filename] = MZMLFile(os.path.join(data_path, load_filename))

Loaded 1968 scans
Loaded 1968 scans
Loaded 1967 scans
Loaded 1967 scans
Loaded 1968 scans
Loaded 1967 scans
Loaded 1968 scans


In [103]:
%load_ext autoreload
%autoreload 2
from lipid_kinetics import compute_lipid_kinetics,create_plot
output_dict = {}
for lipid in lipids:
#     if lipid in output_dict:
#         continue
    print(lipid,lipids[lipid]['ion'])
    rt_mean = lipids[lipid]['ion rt (seconds)']
    rt_tol = lipids[lipid]['ion rt tol (seconds)']
    lipids[lipid]['rt_range'] = [rt_mean - rt_tol,rt_mean+rt_tol]
    lipids[lipid]['adduct_type'] = lipids[lipid]['ion']
    output_dict[lipid] = compute_lipid_kinetics(lipid,lipids[lipid],time_points,mzml_file_objs)
    create_plot(lipid,output_dict[lipid])




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Lyso PC 20:1 [M+H]+
Lyso PC 20:1 3
Lyso PC 20:1 0 4
Lyso PC 20:1 0 5
Lyso PC 20:1 1 4
Lyso PC 20:1 1 5
Lyso PC 20:1 2 4
Lyso PC 20:1 2 5
Lyso PC 20:1 3 4
Lyso PC 20:1 3 5
Lyso PC 20:1 4 4
Lyso PC 20:1 4 5
Lyso PC 20:1 5 4
Lyso PC 20:1 5 5
	Lyso PC 20:1, ignoring 120_d20_pos_2
LYSO pe18:1 [M+H]+
	LYSO pe18:1, ignoring 120_d20_pos_2
Lyso pe 20:4 [M+H]+
Lyso pe 20:4 1
Lyso pe 20:4 0 2
Lyso pe 20:4 0 3
Lyso pe 20:4 0 4
Lyso pe 20:4 0 5
Lyso pe 20:4 1 2
Lyso pe 20:4 1 3
Lyso pe 20:4 1 4
Lyso pe 20:4 1 5
Lyso pe 20:4 2 2
Lyso pe 20:4 2 3
Lyso pe 20:4 2 4
Lyso pe 20:4 2 5
Lyso pe 20:4 3 2
Lyso pe 20:4 3 3
Lyso pe 20:4 3 4
Lyso pe 20:4 3 5
Lyso pe 20:4 4 2
Lyso pe 20:4 4 3
Lyso pe 20:4 4 4
Lyso pe 20:4 4 5
Lyso pe 20:4 5 2
Lyso pe 20:4 5 3
Lyso pe 20:4 5 4
Lyso pe 20:4 5 5
	Lyso pe 20:4, ignoring 120_d20_pos_2
38.1 pc [M+H]+
	38.1 pc, ignoring 120_d20_pos_2
38.2 pc [M+H]+
	38.2 pc, ignoring 120_d20_pos_2
40

In [104]:
# Hand the per-lipid results in output_dict to create_xlsx_output under the
# given output file name. The recorded cell output shows that this call also
# reports writing temp_N.png files.
from lipid_kinetics import create_xlsx_output
xlsx_output_name = 'condition_2_pos.xlsx'
create_xlsx_output(output_dict,output_filename = xlsx_output_name)

Writing:  temp_0.png
Writing:  temp_1.png
Writing:  temp_2.png
Writing:  temp_3.png
Writing:  temp_4.png
Writing:  temp_5.png
Writing:  temp_6.png
Writing:  temp_7.png
Writing:  temp_8.png
Writing:  temp_9.png
Writing:  temp_10.png
Writing:  temp_11.png
Writing:  temp_12.png
Writing:  temp_13.png
Writing:  temp_14.png
Writing:  temp_15.png
Writing:  temp_16.png
Writing:  temp_17.png
Writing:  temp_18.png
Writing:  temp_19.png
Writing:  temp_20.png
Writing:  temp_21.png
Writing:  temp_22.png
Writing:  temp_23.png
Writing:  temp_24.png
Writing:  temp_25.png
Writing:  temp_26.png
Writing:  temp_27.png
Writing:  temp_28.png
Writing:  temp_29.png
Writing:  temp_30.png
Writing:  temp_31.png
