In [None]:
import pandas as pd
import re
import math

Generate sequence files

- Input LIMS number as range
- Input sample number as range
- Sample information
- Data file: the run date (YYYYMMDD) concatenated with the sample abbreviation
- Method file
- Data Path
- Method Path

Assumes only one solvent blank and one extracted blank (each run as assays A and B) per batch!

Sort out vial numbers

In [None]:
def getLIMS():
    """Prompt for a LIMS reference and a sample count and build the sample names.

    Every sample is listed twice, once per assay, as ``<reference>-<n> A``
    followed by ``<reference>-<n> B`` with ``n`` counting up from 1
    (references are expected to look like ``LIMS00...``).

    Returns:
        tuple: ``(num_samples, sample_list)``.
    """
    lims_reference = input('LIMS reference: ')
    num_samples = int(input('Number of samples: '))

    sample_list = []
    for sample_index in range(1, num_samples + 1):
        for assay in ('A', 'B'):
            sample_list.append(f'{lims_reference}-{sample_index} {assay}')
    return (num_samples, sample_list)

In [None]:
def sample_nums(num_samples):
    """Prompt for the first sample number and label each sample per assay.

    Args:
        num_samples: how many consecutive sample numbers to generate.

    Returns:
        list: ``'<number>_A'`` and ``'<number>_B'`` for each number, counting
        up from the value entered at the prompt.
    """
    start = int(input('Initial sample number: '))
    return [
        f'{number}_{assay}'
        for number in range(start, start + num_samples)
        for assay in ('A', 'B')
    ]

In [None]:
def additions(lst):
    """Put the solvent and extracted blanks (assays A and B) at the front of ``lst``.

    The list is modified in place and the same list object is returned.
    """
    blanks = ['Solvent Blank A', 'Solvent Blank B', 'Ext Blank A', 'Ext Blank B']
    lst[:0] = blanks
    return lst

In [None]:
def add_t_factor(lst, num_samples):
    """Ask whether a T-factor calibration is run and, if so, add T1-T5 to ``lst``.

    On a reply containing 'y' (any case) the entries ``'T1'`` .. ``'T5'`` are
    inserted in place starting at index 4, which is directly behind the four
    blank entries when the list came from ``additions``.

    Args:
        lst: run list to extend; modified in place.
        num_samples: accepted for symmetry with the other builders; not used.

    Returns:
        tuple: ``(lst, occurred)`` where ``occurred`` is ``'yes'`` when the
        calibration entries were added and ``''`` otherwise.
    """
    answer = input('T-factor calibration? [y/n] ')
    if 'y' not in answer.lower():
        return (lst, '')

    lst[4:4] = ['T' + str(level) for level in range(1, 6)]
    return (lst, 'yes')

In [None]:
def add_aqc(num_samples, lst, t_factor):
    """Ask how many AQCs are run and place them around the batch.

    Half of the AQCs (rounded towards zero) go ahead of the samples: at
    index 4, or at index 9 when ``t_factor`` contains a 'y' (i.e. the five
    T-factor entries are present). The remaining AQCs are appended after the
    batch. Every AQC contributes an ``'A'`` and a ``'B'`` entry.

    Args:
        num_samples: accepted for symmetry with the other builders; not used.
        lst: current run list; it is not modified, a new list is returned.
        t_factor: string flag; any value containing 'y' (case-insensitive)
            means the T-factor calibration entries are in ``lst``.

    Returns:
        tuple: ``(new_list, num_aqcs, before_batch)``.
    """
    num_aqcs = int(input('Number of AQCs: '))
    before_batch = int(num_aqcs / 2)

    def assay_pair(number):
        # One AQC is injected for both assays.
        return [f'AQC{number} A', f'AQC{number} B']

    leading = []
    for number in range(1, before_batch + 1):
        leading.extend(assay_pair(number))

    # Numbering of the post-batch AQCs continues where the pre-batch ones stop.
    trailing = []
    for number in range(before_batch + 1, num_aqcs + 1):
        trailing.extend(assay_pair(number))

    insert_at = 9 if 'y' in t_factor.lower() else 4
    sequence = lst[:insert_at] + leading + lst[insert_at:] + trailing
    return sequence, num_aqcs, before_batch

In [None]:
def add_flush(lst):
    """Wrap the run list in flush injections.

    ``'flush1'`` and ``'flush2'`` are placed at the front and ``'flush3'``
    and ``'flush4'`` at the end. The list is modified in place and returned.
    """
    lst[:0] = ['flush1', 'flush2']
    lst.extend(['flush3', 'flush4'])
    return lst

In [None]:
def create_sample_list(lims, additions, t_factor, aqc, flush):
    """Run the supplied builder callables in order to assemble the run list.

    Args:
        lims: callable returning ``(num_samples, names)``.
        additions: callable taking the names and returning them with blanks added.
        t_factor: callable ``(names, num_samples) -> (names, flag)``.
        aqc: callable ``(num_samples, names, flag) -> (names, num_aqcs, before_batch)``.
        flush: callable taking the names and returning them with flushes added.

    Returns:
        tuple: ``(run_names, sample_numbers, num_aqcs, before_batch)`` where
        ``sample_numbers`` comes from ``sample_nums(num_samples)``.
    """
    num_samples, run_names = lims()
    sample_numbers = sample_nums(num_samples)

    run_names = additions(run_names)
    run_names, t_factor_flag = t_factor(run_names, num_samples)
    run_names, num_aqcs, before_batch = aqc(num_samples, run_names, t_factor_flag)
    run_names = flush(run_names)

    return run_names, sample_numbers, num_aqcs, before_batch

In [None]:
def create_abbreviations(samples_list, sample_numbers):
    """Build the short label used in the data-file name for every run entry.

    Entries are classified by their name, in this order:

    * contains ``'flush'``   -> the name itself
    * contains ``'Solvent'`` -> ``'SB_'`` + last character (the assay)
    * contains ``'Ext'``     -> ``'EB_'`` + last character (the assay)
    * contains ``'AQC'``     -> the name with whitespace replaced by ``'_'``
    * contains ``'LIMS'``    -> the next unused entry of ``sample_numbers``
    * matches ``T[0-9]``     -> the name itself (T-factor standard)

    Args:
        samples_list: run-entry names in injection order.
        sample_numbers: one label per LIMS entry, in the order the LIMS
            entries appear in ``samples_list``.

    Returns:
        list: one abbreviation per entry of ``samples_list``.

    Raises:
        ValueError: if a name matches none of the categories, or if there
            are more LIMS entries than ``sample_numbers``.
    """
    abb_list = []
    # Index of the next sample number to hand out; it only advances on LIMS
    # entries so blanks, AQCs and flushes never consume a sample number.
    next_number = 0
    for name in samples_list:
        if 'flush' in name:
            abb = name
        elif 'Solvent' in name:
            abb = 'SB_' + name[-1]
        elif 'Ext' in name:
            abb = 'EB_' + name[-1]
        elif 'AQC' in name:
            abb = '_'.join(name.split())
        elif 'LIMS' in name:
            if next_number >= len(sample_numbers):
                raise ValueError(
                    f'No sample number left for {name!r}: '
                    f'only {len(sample_numbers)} were supplied'
                )
            abb = sample_numbers[next_number]
            next_number += 1
        elif re.search('T[0-9]', name):
            abb = name
        else:
            raise ValueError(f'Unrecognised sample name: {name!r}')
        abb_list.append(abb)

    return abb_list

In [None]:
def sample_info(sample_lst):
    """Produce the free-text comment for every run entry.

    Entries are classified by their name, in this order:

    * contains ``'flush'``               -> ``'iso-octane'``
    * contains ``'Solvent'`` or ``'Ext'`` -> the name itself
    * contains ``'AQC'`` and ends in A/B  -> ``'AQC A'`` / ``'AQC B'``
    * contains ``'LIMS'``                -> ``'Sample <n>'``; consecutive pairs
      of LIMS entries (assay A and B) share the same ``n``, starting at 1
    * matches ``T[0-9]``                 -> ``'T-factor <last character>'``

    Args:
        sample_lst: run-entry names in injection order.

    Returns:
        list: one comment per entry of ``sample_lst``.

    Raises:
        ValueError: if a name matches none of the categories. Previously such
            an entry silently inherited the comment of the entry before it.
    """
    info = []
    lims_seen = 0  # number of LIMS entries handled so far
    for name in sample_lst:
        if 'flush' in name:
            comment = 'iso-octane'
        elif 'Solvent' in name or 'Ext' in name:
            comment = name
        elif 'AQC' in name and name[-1] in ('A', 'B'):
            comment = 'AQC ' + name[-1]
        elif 'LIMS' in name:
            # Two entries (A and B) per sample, so the sample number advances
            # after every second LIMS entry.
            comment = 'Sample ' + str(lims_seen // 2 + 1)
            lims_seen += 1
        elif re.search('T[0-9]', name):
            comment = f'T-factor {name[-1]}'
        else:
            raise ValueError(f'Unrecognised sample name: {name!r}')
        info.append(comment)
    return info

In [None]:
def data_file(abbreviations):
    """Prefix every abbreviation with today's date as ``YYYYMMDD_``.

    Args:
        abbreviations: iterable of short sample labels.

    Returns:
        list: ``'<YYYYMMDD>_<abbreviation>'`` for each input, in order.
    """
    import datetime

    # Read the clock once so every file in the sequence shares the same date
    # prefix, even if the call happens to straddle midnight.
    date_stamp = f"{datetime.datetime.now():%Y%m%d}"
    return [f"{date_stamp}_{abbreviation}" for abbreviation in abbreviations]

def method_file(samples, method = 'MCPD Esters_Aug 2018_SIM.M'):
    """Return the method file name repeated once per entry of ``samples``."""
    return [method] * len(samples)
    
def data_path(samples, path = 'D:\\Data\\MCPD\\'):
    """Return the data directory repeated once per entry of ``samples``.

    The default is the Windows path ``D:\\Data\\MCPD\\``; every backslash is
    escaped explicitly so the literal does not rely on invalid escape
    sequences such as ``\\D``.
    """
    return [path] * len(samples)
     
def method_path(samples, path = 'D:\\MassHunter\\GCMS\\1\\METHODS\\'):
    """Return the method directory repeated once per entry of ``samples``.

    The default is the Windows path ``D:\\MassHunter\\GCMS\\1\\METHODS\\``;
    every backslash is escaped explicitly so the literal does not rely on
    invalid escape sequences such as ``\\M`` or ``\\G``.
    """
    return [path] * len(samples)

In [None]:
def vial_nums(samples, num_aqcs, num_aqcs_before_batch):
    """Assign a vial number to every entry of ``samples``.

    Rules, applied in this order:

    1. Every entry starts with its own list position as vial number.
    2. The first ``2 * num_aqcs_before_batch`` entries whose name contains
       ``'AQC'`` (assays A and B of each pre-batch AQC) keep their position.
       Every later AQC entry reuses those vial numbers in order, cycling if
       there are more post-batch than pre-batch entries. When there are no
       pre-batch AQCs, later AQC entries simply keep their own position.
    3. The first entry and the last two entries are set to vial 1. Together
       with position 1 this gives all four entries added by ``add_flush`` the
       same vial.

    Args:
        samples: run-entry names in injection order.
        num_aqcs: total number of AQCs; accepted for interface compatibility,
            not needed for the calculation.
        num_aqcs_before_batch: number of AQCs injected before the samples.

    Returns:
        list: one vial number per entry of ``samples``.
    """
    flush_vial = 1
    vials = list(range(len(samples)))

    pre_batch_entries = num_aqcs_before_batch * 2  # one entry per assay (A and B)
    pre_batch_vials = []
    aqc_seen = 0
    for position, sample in enumerate(samples):
        if 'AQC' not in sample:
            continue
        if aqc_seen < pre_batch_entries:
            pre_batch_vials.append(position)
        elif pre_batch_vials:
            # Key step: AQCs re-injected later in the run come from the same
            # vials as the pre-batch AQCs.
            reuse = (aqc_seen - pre_batch_entries) % len(pre_batch_vials)
            vials[position] = pre_batch_vials[reuse]
        aqc_seen += 1

    # Flush entries sit at the very start and at the last two positions.
    for position in (0, -2, -1):
        if -len(vials) <= position < len(vials):
            vials[position] = flush_vial

    return vials

In [None]:
def create_df(vials, names, abbreviations, info, data_file, method_file, data_path, method_path):
    """Assemble the sequence table as a DataFrame, one row per run entry.

    ``abbreviations`` is accepted but does not become a column.
    Open item from the original author: a sample-number column that swaps
    LIMS references for sample numbers still needs to be added.
    """
    columns = [
        ('Vial', vials),
        ('Sample Name', names),
        ('Additional Information', info),
        ('Data File', data_file),
        ('Method File', method_file),
        ('Data Path', data_path),
        ('Method Path', method_path),
    ]
    return pd.DataFrame(dict(columns))

In [None]:
def print_result(df):
    """Ask which file formats to save ``df`` in and write them.

    The user is asked about Excel and CSV output separately; a reply
    containing 'y' (any case) selects the format. A filename is requested
    only when at least one format was chosen. A trailing ``.xlsx``, ``.xls``
    or ``.csv`` typed by the user is replaced by the correct extension for
    each format, so choosing both formats produces two separate files
    instead of the CSV overwriting the workbook.

    Args:
        df: the sequence table to save.
    """
    print('Writing to file...')
    want_excel = 'y' in input('Write to excel file? [y/n] ').lower()
    want_csv = 'y' in input('Write to csv? [y/n] ').lower()
    if not (want_excel or want_csv):
        print('Nothing written.')
        return

    filename = str(input('Filename: '))
    # Only strip the extensions this function writes; other dots in the name
    # (for example dates such as 2018.08.01) are left alone.
    stem = filename
    for extension in ('.xlsx', '.xls', '.csv'):
        if filename.lower().endswith(extension):
            stem = filename[:-len(extension)]
            break

    if want_excel:
        # to_excel picks its writer engine from the file extension.
        print_excel(df, stem + '.xlsx')
    if want_csv:
        print_csv(df, stem + '.csv')

In [None]:
def print_excel(df, filename):
    """Save ``df`` as an Excel workbook at ``filename`` without the index column."""
    df.to_excel(excel_writer=filename, index=False)

def print_csv(df, filename):
    """Save ``df`` as a CSV file at ``filename`` without the index column."""
    df.to_csv(path_or_buf=filename, index=False)

In [None]:
def change_df(df, sample_numbers):
    """Insert the sample number into the name of every LIMS row.

    For each row whose ``'Sample Name'`` contains ``'LIMS'`` the name becomes
    ``<name without its last character><sample number without its last two
    characters> <last character of the name>``; for example
    ``'LIMS001-1 A'`` with ``'100_A'`` becomes ``'LIMS001-1 100 A'``.
    ``sample_numbers`` is read in the order the LIMS rows appear.

    The frame is modified in place and also returned.
    """
    is_lims = df['Sample Name'].str.contains('LIMS')
    renamed = [
        f'{name[:-1]}{sample_numbers[position][:-2]} {name[-1]}'
        for position, name in enumerate(df.loc[is_lims, 'Sample Name'])
    ]
    df.loc[is_lims, 'Sample Name'] = renamed

    return df

In [None]:
def main():
    """Build the run sequence interactively and offer to save it.

    Collects the run list through the prompting builders, derives the
    per-entry columns (abbreviation, comment, data file, method file, paths,
    vial), assembles them into a DataFrame, adds the sample numbers to the
    LIMS rows and finally hands the table to ``print_result``.
    """
    run_names, sample_numbers, num_aqcs, aqcs_before_batch = create_sample_list(
        getLIMS, additions, add_t_factor, add_aqc, add_flush
    )

    abbreviations = create_abbreviations(run_names, sample_numbers)
    comments = sample_info(run_names)
    data_files = data_file(abbreviations)
    method_files = method_file(run_names)
    method_paths = method_path(run_names)
    data_paths = data_path(run_names)
    vials = vial_nums(run_names, num_aqcs, aqcs_before_batch)

    sequence = create_df(
        vials, run_names, abbreviations, comments,
        data_files, method_files, data_paths, method_paths,
    )
    sequence = change_df(sequence, sample_numbers)
    print_result(sequence)

# Running this cell starts the interactive prompts defined in main().
main()