In [1]:
import pandas as pd
import numpy as np

In [59]:
PPM = 1e-6
def find_isotope_(a,dif=4.015,rthreshold=20*PPM,rt_threshold=.05,abs_threshold=.1):
    """
    Select the rows of ``a`` that have an isotope partner elsewhere in ``a``.

    Two rows are partners when the absolute difference of their 'M/Z' values
    equals ``dif`` within the m/z tolerance, and their 'RT' values differ by
    less than ``rt_threshold``. A row is never paired with itself.

    Parameters
    ----------
    a : DataFrame
        Must contain the columns 'M/Z' and 'RT'.
    dif : float
        Expected m/z difference between the two members of a pair.
    rthreshold : float or None
        Relative tolerance on the deviation from ``dif``. The allowed
        deviation for a pair is ``rthreshold`` times the larger m/z of the
        two rows, so the test is symmetric and both members of a matching
        pair are always returned. If None, ``abs_threshold`` is used instead.
    rt_threshold : float
        Maximum allowed absolute 'RT' difference (exclusive), in the same
        units as the 'RT' column.
    abs_threshold : float
        Absolute tolerance on the deviation from ``dif``; only used when
        ``rthreshold`` is None.

    Returns
    -------
    DataFrame
        The rows of ``a`` that have at least one partner, with their original
        index and order.
    """
    mz = a['M/Z'].values
    rt = a['RT'].values

    # Pairwise (n x n) matrices: entry [i, j] compares row i with row j.
    dif_err = np.abs(np.abs(mz - mz[:, np.newaxis]) - dif)
    close_rt = np.abs(rt - rt[:, np.newaxis]) < rt_threshold
    # A row must not count as its own partner.
    np.fill_diagonal(close_rt, False)

    if rthreshold is not None:
        # Use the larger m/z of each pair so that [i, j] and [j, i] share the
        # same tolerance; a one-sided tolerance could keep only one member
        # of a pair.
        tolerance = np.maximum(mz, mz[:, np.newaxis]) * rthreshold
    else:
        tolerance = abs_threshold

    paired = (dif_err < tolerance) & close_rt
    return a.loc[np.any(paired, axis=1)]

def find_isotope(input_,output_,dif=4.015,rthreshold=20*PPM,rt_threshold=.05,abs_threshold=.1):
    """
    Read peaks from an Excel file, find isotope pairs and save them to Excel.

    Parameters
    ----------
    input_ : input file name, e.g. '数据预处理.xlsx'. The sheet must contain
        the columns 'M/Z' and 'RT'; all other columns are dropped.
    output_ : output file name, e.g. '结果-数据预处理.xlsx'.
    dif : expected M/Z difference between the members of an isotope pair.
    rthreshold : relative M/Z error threshold; pass None to use
        ``abs_threshold`` instead.
    rt_threshold : absolute retention-time error threshold.
    abs_threshold : absolute M/Z error threshold, only used when
        ``rthreshold`` is None.

    Returns
    -------
    DataFrame
        The matching rows as returned by ``find_isotope_`` (original index,
        unsorted). The file written to ``output_`` holds the same rows sorted
        by 'M/Z' then 'RT' with a fresh 0..n-1 index.
    """
    peaks = pd.read_excel(input_).loc[:,['M/Z','RT']]
    result = find_isotope_(
        peaks,
        dif=dif,
        rthreshold=rthreshold,
        rt_threshold=rt_threshold,
        abs_threshold=abs_threshold,
    )
    # Only the exported copy is sorted and re-indexed; the returned frame
    # keeps the row labels of the input sheet.
    result.sort_values(['M/Z','RT']).reset_index(drop=True).to_excel(output_)
    return result

In [60]:
from find_isotope import find_isotope,PPM

# Input workbook and destination for the matched isotope pairs.
input_ =  '数据预处理.xlsx'
output_ = '结果-数据预处理.xlsx'

# Matching parameters: expected M/Z difference, relative M/Z tolerance,
# and absolute retention-time tolerance.
dif=4.015
mz_rthreshold=20*PPM
rt_threshold=.05

# Pass the rt_threshold variable (not a repeated literal) so that editing the
# value above actually changes the search.
find_isotope(input_,output_,dif=dif,rthreshold=mz_rthreshold,rt_threshold=rt_threshold)

Unnamed: 0,M/Z,RT
864,130.050,12.06
1089,134.064,12.03
1247,138.074,1.72
1321,140.068,8.58
1377,141.071,8.59
1409,142.090,1.72
1482,144.081,8.57
1560,145.084,8.58
2015,163.004,1.74
2071,167.019,1.74


- 使用相对误差阈值匹配同位素