In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os,sys,glob,time,warnings
import fcsparser
import pandas as pd
import numpy as np

In [2]:
## Change the working directory to the 'sample' folder, which holds the FCS files
## and the Excel files containing the antibody information.
## `cwd` keeps the directory the notebook started in; it is used later to build the CSV output path.
cwd = os.getcwd()
sample_dir = os.path.join(cwd, 'sample')
if os.path.basename(cwd) == 'sample' and not os.path.isdir(sample_dir):
    # This cell has already been run: we are inside 'sample' already, so step
    # back out to recover the starting directory instead of looking for
    # 'sample/sample' (which would fail and leave `cwd` pointing at the wrong place).
    sample_dir = cwd
    cwd = os.path.dirname(cwd)
os.chdir(sample_dir)

In [3]:
## Read in every FCS file found in the current working directory.
## The three lists share the same index:
##   fcs_namelist - file names without the '.fcs' suffix (used for naming output files)
##   fcs_data     - the data matrix stored inside each FCS file
##   fcs_meta     - the meta information stored inside each FCS file
fcs_namelist, fcs_data, fcs_meta = [], [], []

for file in os.listdir('.'):
    if file.endswith('.fcs'):
        t_meta, t_data = fcsparser.parse(file, meta_data_only=False, reformat_meta=True)
        fcs_meta.append(t_meta)
        fcs_data.append(t_data)
        fcs_namelist.append(file[:-4]) # strip the '.fcs' suffix

if fcs_namelist:
    # Drop the loop temporaries; the lists above keep the parsed objects alive.
    del t_data, t_meta
else:
    # Decide from what was actually loaded in this run rather than from whether
    # `del` happens to raise, so leftovers from an earlier run cannot hide the warning.
    warnings.warn('No valid fcs files!')

In [4]:
fcs_namelist ## names (without the '.fcs' suffix) of all the FCS files loaded from the folder

['c06_20201204_organelle_mix_10E3_gated_concatenated',
 'c05_20201204_organelle_mix_10E3_gated_concatenated',
 'c08_20201204_organelle_mix_10E3_gated_concatenated',
 'c07_20201204_organelle_mix_10E3_gated_concatenated',
 'c09_20201204_organelle_mix_10E3_gated_concatenated',
 'c10_20201204_organelle_mix_10E3_gated_concatenated']

In [5]:
def do_ab_corrections(Excel, fcs_file_name, fcs_namelist, fcs_data):
    """
    Subtract the isotype-control signal from the antibody channels of one FCS file.

    The Excel sheet is read with its first column as the index and must contain the
    columns ``Channel``, ``Protein``, ``Host`` and ``Ratio``. A row with an empty
    ``Host`` is treated as an isotype control; its ``Protein`` must be named
    ``<Host>_ISO`` so antibody rows can find it through their own ``Host`` value.
    If you rename these Excel columns, update the names used in this function too.

    For every row that has all four values, the channel is replaced by
    ``(p/cp - q/cq) * cp`` where ``p`` is the channel signal, ``q`` the matching
    isotype signal, and ``cp``/``cq`` their ``Ratio`` values. The isotype channels
    are then removed and negative values are clipped to 0.

    :param Excel: name of the excel file
    :param fcs_file_name: name of the FCS file for correction, no '.fcs' suffix
    :param fcs_namelist: list of FCS file names, index-aligned with ``fcs_data``
    :param fcs_data: list of data matrices (DataFrames), one per FCS file
    :return: data matrix of the corrected data (a new DataFrame; the input is not modified)
    :raises ValueError: if ``fcs_file_name`` is not in ``fcs_namelist``
    :raises KeyError: if a host has no matching ``<Host>_ISO`` row, or a listed
        channel is missing from the FCS data
    """
    excel = pd.read_excel(Excel, index_col = 0)
    fcs = fcs_data[fcs_namelist.index(fcs_file_name)]

    # Drop incomplete rows jointly, so each channel stays paired with the protein
    # from the same row (dropping NaNs per column could shift the pairs).
    labelled = excel.dropna(subset=['Channel', 'Protein'])
    channel_vs_antibody = dict(zip(labelled.Channel, labelled.Protein))
    antibody_vs_channel = dict(zip(labelled.Protein, labelled.Channel))

    # Only the columns used below decide whether a row is correctable; an empty
    # cell in any other column must not silently disable the correction.
    correctable = excel.dropna(subset=['Channel', 'Protein', 'Host', 'Ratio'])
    channel_vs_host = dict(zip(correctable.Channel, correctable.Host))
    channel_vs_ratio = dict(zip(excel.Channel, excel.Ratio))
    id_channel = list(channel_vs_antibody)

    # Work on an explicit copy: the caller's data stays untouched and pandas has
    # no reason to warn about assigning into a slice.
    target_fcs = fcs.loc[:, id_channel].copy()

    for channel, host in channel_vs_host.items():
        iso_name = host + '_ISO'
        if iso_name not in antibody_vs_channel:
            raise KeyError(
                "No isotype row named '%s' in '%s' (needed for channel '%s')"
                % (iso_name, Excel, channel)
            )
        iso_channel = antibody_vs_channel[iso_name]
        p = target_fcs.loc[:, channel]
        q = target_fcs.loc[:, iso_channel]
        cp = channel_vs_ratio[channel]
        cq = channel_vs_ratio[iso_channel]
        target_fcs.loc[:, channel] = (p/cp - q/cq)*cp

    # Remove the isotype channels. Blank rows (NaN channel) and channels that were
    # never selected are skipped instead of raising from drop().
    isotype_channels = [
        channel
        for channel in excel.Channel[excel.Host.isna()].dropna().unique()
        if channel in target_fcs.columns
    ]
    target_fcs = target_fcs.drop(columns=isotype_channels)

    # The subtraction can produce negative values; clip them to 0.
    # Remove this step if you want to keep the raw differences.
    return target_fcs.clip(0)

In [6]:
# Correct one sample against its antibody sheet, then display the result.
target_fcs = do_ab_corrections(
    Excel='YLAP001_Ab_info_20201212.xlsx',
    fcs_file_name='c06_20201204_organelle_mix_10E3_gated_concatenated',
    fcs_namelist=fcs_namelist,
    fcs_data=fcs_data,
)
target_fcs

Unnamed: 0,Nd142Di,Nd143Di,Sm154Di,Gd155Di,Gd156Di,Gd160Di,Dy162Di,Yb172Di
0,0.0,0.000,2.481000,0.000,7.596001,7.901000,0.000,0.000
1,0.0,0.000,0.000000,0.000,1.266000,2.634000,1.342,0.000
2,0.0,0.000,2.481000,0.000,0.000000,0.000000,0.000,0.000
3,0.0,1.143,9.924000,1.253,2.532000,118.522011,0.000,0.000
4,0.0,0.000,0.000000,0.000,0.255222,0.000000,0.000,0.000
...,...,...,...,...,...,...,...,...
7237,0.0,3.705,4.007000,0.000,0.000000,1.421000,1.450,0.000
7238,0.0,0.000,0.000000,0.000,0.000000,0.000000,1.450,0.000
7239,0.0,0.000,1.336000,0.000,0.000000,0.000000,4.349,0.000
7240,0.0,3.705,1.612701,0.000,0.268990,13.043314,4.349,0.000


In [7]:
## Export the corrected matrix as CSV for downstream analysis in FlowJo, Cytobank or a similar platform.
## The file is written to `cwd`, the directory captured before changing into the 'sample' folder.
target_file_name = 'test_20210119'
target_fcs.to_csv(f'{cwd}/{target_file_name}.csv')