In [20]:
import pandas as pd
import glob

def collect_salmon_quant(globpattern, output_prefix):
    """Merge per-sample salmon quantification tables into wide TSV files.

    For each file prefix ('quant' and 'quant.genes') and each quantity
    ('TPM' and 'NumReads') one table is written to
    ``../raw/{output_prefix}_salmon_{prefix}_{key}.tsv``. Each table holds
    the first three columns of the first matched file followed by one
    column per matched file.

    Parameters
    ----------
    globpattern : str
        Glob pattern selecting the per-sample files. A literal ``{prefix}``
        placeholder in the pattern is replaced by the file prefix currently
        being processed, so pass a plain string, not an f-string, e.g.
        ``'../results/star_salmon/16*/{prefix}.sf'``. A pattern without the
        placeholder is used unchanged for both prefixes.
    output_prefix : str
        Leading part of the output file names.

    Raises
    ------
    FileNotFoundError
        If no file matches the pattern for one of the prefixes.
    """
    quant_keys = ['TPM', 'NumReads']

    for prefix in ['quant', 'quant.genes']:
        # Resolve the pattern per prefix; otherwise both iterations would
        # read the same files and write identical tables under two names.
        pattern = globpattern.replace('{prefix}', prefix)

        # glob returns matches in arbitrary order; sort once so the template
        # file and the sample column order are reproducible between runs.
        files = sorted(glob.glob(pattern))
        if not files:
            raise FileNotFoundError(
                f'no salmon quantification files match {pattern!r}'
            )

        # The first file supplies the row order ('Name') and the metadata
        # columns that are prepended to every output table.
        template = pd.read_csv(files[0], sep='\t')
        quant_dfs = {key: template[['Name']].copy() for key in quant_keys}

        # Collect one column per file and quantity.
        for index, file in enumerate(files):
            # The first file is already loaded as the template.
            res = template if index == 0 else pd.read_csv(file, sep='\t')
            # The column label is the fourth '/'-separated path component;
            # presumably the sample directory for patterns shaped like
            # '../results/star_salmon/<sample>/<file>' - verify for others.
            sample = file.split('/')[3]
            for key in quant_keys:
                column = res[['Name', key]].rename(columns={key: sample})
                quant_dfs[key] = quant_dfs[key].merge(
                    column,
                    on='Name',
                    how='left',
                )

        # Prepend the metadata columns and write one table per quantity.
        colnames = list(template.columns[:3])
        metadata = template.loc[:, colnames]
        for key in quant_keys:
            sample_names = list(quant_dfs[key].columns[1:])
            merged = quant_dfs[key].merge(metadata, on='Name', how='left')
            merged.loc[:, colnames + sample_names].to_csv(
                f'../raw/{output_prefix}_salmon_{prefix}_{key}.tsv',
                sep='\t',
                index=False,
            )

In [21]:
# Aggregate the salmon output of the sample directories matching "16*"
# into tables whose file names start with "scRNA".
# NOTE(review): the f-string is evaluated here, before the call, so a
# `prefix` variable must already exist in the notebook namespace - confirm
# which value it holds when this cell runs.
collect_salmon_quant(
    globpattern=f'../results/star_salmon/16*/{prefix}.sf',
    output_prefix='scRNA',
)

In [22]:
# Aggregate the salmon output of the sample directories matching "8*"
# into tables whose file names start with "RNA".
# NOTE(review): the f-string is evaluated here, before the call, so a
# `prefix` variable must already exist in the notebook namespace - confirm
# which value it holds when this cell runs.
collect_salmon_quant(
    globpattern=f'../results/star_salmon/8*/{prefix}.sf',
    output_prefix='RNA',
)