In [23]:
from Source import *
import pandas as pd
import os
# Genes that call_by_sample() reports through call_star_allele() as a single
# result row instead of one row per marker.
star_genes = [
    'CYP2D6', 'CYP2C19', 'HLA-A', 'HLA-B', 'CYP2C9', 'CYP3A4',
    'CYP3A5', 'CYP1A2', 'UGT1A4', 'UGT2B15', 'CYP2B6',
]


# Root folder; every entry directly inside it is treated as one sample.
director = './validation_panels/panels/PGx-NP_FSA file/'

samples = os.listdir(director)
path_samples = [director + sample + '/' for sample in samples]

# Load the definition table.
definition = Definition()

# Reference sizes (handed to call_from_fsa() together with the target markers).
liz120 = [15, 20, 25, 35, 50, 62, 80, 110, 120]
def call_by_sample(sample, qc_plot=True, intensity_plot=True, update_excel=True):
    """Call genotypes for one sample folder and optionally export the results.

    The sample folder is ``{director}{sample}/``. It must contain
    ``{sample}.xlsx`` with a ``peak_table`` sheet and the ``.fsa`` trace files.
    The function relies on the module-level ``director``, ``definition``,
    ``star_genes`` and ``liz120`` objects.

    Parameters
    ----------
    sample : str
        Name of the sample, which is also the name of its folder and workbook.
    qc_plot : bool, default True
        Save every figure returned by ``genotype.plotting_qc`` as
        ``{sample}-{name}-QC.png`` in the sample folder.
    intensity_plot : bool, default True
        Write the figures returned by ``plot_intensity`` to
        ``{sample}_raw_intensity.html`` (replacing any previous file) and show
        each of them.
    update_excel : bool, default True
        Write the ``allele_table``, ``marker_table`` and ``genotype_result``
        sheets into the sample workbook, replacing sheets of the same name.

    Returns
    -------
    tuple
        ``(genotype, support_data)`` exactly as returned by ``call_from_fsa``.
    """
    sample_folder = f'{director}{sample}/'
    peak_file = f'{sample_folder}{sample}.xlsx'
    print(peak_file)
    s_peak_table = pd.read_excel(peak_file, sheet_name='peak_table')
    target_markers = generate_markers(s_peak_table, new_form=True)

    # Match on the extension: a plain substring test would also pick up any
    # other file that merely has "fsa" somewhere in its name.
    fsa_files = sorted(
        f'{sample_folder}{entry}'
        for entry in os.listdir(sample_folder)
        if entry.endswith('.fsa')
    )

    genotype, support_data = call_from_fsa(fsa_files, target_markers, liz120, val_plot=False)

    s_data = genotype.marker_table(sample)

    # Collect one frame per gene and concatenate once at the end instead of
    # growing a DataFrame inside the loop.
    result_cols = ['sample', 'gene', 'marker', 'label', 'genotype']
    frames = []
    for gene in s_peak_table['gene'].unique():
        if gene in star_genes:
            # Star-allele genes are summarised as a single row for the gene.
            targets = definition.get_target_marker(gene)
            s_pattern = generate_sample_pattern(s_data, targets['marker_name'])
            result = call_star_allele(s_pattern, targets['pattern'])
            frames.append(pd.DataFrame({'sample': sample, 'gene': gene,
                                        'marker': '', 'label': '',
                                        'genotype': result}, index=[0]))
        else:
            # Every other gene keeps its per-marker rows.
            frames.append(s_data[s_data['gene'] == gene][result_cols])

    # pd.concat() raises on an empty list, so fall back to an empty frame.
    geno = pd.concat(frames) if frames else pd.DataFrame()

    print(geno)

    if update_excel:
        # The context manager saves and closes the workbook on exit. An explicit
        # writer.save() is redundant and no longer exists in pandas >= 2.0.
        with pd.ExcelWriter(peak_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
            genotype.allele_table(called_filter=False).to_excel(writer, sheet_name='allele_table', index=False)
            genotype.marker_table().to_excel(writer, sheet_name='marker_table', index=False)
            geno.to_excel(writer, sheet_name='genotype_result', index=False)

    # QC plots
    if qc_plot:
        qc_figs = genotype.plotting_qc(sample, showfig=False)
        for qc_name in qc_figs:
            qc_figs.get(qc_name).savefig(f'{sample_folder}{sample}-{qc_name}-QC.png', dpi=150)

    # plot raw intensity
    if intensity_plot:
        figs = plot_intensity(genotype, support_data, base_range=(20, 85))
        html_file = f'{sample_folder}{sample}_raw_intensity.html'

        # Mode 'w' truncates a previous report, so no separate remove step is
        # needed. UTF-8 keeps the HTML independent of the platform default.
        with open(html_file, 'w', encoding='utf-8') as f:
            for fig in figs:
                f.write(fig.to_html(full_html=False, include_plotlyjs='cdn'))
                fig.show()

    return genotype, support_data

In [29]:
# Process sample 20220526-630049 with the default options and keep both
# return values (the genotype object and the supporting data).
x,y = call_by_sample('20220526-630049')

./validation_panels/panels/PGx-NP_FSA file/20220526-630049/20220526-630049.xlsx
              sample     gene     marker            label       genotype
0    20220526-630049   CYP2D6                                  *10B/*10B
34   20220526-630049    OPRM1  OPRM1_001  OPRM1 rs1799971             AA
0    20220526-630049    HLA-A                                           
0    20220526-630049    HLA-B                              *15:02/*15:02
0    20220526-630049   CYP2B6                                      *4/*6
0    20220526-630049  CYP2C19                                     *2/*17
0    20220526-630049   CYP2C9                                           
0    20220526-630049   CYP3A4                                           
0    20220526-630049   CYP3A5                                           
73   20220526-630049    ABCB1  ABCB1_001  ABCB1 rs2032582             AC
76   20220526-630049    ABCB1  ABCB1_002  ABCB1 rs1045642             AG
78   20220526-630049    ANKK1  ANKK1_001  AN

In [30]:
# Process sample 20220609-116-5093 with the default options; the returned
# tuple is not needed here and is bound to `_`.
_=call_by_sample('20220609-116-5093')

./validation_panels/panels/PGx-NP_FSA file/20220609-116-5093/20220609-116-5093.xlsx
                sample     gene     marker            label     genotype
0    20220609-116-5093   CYP2D6                                         
34   20220609-116-5093    OPRM1  OPRM1_001  OPRM1 rs1799971           AA
0    20220609-116-5093    HLA-A                                         
0    20220609-116-5093    HLA-B                              other/other
0    20220609-116-5093   CYP2B6                                    *1/*1
0    20220609-116-5093  CYP2C19                                         
0    20220609-116-5093   CYP2C9                                         
0    20220609-116-5093   CYP3A4                                  *22/*22
0    20220609-116-5093   CYP3A5                                         
73   20220609-116-5093    ABCB1  ABCB1_001  ABCB1 rs2032582           CC
76   20220609-116-5093    ABCB1  ABCB1_002  ABCB1 rs1045642           GG
78   20220609-116-5093    ANKK1  ANKK1_0

In [31]:
# Process sample 20220609-630087 with the default options; the returned
# tuple is not needed here and is bound to `_`.
_=call_by_sample('20220609-630087')

./validation_panels/panels/PGx-NP_FSA file/20220609-630087/20220609-630087.xlsx
              sample     gene     marker            label     genotype
0    20220609-630087   CYP2D6                                         
34   20220609-630087    OPRM1  OPRM1_001  OPRM1 rs1799971           AA
0    20220609-630087    HLA-A                                         
0    20220609-630087    HLA-B                              other/other
0    20220609-630087   CYP2B6                                         
0    20220609-630087  CYP2C19                                         
0    20220609-630087   CYP2C9                                         
0    20220609-630087   CYP3A4                                  *22/*22
0    20220609-630087   CYP3A5                                         
73   20220609-630087    ABCB1  ABCB1_001  ABCB1 rs2032582           AC
76   20220609-630087    ABCB1  ABCB1_002  ABCB1 rs1045642           AG
78   20220609-630087    ANKK1  ANKK1_001  ANKK1 rs1800497           

In [32]:
# Process sample 20220623-145-5093 with the default options; the returned
# tuple is not needed here and is bound to `_`.
_ = call_by_sample('20220623-145-5093')

./validation_panels/panels/PGx-NP_FSA file/20220623-145-5093/20220623-145-5093.xlsx
                sample     gene     marker            label     genotype
0    20220623-145-5093   CYP2D6                                         
34   20220623-145-5093    OPRM1  OPRM1_001  OPRM1 rs1799971           AG
0    20220623-145-5093    HLA-A                                         
0    20220623-145-5093    HLA-B                              other/other
0    20220623-145-5093   CYP2B6                                    *1/*1
0    20220623-145-5093  CYP2C19                                         
0    20220623-145-5093   CYP2C9                                         
0    20220623-145-5093   CYP3A4                                  *22/*22
0    20220623-145-5093   CYP3A5                                         
73   20220623-145-5093    ABCB1  ABCB1_001  ABCB1 rs2032582           AA
76   20220623-145-5093    ABCB1  ABCB1_002  ABCB1 rs1045642           AA
78   20220623-145-5093    ANKK1  ANKK1_0

In [26]:
# Generate the bin-setting workbook for every sample folder.
#
# NOTE: call_by_sample() reads '<sample>/<sample>.xlsx' (sheet 'peak_table'),
# so this cell has to be executed before any call_by_sample() cell.
# to_excel() with a path writes a brand-new workbook, so any sheets already
# present in an existing '<sample>.xlsx' are discarded.

bin_table = pd.read_excel('./validation_panels/PGx_NP_bin_seting_table_new_form.xlsx')

director = './validation_panels/panels/PGx-NP_FSA file/'

samples = os.listdir(director)

path_samples = [f'{director}{sample}/' for sample in samples]

for sample, path in zip(samples, path_samples):
    # os.listdir() also returns plain files (e.g. hidden OS files). Writing
    # '<file>/<file>.xlsx' would fail, so only real sample folders are used.
    if not os.path.isdir(path):
        continue
    name = f'{sample}.xlsx'
    full = f'{path}{name}'
    bin_table.to_excel(full, index=False, sheet_name='peak_table')