In [4]:
import pandas as pd
from Source import data_chanel_map
from Source import forward_color
from Source import reverse_color
from Source import reverse_map
from Source import Definition

definition = Definition("./resource/tables.pdata")

class BinFile:
    def __init__(self, file) -> None:
        self.data = self.read_file(file)
        self.process_data = self.parser()

    def read_file(self, file):
        with open(file, 'r') as f:
            return [line.strip('\n') for line in f.readlines()]

    def parser(self):
        lines = []
        for line in self.data:
            # print(line)
            if 'Version' in line:
                version = line.split('\t')[1]
            elif 'Chemistry' in line:
                kit_name = line.split('\t')[1]
            elif 'Panel Name' in line:
                panel_name = line.split('\t')[1]
            elif 'Marker Name' in line:
                marker_name = line.split('\t')[1]
            elif 'BinSet' in line:
                continue
            else:
                # print(panel_name)
                line_base = line.split('\t')
                line_base.insert(0, marker_name)
                line_base.insert(0, panel_name)
                line_base.insert(0, kit_name)
                line_base.insert(0, version)
                lines.append(line_base)

        
        df = pd.DataFrame(lines, columns=['version', 'package', 'defined_panel', 'marker', 'base', 'min_bin', 'max_bin', 'color'])
        df = df[pd.notnull(df.color)]
        df['min_bin'] = round(df['min_bin'].astype('float'),2)
        df['max_bin'] = round(df['max_bin'].astype('float'),2)
        df['color'] = df['color'].apply(lambda x: x.lower())
        
        # conver yellow to black
        df['color'] = df['color'].apply(lambda x: 'black' if x  == 'yellow' else x)
        
        # update direction
        df['direction'] = df.apply(lambda row: self.update_direction(row['base'], row['color']), axis=1)

        # update is_forward
        df['is_forward'] = df['direction'].apply(lambda x: 1 if x == 'Forward' else 0)

        # update label
        df = df.merge(definition.marker_table[['marker', 'marker_label', 'gene']], how='left')

        # update panel
        df['panel'] = df['defined_panel'].apply(lambda x: f'S{x[-1]}')

        # update basetype

        def _base_type(marker, base):
            # print(marker)
            marker_data = definition.marker_table[definition.marker_table['marker'] ==  marker]
            print(marker_data)
            wildtype = marker_data['wildtype'].to_list()[0]

            return 'wildtype' if base == wildtype else 'mutant'


        df['basetype'] = df.apply(lambda row: _base_type(row['marker'], row['base']), axis=1)
                

        # intiate min intensity
        df['min_height'] = 1000

        # process duplicate
        # get max range file of bin setting if base duplicated
        # M1 20  25
        # M1 21  26
        # --> M1 20 26 

        tmp = df.groupby(['version', 'panel', 'marker', 'base', 'color'], as_index=False).agg(bin_min=('min_bin', 'min'), bin_max=('max_bin', 'max'))

        df = df.merge(tmp)

        df.drop(columns=['min_bin', 'max_bin'], inplace=True)
        df.rename(columns={'bin_min':'min_bin', 'bin_max':'max_bin'}, inplace=True)

        # commennt the above code if we want to get the max bin range instead

        df['binrange'] = df.min_bin - df.max_bin

        # get max bin range
        df = df.sort_values('binrange', ascending=False).groupby(['version', 'panel', 'marker', 'base', 'color'], as_index=False).first()

        df.sort_values(['panel', 'gene', 'marker', 'basetype', 'min_bin'], ascending=[True, True, True, False, True], inplace=True)
       
        # reorder colums
        df = df[['package', 'version', 'defined_panel', 'panel', 'gene', 'marker',  'marker_label', 'is_forward',  'direction', 
                'base', 'basetype', 'color',  'min_height', 'min_bin', 'max_bin']]


        # check status marker

        markers = df[['panel','marker', 'direction']].drop_duplicates()
        count = markers.groupby('marker').size()

        if any(count[count > 1]):
            error_markers = "; ".join(count[count > 1].index)
            raise ValueError(f'Marker(s) info not consistent [{error_markers}]')
        
        else:
            return df
    
    def update_direction(self, base, color):
        return 'Forward' if forward_color.get(base) == color else 'Reverse'


In [8]:
bin_file = './validation_panels/panels/[2021.08 └┌╖ß] PGx-NP_PGx-NP_bins_lam_updated.txt'
bin = BinFile(bin_file)
bin.process_data.to_excel('./validation_panels/PGx_NP_bin_seting_table_new_form.xlsx', index=False)
data = bin.process_data

        marker star_allele aa_change nt_change       rsid wildtype mutant  \
20  CYP2D6_001         *14     G169R   1758G>A  rs5030865        G      A   

    gene_id  marker_id    gene marker_label  
20       14         21  CYP2D6   CYP2D6 *14  
        marker star_allele aa_change nt_change       rsid wildtype mutant  \
20  CYP2D6_001         *14     G169R   1758G>A  rs5030865        G      A   

    gene_id  marker_id    gene marker_label  
20       14         21  CYP2D6   CYP2D6 *14  
        marker star_allele aa_change nt_change       rsid wildtype mutant  \
21  CYP2D6_002        *10B      P34S    100C>T  rs1065852        C      T   

    gene_id  marker_id    gene marker_label  
21       14         22  CYP2D6  CYP2D6 *10B  
        marker star_allele aa_change nt_change       rsid wildtype mutant  \
21  CYP2D6_002        *10B      P34S    100C>T  rs1065852        C      T   

    gene_id  marker_id    gene marker_label  
21       14         22  CYP2D6  CYP2D6 *10B  
        mark

In [7]:
data

Unnamed: 0,package,version,defined_panel,panel,gene,marker,marker_label,is_forward,direction,base,basetype,color,min_height,min_bin,max_bin
1,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_001,CYP2D6 *14,1,Forward,G,wildtype,blue,1000,25.0,35.0
0,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_001,CYP2D6 *14,1,Forward,A,mutant,green,1000,27.0,36.0
2,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_002,CYP2D6 *10B,1,Forward,C,wildtype,black,1000,28.0,38.0
3,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_002,CYP2D6 *10B,1,Forward,T,mutant,red,1000,31.0,37.0
5,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_003,CYP2D6 *49,0,Reverse,T,wildtype,green,1000,37.0,44.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107,PGx-NP,GM v 4.1,N-SPM06,S6,MC4R,MC4R_001,MC4R rs489693,1,Forward,T,mutant,red,1000,76.0,84.0
109,PGx-NP,GM v 4.1,N-SPM06,S6,UGT1A4,UGT1A4_001,UGT1A4 142T>G,1,Forward,T,wildtype,red,1000,63.5,69.0
108,PGx-NP,GM v 4.1,N-SPM06,S6,UGT1A4,UGT1A4_001,UGT1A4 142T>G,1,Forward,G,mutant,blue,1000,61.0,67.0
110,PGx-NP,GM v 4.1,N-SPM06,S6,UGT2B15,UGT2B15_001,UGT2B15 253G>T,1,Forward,G,wildtype,blue,1000,51.8,57.0


In [6]:
df = data.groupby(['version', 'panel', 'marker', 'base', 'color'], as_index=False).agg(bin_min=('min_bin', 'min'), bin_max=('max_bin', 'max'))

In [7]:
df.head()

Unnamed: 0,version,panel,marker,base,color,bin_min,bin_max
0,GM v 4.1,S1,CYP2D6_001,A,green,27.0,36.0
1,GM v 4.1,S1,CYP2D6_001,G,blue,25.0,35.0
2,GM v 4.1,S1,CYP2D6_002,C,black,28.0,38.0
3,GM v 4.1,S1,CYP2D6_002,T,red,31.0,37.0
4,GM v 4.1,S1,CYP2D6_003,A,red,39.0,45.0


In [17]:
data = data.sort_values(['panel', 'gene', 'marker', 'basetype', 'min_bin'], ascending=[True, True, True, False, True])

In [6]:
bin.process_data

Unnamed: 0,package,version,defined_panel,panel,gene,marker,marker_label,is_forward,direction,base,basetype,color,min_height,min_bin,max_bin
1,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_001,CYP2D6 *14,1,Forward,G,wildtype,blue,1000,25.0,35.0
0,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_001,CYP2D6 *14,1,Forward,A,mutant,green,1000,27.0,36.0
2,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_002,CYP2D6 *10B,1,Forward,C,wildtype,black,1000,28.0,38.0
3,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_002,CYP2D6 *10B,1,Forward,T,mutant,red,1000,31.0,37.0
5,PGx-NP,GM v 4.1,N-SPM01,S1,CYP2D6,CYP2D6_003,CYP2D6 *49,0,Reverse,T,wildtype,green,1000,37.0,44.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,PGx-NP,GM v 4.1,N-SPM06,S6,CYP1A2,CYP1A2_001,CYP1A2 *1F,1,Forward,A,mutant,green,1000,68.0,75.0
93,PGx-NP,GM v 4.1,N-SPM06,S6,CYP1A2,CYP1A2_001,CYP1A2 *1F,1,Forward,C,wildtype,black,1000,68.5,75.0
106,PGx-NP,GM v 4.1,N-SPM06,S6,MC4R,MC4R_001,MC4R rs489693,1,Forward,C,wildtype,black,1000,76.0,84.0
105,PGx-NP,GM v 4.1,N-SPM06,S6,MC4R,MC4R_001,MC4R rs489693,1,Forward,A,mutant,green,1000,76.0,83.0


In [9]:

from Source import generate_markers
import pandas as pd
data = pd.read_excel("validation_panels/PGx_NP_bin_seting_table_new_form.xlsx")
markers = generate_markers(data, True)

In [2]:
markers['CYP2D6_001'].alleles[0].defined_color

'green'

In [2]:
markers['CYP2D6_001'].alleles[0].color

'green'