In [14]:
import re
import pandas as pd
from Source import data_chanel_map
from Source import forward_color
from Source import reverse_color
from Source import reverse_map
from Source import Definition

# Module-level definition tables; BinFile.parser reads `definition.marker_table`
# (columns 'marker', 'label' and 'wildtype').
# NOTE(review): the path is relative, so it presumably resolves against the
# current working directory — confirm where the notebook is launched from.
definition = Definition("./resource/tables.pdata")

class BinFile:
    """Reader for a tab-separated bin-definition text file.

    The file is read once at construction time and its lines are kept in
    ``self.data``. :meth:`parser` turns those lines into a pandas DataFrame
    holding one row per (version, panel, marker, base, color) combination.
    """

    # Columns of the raw table: the four header values in effect for a data
    # line, followed by the four tab-separated fields of that line.
    _RAW_COLUMNS = ['version', 'package', 'defined_panel', 'marker',
                    'base', 'min_bin', 'max_bin', 'color']

    # Keys that identify one bin; duplicated keys are collapsed to one row.
    _GROUP_KEYS = ['version', 'panel', 'marker', 'base', 'color']

    # Column order of the table returned by ``parser``.
    _OUTPUT_COLUMNS = ['package', 'version', 'defined_panel', 'panel',
                       'marker', 'label', 'is_forward', 'direction',
                       'base', 'basetype', 'color', 'min_height',
                       'min_bin', 'max_bin', 'binrange']

    def __init__(self, file) -> None:
        """Read *file* and store its lines in ``self.data``."""
        self.data = self.read_file(file)

    def read_file(self, file):
        """Return the lines of *file* as a list, newline characters removed."""
        with open(file, 'r') as f:
            return [line.strip('\n') for line in f]

    def _collect_rows(self):
        """Return one 8-item list per data line of ``self.data``.

        Header lines ('Version', 'Chemistry', 'Panel Name', 'Marker Name')
        update the values that are prepended to every following data line;
        'BinSet' lines are ignored.

        Raises:
            ValueError: a data line appears before all four headers were seen.
            IndexError: a header line has no tab-separated value.
        """
        version = kit_name = panel_name = marker_name = None
        rows = []
        for line_number, line in enumerate(self.data, start=1):
            if 'Version' in line:
                version = line.split('\t')[1]
            elif 'Chemistry' in line:
                kit_name = line.split('\t')[1]
            elif 'Panel Name' in line:
                panel_name = line.split('\t')[1]
            elif 'Marker Name' in line:
                marker_name = line.split('\t')[1]
            elif 'BinSet' in line:
                continue
            else:
                fields = line.split('\t')
                # Blank or truncated lines carry no color field and would be
                # discarded anyway; skip them before they reach the frame so
                # a leading blank line cannot break parsing.
                if len(fields) < 4:
                    continue
                if None in (version, kit_name, panel_name, marker_name):
                    raise ValueError(
                        f'line {line_number}: bin definition found before '
                        'the Version, Chemistry, Panel Name and Marker Name '
                        'headers'
                    )
                rows.append(
                    [version, kit_name, panel_name, marker_name, *fields]
                )
        return rows

    def parser(self):
        """Convert the stored lines into the bin table.

        Returns:
            pandas.DataFrame with the columns listed in ``_OUTPUT_COLUMNS``.
            When several lines share the same version, panel, marker, base
            and color, the values come from the line with the widest
            ``max_bin - min_bin`` range.

        Raises:
            ValueError: a data line precedes the headers, or a data line has
                more than four fields (raised by the DataFrame constructor).
            IndexError: a marker is missing from ``definition.marker_table``.
        """
        df = pd.DataFrame(self._collect_rows(), columns=self._RAW_COLUMNS)

        df['min_bin'] = df['min_bin'].astype('float').round(2)
        df['max_bin'] = df['max_bin'].astype('float').round(2)

        # Colors are compared in lower case; yellow is treated as black.
        lowered = df['color'].str.lower()
        df['color'] = lowered.where(lowered != 'yellow', 'black')

        # Direction, and the boolean flag derived from it.
        df['direction'] = [
            self.update_direction(base, dye)
            for base, dye in zip(df['base'], df['color'])
        ]
        df['is_forward'] = df['direction'] == 'forward'

        # Attach the label of each marker.
        marker_table = definition.marker_table
        df = df.merge(marker_table[['marker', 'label']],
                      how='left', on='marker')

        # Panel code: 'S' followed by the last character of the panel name.
        df['panel'] = df['defined_panel'].apply(lambda name: f'S{name[-1]}')

        # Base type: build the marker -> wildtype lookup once instead of
        # filtering the marker table for every row. setdefault keeps the
        # first entry when a marker is listed more than once.
        wildtype_by_marker = {}
        for marker, wildtype in zip(marker_table['marker'],
                                    marker_table['wildtype']):
            wildtype_by_marker.setdefault(marker, wildtype)
        unknown = sorted(set(df['marker']) - set(wildtype_by_marker), key=str)
        if unknown:
            # IndexError is kept for backward compatibility with the former
            # per-row lookup, which failed with "list index out of range".
            raise IndexError(f'markers missing from the marker table: {unknown}')
        df['basetype'] = [
            'wildtype' if base == wildtype_by_marker[marker] else 'mutant'
            for marker, base in zip(df['marker'], df['base'])
        ]

        # Initial minimum intensity.
        df['min_height'] = 1000

        # Duplicated bins: keep the definition with the widest range.
        df['binrange'] = df.max_bin - df.min_bin
        df = (
            df.sort_values('binrange', ascending=False)
            .groupby(self._GROUP_KEYS, as_index=False)
            .first()
        )

        return df[self._OUTPUT_COLUMNS]

    def update_direction(self, base, color):
        """Return 'forward' if *color* is the forward color of *base*, else 'reverse'."""
        return 'forward' if forward_color.get(base) == color else 'reverse'


In [15]:
# Path of the bin-definition text file to convert.
# NOTE(review): the bracketed part of the file name looks mis-encoded —
# confirm it matches the name of the file on disk.
bin_file = './raw_data/panels/[2021.08 └┌╖ß] PGx-NP_PGx-NP_bins_lam_updated.txt'
# NOTE(review): `bin` shadows the built-in bin(); the name is kept because
# other cells may refer to it.
bin = BinFile(bin_file)
# Parse the loaded lines into the bin table (a pandas DataFrame).
data = bin.parser()