<a href="https://colab.research.google.com/github/macorony/Bioinformatic_analysis/blob/main/Algorithms/Bayesian_selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import numpy as np
import pandas as pd
class BayesianSelection:
  """Hold an sgRNA count table and derive normalized counts, groupings and priors.

  The count table is indexed by column name: ``gene`` is read at construction
  time and by ``group_sgrna_by_gene``; ``control`` and ``treatment`` are read
  by ``normalize_counts``.

  Attributes:
    count_table: The table passed to the constructor, stored unchanged.
    design_matrix: Stored unchanged; not read by the methods defined here.
    sgrna_efficiency: Stored unchanged; not read by the methods defined here.
    n_sgrnas: Number of rows in ``count_table``.
    genes: Sorted unique values of the ``gene`` column (``np.unique``).
    n_genes: Number of unique genes.
    normalized_counts: Placeholder, initialised to None.
    fold_changes: Placeholder, initialised to None.
    gene_guide_map: Placeholder, initialised to None.
    gene_priors: None until ``initialize_priors`` is called.
  """

  def __init__(self, count_table, design_matrix=None, sgrna_efficiency=None):
    """Store the inputs and derive the sgRNA and gene counts.

    Args:
      count_table: Table with at least a ``gene`` column.
      design_matrix: Optional; kept on the instance as-is.
      sgrna_efficiency: Optional; kept on the instance as-is.
    """
    self.count_table = count_table
    self.design_matrix = design_matrix
    self.sgrna_efficiency = sgrna_efficiency

    self.n_sgrnas = len(count_table)
    self.genes = np.unique(count_table['gene'])
    self.n_genes = len(self.genes)

    # Placeholders for derived results; the methods below return their
    # results instead of writing them here.
    self.normalized_counts = None
    self.fold_changes = None
    self.gene_guide_map = None
    # Declared up front so the attribute exists before initialize_priors().
    self.gene_priors = None

  def normalize_counts(self, count_table=None):
    """Scale the control and treatment counts to a common median.

    Each sample is multiplied by a single scalar size factor so that both
    samples end up with the same median (the mean of the two original
    medians). Ratios between sgRNAs within a sample are preserved.

    Args:
      count_table: Table with ``control`` and ``treatment`` columns.
        Defaults to the table given to the constructor.

    Returns:
      A DataFrame with ``control`` and ``treatment`` columns holding the
      scaled counts.

    Raises:
      ValueError: If either sample median is not strictly positive (this
        includes an empty table), because no finite size factor exists.
    """
    if count_table is None:
      count_table = self.count_table

    control = count_table['control']
    treatment = count_table['treatment']

    control_median = float(np.median(control))
    treatment_median = float(np.median(treatment))
    # `not (x > 0)` also rejects NaN, which np.median yields for empty input.
    if not (control_median > 0 and treatment_median > 0):
      raise ValueError(
          "cannot normalize counts: the medians of 'control' and "
          "'treatment' must both be positive"
      )

    # One scalar per sample. A per-row factor (median / count) would cancel
    # against the count and collapse every row to the sample median.
    target_median = (control_median + treatment_median) / 2.0
    control_sf = target_median / control_median
    treatment_sf = target_median / treatment_median

    return pd.DataFrame(
        {'control': control * control_sf,
         'treatment': treatment * treatment_sf}
        )

  def group_sgrna_by_gene(self):
    """Map each gene to the row positions of its sgRNAs.

    Returns:
      A dict keyed by gene (in the order of ``self.genes``). Each value is
      the 1-tuple returned by ``np.where``, whose only element is an array
      of positional row indices where the ``gene`` column equals that gene.
    """
    gene_column = self.count_table['gene']
    return {gene: np.where(gene_column == gene) for gene in self.genes}

  def initialize_priors(self):
    """Set ``self.gene_priors`` to a zero mean and unit variance per gene."""
    self.gene_priors = {
        'mean': np.zeros(self.n_genes),
        'variance': np.ones(self.n_genes)
    }


In [12]:
# Small example count table: four sgRNAs, two per gene, with one control
# and one treatment count column.
count_data = pd.DataFrame(
    dict(
        sgrna=[f'sgrna{i}' for i in range(1, 5)],
        gene=['geneA'] * 2 + ['geneB'] * 2,
        control=[100, 200, 200, 250],
        treatment=[50, 150, 200, 400],
    )
)

In [13]:
# Efficiency value for each sgRNA, keyed by sgRNA name; every entry is 0 here.
sgrna_efficiency = dict.fromkeys(('sgrna1', 'sgrna2', 'sgrna3', 'sgrna4'), 0)

In [14]:
# Build the selector from count_data; design_matrix is left at its default.
bayes_selector = BayesianSelection(
    count_table=count_data,
    sgrna_efficiency=sgrna_efficiency,
)

In [16]:
# Display the unique gene labels collected by the constructor.
bayes_selector.genes

array(['geneA', 'geneB'], dtype=object)

In [17]:
# Display, for each gene, the row positions of its sgRNAs (np.where output).
bayes_selector.group_sgrna_by_gene()

{'geneA': (array([0, 1]),), 'geneB': (array([2, 3]),)}