<a href="https://colab.research.google.com/github/macorony/Bioinformatic_analysis/blob/main/Algorithms/Bayesian_selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
import numpy as np
import pandas as pd
class BayesianSelection:
  def __init__(self, count_table, design_matrix=None, sgrna_efficiency=None):
    self.count_table = count_table
    self.design_matrix = design_matrix
    self.sgrna_efficiency = sgrna_efficiency

    self.n_sgrnas = len(count_table)
    self.genes = np.unique(count_table['gene'])
    self.n_genes = len(self.genes)

    self.normalized_counts = None
    self.fold_changes = None
    self.gene_guide_map = None

  def normalize_counts(self, count_table):
    control_sf = np.median(count_table['control'])/count_table['control']
    treatment_sf = np.median(count_table['treatment'])/count_table['treatment']
    normalized = pd.DataFrame(
        {'control': count_table['control']*control_sf,
         'treatment': count_table['treatment']*treatment_sf}
        )
    return normalized

  def group_sgrna_by_gene(self):
    gene_guide_map = {}
    for gene in self.genes:
      gene_guide_map[gene] = np.where(self.count_table['gene'] == gene)
    return gene_guide_map

  def initialize_priors(self):
    '''
    Initialize prior distributions for Bayesian analysis.
    '''
    # 1. gene effect priors(normal distribution)
    self.gene_priors = {
        'mean': np.zeros(self.n_genes),
        'variance': np.ones(self.n_genes)
    }
    # 2.sgRNA efficiency priors
    if self.sgrna_efficiency is None:
      self.sgrna_priors = {
          'mean': np.array(list(self.sgrna_efficiency.values())),
          'variance': 0.1 * np.ones(self.n_sgrnas)
      }
    else:
      self.sgrna_priors = {
          'mean': np.ones(self.n_sgrnas),
          'variance':np.ones(self.n_sgrnas)
          }
    # 3. dispersion priors (Gamma distribution)
    self.dispersion_priors = {
        'shape': np.ones(self.n_genes),
        'scale': np.ones(self.n_genes)
        }
    # 4. size factor priors (if needed for multiple conditions)
    if self.design_matrix is not None:
      n_conditions = self.design_matrix.shape[1]
      self.size_factor_priors = {
          'mean': np.ones(n_conditions),
          'variance': 0.1 * np.ones(n_conditions)
          }

  def construct_model(self):
    """
    Construct the hierarchical model for Bayesian analysis.
    """
    self.model = {
        # likelihood function (Negative Binomial)
        'likelihood': self.contruct_likelihood(),
        # priors?
        'priors': self.contruct_priors(),
        # hyperparameters?
        'hyperparameters': self.set_hyperparameters()
    }

  def estimate_parameters(self, n_iterations=1000, burn_in=100):
    """
    Estimate the parameters of the hierarchical model.
    """
    self.mcmc_samples = {
        'gene_effect': np.zeros((n_iterations, self.n_genes)),
        'sgrna_efficiency': np.zeros((n_iterations, self.n_sgrnas)),
        'dispersion': np.zeros((n_iterations, self.n_genes))
    }
    # run gibbs sampling
    for iter in range(n_iterations):
      self.update_gene_effect()
      self.update_sgrna_efficiency()
      self.update_dispersion()
      # store sample (after burn-in)
      if iter >= burn_in:
        self.store_samples(iter)
      # monitor convergence
      if iter % 100 == 0:
        self.check_convergence(iter)

  def contruct_likelihood(self):
    """
    contruct negative binomial likelihood function
    """
    def negative_binomial_likelihood(count, mean, dispersion):
      return np






In [24]:
count_data = pd.DataFrame(
    {'sgrna': ['sgrna1', 'sgrna2', 'sgrna3', 'sgrna4'],
     'gene': ['geneA', 'geneA', 'geneB', 'geneB'],
     'control': [100, 200, 200, 250],
     'treatment': [50, 150, 200, 400]
     })

In [25]:
sgrna_efficiency = {'sgrna1':0, 'sgrna2':0, 'sgrna3':0, 'sgrna4':0}

In [21]:
np.array(list(sgrna_efficiency.values()))

array([0, 0, 0, 0])

In [26]:
bayes_selector = BayesianSelection(count_data, sgrna_efficiency=sgrna_efficiency)

In [27]:
bayes_selector.initialize_priors()

In [31]:
print(bayes_selector.gene_priors)
print(bayes_selector.sgrna_priors)
print(bayes_selector.dispersion_priors)

{'mean': array([0., 0.]), 'variance': array([1., 1.])}
{'mean': array([1., 1., 1., 1.]), 'variance': array([1., 1., 1., 1.])}
{'shape': array([1., 1.]), 'scale': array([1., 1.])}
