In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import sys
sys.path.insert(0, "../func_py/")
import infer_noise as infn
import data_utils as dt

In [2]:
# Sample metadata table; the first column of the TSV becomes the index,
# which the cells below use to look samples up by name.
metadata = pd.read_csv(
    'metadata/metadata.tsv',
    index_col=0,
    sep='\t',
)

### Learning the three noise models for all the memory or plasmablast samples

The learned parameters are saved in the `inference/noise` folder

In [None]:
# Samples that are not used in the analysis and therefore skipped.
skipped_samples = ('pat1_t4_pc', 'pat1_t1_pc')

# One entry per noise model, fitted in this order for every sample:
# (factory building the inference object, starting point, parameter bounds).
# Factories are used so that each inference object is only created right
# before it is run.
noise_models = [
    # Poisson
    (
        lambda uniq, counts: infn.infer_noise_poiss(
            uniq, counts, False, n_points=10000
        ),
        (2.3, -5.5),
        ((2, 2.7), (-7, -4.5)),
    ),
    # Neg bin beta = 1
    (
        lambda uniq, counts: infn.infer_noise_negbin(
            uniq, counts, False, False, n_points=10000, n_threads=5
        ),
        (2.3, -5.5, 0.1),
        ((2, 2.7), (-7, -4.5), (0.001, 2)),
    ),
    # Neg bin free beta
    (
        lambda uniq, counts: infn.infer_noise_negbin(
            uniq, counts, True, False, n_points=10000, n_threads=5
        ),
        (2.3, -5.5, 0.1, 1),
        ((2, 2.7), (-7, -4.5), (0.001, 2), (0.3, 3)),
    ),
]

# Keep one cell type, then only the samples having more than one replicate.
aux = metadata[metadata['cell'] == 'm']  # change 'm' to 'p' for the plasmablast processing
samples = aux[aux['repl_count'] > 1].index

for sample in samples:

    if sample in skipped_samples:
        continue

    print(sample)
    n_uniq, n_counts = dt.import_sample_counts(sample, metadata.loc[sample].repl_count)

    # Fit each model in turn, store its parameters on disk and show them.
    for build_inference, x0, bounds in noise_models:
        infer = build_inference(n_uniq, n_counts)
        infer.run(x0=x0, bounds=bounds)
        infer.compute_errors()
        infer.write_on_file('inference/noise/', sample + '_' + infer.name + '.txt')
        print(infer.result.x)

### Re-learning, with fine-tuned hyperparameters, the samples for which no error could be computed
(these possibly did not converge well)

In [None]:
# Re-fit a single sample with manually chosen optimizer settings,
# starting point and bounds, printing the full result and the errors.
sample = "pat1_t2_pc"
n_uniq, n_counts = dt.import_sample_counts(sample, metadata.loc[sample].repl_count)

infer = infn.infer_noise_negbin(
    n_uniq, n_counts, False, False,
    n_points=10000,
    verbose=True,
)

# Settings overridden on the inference object before running it.
infer.ftol, infer.eps_SLSQP = 10**(-5), 10**(-2)

fit_settings = {
    'x0': (2.5, -6, 0.1),
    'bounds': ((2, 3), (-7, -4), (0.001, 1)),
}
infer.run(**fit_settings)
infer.compute_errors()

print(infer.result, infer.errors, sep='\n')