In [1]:
from glob import glob

# Collect the simulation archives; sorting makes filenames[i] refer to the
# same file on every run regardless of the order glob happens to return.
filenames = sorted(glob('npzfiles/*.npz'))

In [2]:
import numpy as np
from simulations.concentrations.MLE import get_NB_score_function

from scipy.optimize import root_scalar

In [3]:
from analysis_utils.concentrations_MLE import get_NB_MLE

In [4]:
# Run the NB MLE helper on the first simulation file; the displayed return value is 0.
# NOTE(review): presumably the estimates are written to disk rather than returned
# (the next cell loads a file from concentration_ML_estimates/) — confirm against analysis_utils.
get_NB_MLE(filenames[0])

0

In [5]:
# Open the archive whose 'whole_sim', 'medium_batches' and 'small_batches'
# arrays are inspected in the following cells.
# NOTE(review): presumably produced by the get_NB_MLE call above — confirm.
# The path is hard-coded and relative to the current working directory.
npzfile = np.load('concentration_ML_estimates/simulation.001.density.npz')

In [6]:
npzfile['whole_sim']

array([inf])

In [7]:
npzfile['medium_batches']

array([         inf,          inf,          inf,          inf,
       182.72346924,  14.67143429,  18.30673344,          inf,
                inf,  17.4506522 ,   9.30549054,          inf,
                inf,          inf,  82.55733542,          inf,
        27.22401464,          inf,  75.99396719,          inf,
                inf,          inf,          inf,   8.60127762,
        83.7868077 ,   7.59216147,  31.73649871,          inf,
         6.65841731,   9.09388204])

In [8]:
npzfile['small_batches']

array([2.53083655, 3.83356622, 3.02077732, ..., 1.3364964 ,        inf,
       1.36372825])

## Example

In [9]:
# Build the score function for the first simulation file by hand.
#
# np.load on an .npz returns an NpzFile that keeps the underlying file open;
# the context manager closes it as soon as the 'droplets' array has been read
# into memory, instead of leaking the handle for the rest of the session.
with np.load(filenames[0]) as npzfile:
    droplets = npzfile['droplets']

# Sum along axis 1 to get one total per row of `droplets`.
counts = np.sum(droplets, axis=1)
mean_count = np.mean(counts)
# The unpacking requires `droplets` to be 2-D: rows are taken as droplets,
# columns as strains.
number_droplets, number_strains = droplets.shape

score_function = get_NB_score_function(counts, mean_count, number_droplets, number_strains)

In [10]:
from scipy.optimize import fsolve

`fsolve` is a more general framework/wrapper, but we seem to get better results using more 'targeted' options.

In [11]:
# Try the general-purpose solver starting from x0 = 2. With full_output=True the
# result is (solution, info dict, status flag, message); in the output below the
# flag is 5 and the message reports that the iteration is not making good progress.
fsolve(score_function, 2, full_output=True)

(array([7718.34625062]),
 {'nfev': 52,
  'fjac': array([[-1.]]),
  'r': array([3.86724793e-10]),
  'qtf': array([-4.48395099e-09]),
  'fvec': array([4.48395099e-09])},
 5,
 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.')

In [12]:
# Disabled bracketed root search.
# NOTE(review): every score_function value sampled further down in this notebook
# (arguments from 0.5 through 2000000) is positive, so there may be no sign change
# inside this bracket — check before re-enabling.
#root_scalar(f=score_function, x0=2, bracket=(0.000001,1000000))

In [13]:
fsolve(score_function, 1, full_output=True)

(array([11713.98005144]),
 {'nfev': 53,
  'fjac': array([[-1.]]),
  'r': array([7.34380711e-09]),
  'qtf': array([-1.67418435e-09]),
  'fvec': array([1.67418435e-09])},
 5,
 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.')

In [14]:
fsolve(score_function, 0.5, full_output=True)

(array([14315.03104674]),
 {'nfev': 69,
  'fjac': array([[-1.]]),
  'r': array([-0.02513898]),
  'qtf': array([-2.00312655e-10]),
  'fvec': array([2.00312655e-10])},
 5,
 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.')

In [15]:
# Evaluate the score near the value fsolve returned from x0 = 0.5 (about 14315.03)
# to see how close to zero it actually is there.
score_function(14315)

2.8560975806612987e-10

In [16]:
fsolve(score_function, 1.5, full_output=True)

(array([20712.58675024]),
 {'nfev': 46,
  'fjac': array([[-1.]]),
  'r': array([2.19484032e-06]),
  'qtf': array([-2.67299072e-10]),
  'fvec': array([-5.16298115e-11])},
 1,
 'The solution converged.')

In [17]:
# Evaluate the score near the value fsolve returned from x0 = 1.5 (about 20712.59).
score_function(20712)

2.226968831564591e-09

In [18]:
fsolve(score_function, 1.75, full_output=True)

(array([40376.27617048]),
 {'nfev': 47,
  'fjac': array([[-1.]]),
  'r': array([-7.49442344e-06]),
  'qtf': array([7.07675696e-10]),
  'fvec': array([1.82646787e-10])},
 1,
 'The solution converged.')

In [19]:
#root_scalar(f=score_function, x0=1.75, bracket=(0.0001,10000))

In [20]:
fsolve(score_function, 1.6, full_output=True)

(array([15467.28055991]),
 {'nfev': 48,
  'fjac': array([[-1.]]),
  'r': array([0.00137733]),
  'qtf': array([1.65119474e-10]),
  'fvec': array([6.75015599e-14])},
 1,
 'The solution converged.')

In [21]:
#root_scalar(f=score_function, x0=1.6, bracket=(0.0001,10000))

In [22]:
fsolve(score_function, 1.55, full_output=True)

(array([20472.24827151]),
 {'nfev': 55,
  'fjac': array([[-1.]]),
  'r': array([0.00078653]),
  'qtf': array([-2.64186895e-10]),
  'fvec': array([0.])},
 1,
 'The solution converged.')

In [23]:
#root_scalar(f=score_function, x0=1.55, bracket=(0.0001,10000))

In [24]:
fsolve(score_function, 1.5, full_output=True)

(array([20712.58675024]),
 {'nfev': 46,
  'fjac': array([[-1.]]),
  'r': array([2.19484032e-06]),
  'qtf': array([-2.67299072e-10]),
  'fvec': array([-5.16298115e-11])},
 1,
 'The solution converged.')

#### score function evaluated at some values

In [25]:
score_function(0.5)

287.7254638095619

In [26]:
score_function(1)

38.03719523217296

In [27]:
score_function(1.0010625)

37.91867890051799

In [28]:
score_function(1.003125)

37.690021776768845

In [29]:
score_function(1.00625)

37.3470670543029

In [30]:
score_function(1.0125)

36.67355923040304

In [31]:
score_function(1.025)

35.37443173659267

In [32]:
score_function(1.05)

32.95487449277425

In [33]:
score_function(1.1)

28.740293042035773

In [34]:
score_function(1.2)

22.24562265118584

In [35]:
score_function(2)

4.925287718884647

In [36]:
score_function(200)

1.3687089222003124e-05

In [37]:
score_function(2000)

9.254483757104026e-08

In [38]:
score_function(20000)

2.5155699745482707e-10

In [39]:
score_function(200000)

2.792834852272108e-10

In [40]:
score_function(2000000)

3.6021030602739756e-10

## Does this really work in general?

In [41]:
# Repeat the helper call on another simulation file (index 17 of the sorted list);
# the displayed return value is again 0.
# NOTE(review): presumably this corresponds to simulation.018 loaded in the next cell — confirm.
get_NB_MLE(filenames[17])

0

In [42]:
# Open the archive for simulation 018; its 'whole_sim', 'medium_batches' and
# 'small_batches' arrays are inspected in the following cells.
# This rebinds `npzfile`, replacing the handle bound earlier in the notebook.
npzfile = np.load('concentration_ML_estimates/simulation.018.density.npz')

In [43]:
npzfile['whole_sim']

array([41.16664349])

In [44]:
npzfile['medium_batches']

array([ 23.21907063, 101.91069849,  18.71498741,   6.68996023,
                inf,          inf,   4.00084782,          inf,
         7.73431189,          inf,   3.18218438,  11.75795288,
                inf,   7.70825342,          inf,          inf,
        50.01903226,  12.87785257,          inf,          inf,
                inf,          inf,  14.81120338,   5.65988123,
         9.13338503,          inf,          inf,          inf,
                inf,   7.79158829])

In [45]:
npzfile['small_batches']

array([14.24871131, 11.67995124,         inf, ...,  1.52016366,
        4.80585608,         inf])