In [1]:
from glob import glob

# All simulation archives under npzfiles/, in sorted order (sorted() already returns a list).
filenames = sorted(glob('npzfiles/*.npz'))

In [2]:
import numpy as np
from simulations.concentrations.MLE import get_DM_score_function
from scipy.optimize import fsolve
from scipy.optimize import root_scalar

In [3]:
from analysis_utils.concentrations_MLE import get_DM_MLE

In [4]:
# Run get_DM_MLE on the first simulation file. The displayed return value is 0;
# the following cells read estimates back from 'concentration_ML_estimates/',
# so presumably the results are written to disk as a side effect — TODO confirm
# against analysis_utils.concentrations_MLE.
get_DM_MLE(filenames[0])

0

In [5]:
# Load the stored estimates for simulation 001 from a hard-coded path
# (presumably the file produced for filenames[0] — verify the naming convention).
npzfile = np.load('concentration_ML_estimates/simulation.001.compositional.npz')

In [6]:
# Entry stored under 'whole_sim'; a single value (~91.49) in the saved output.
npzfile['whole_sim']

array([91.49459202])

In [7]:
# Entries stored under 'medium_batches'. In the saved output most values lie in
# the tens to hundreds, but a few are of order 1e11 and one is exactly 0.
npzfile['medium_batches']

array([1.04587561e+02, 3.00658184e+02, 5.94903659e+01, 5.92911795e+01,
       4.99736840e+01, 1.45886079e+02, 1.42220010e+02, 1.49163979e+02,
       1.07527125e+02, 3.65026691e+01, 1.57513108e+02, 3.93334741e+02,
       4.55642228e+01, 4.24522196e+11, 5.47745675e+01, 6.53147027e+01,
       0.00000000e+00, 4.05926582e+01, 1.31952548e+11, 7.75473988e+01,
       1.25263798e+02, 1.01572081e+02, 2.33643258e+02, 4.13022921e+01,
       3.40567524e+01, 1.37069741e+02, 5.28896889e+01, 1.20397866e+02,
       4.32745908e+02, 3.84747688e+02])

In [8]:
# Entries stored under 'small_batches'; the saved output includes inf values
# and at least one value of order 1e12.
npzfile['small_batches']

array([           inf,            inf, 4.52850503e+01, ...,
       7.86032890e+00,            inf, 2.59201845e+12])

## Example

In [9]:
# Frequency vector: every base abundance is scaled by 1, 2 and 5, and each
# scaled value is repeated ten times.
base_relative_abundances = [1e-4, 1e-3, 1e-2]

relative_abundances = [
    base * multiplier
    for base in base_relative_abundances
    for multiplier in (1, 2, 5)
    for _ in range(10)
]

# A final entry takes the remaining mass, i.e. one minus the sum so far.
relative_abundances.append(1 - sum(relative_abundances))
frequencies = np.array(relative_abundances)

# The droplets of the first simulation file feed the score function.
npzfile = np.load(filenames[0])
droplets = npzfile['droplets']

score_function = get_DM_score_function(droplets, frequencies)

`fsolve` is the more general framework/wrapper, but I seemed to get better results using the more 'targeted' options of `root_scalar` when possible.

In [10]:
# fsolve started at 200; full_output=True also returns the info dict, status
# flag and message. In the saved output it stops near 1.57e11 with flag 5
# ("not making good progress") rather than at the ~91.49 root.
fsolve(score_function, 200, full_output=True)

(array([1.57366337e+11]),
 {'nfev': 84,
  'fjac': array([[-1.]]),
  'r': array([-5.70812279e-24]),
  'qtf': array([8.13151629e-20]),
  'fvec': -8.131516293641283e-20},
 5,
 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.')

In [11]:
# Bracketed root search on (1e-4, 1e4); the saved output reports root ~91.4946.
# NOTE(review): with a bracket and no explicit method, root_scalar appears to
# pick a bracketing solver that does not use x0 — every root_scalar call in this
# notebook shows the same root, call count and iteration count regardless of
# x0. Confirm against the scipy documentation.
root_scalar(f=score_function, x0=200, bracket=(0.0001,10000))

      converged: True
           flag: 'converged'
 function_calls: 17
     iterations: 16
           root: 91.49459202124932

In [12]:
# fsolve started at 100; the saved output shows flag 1 ("The solution
# converged.") at ~91.4946 after 10 function evaluations.
fsolve(score_function, 100, full_output=True)

(array([91.49459203]),
 {'nfev': 10,
  'fjac': array([[-1.]]),
  'r': array([0.00750787]),
  'qtf': array([5.82076609e-11]),
  'fvec': 0.0},
 1,
 'The solution converged.')

In [13]:
# fsolve started at 50 (below the root); the saved output shows flag 1 at
# ~91.4946 after 12 function evaluations.
fsolve(score_function, 50, full_output=True)

(array([91.49459203]),
 {'nfev': 12,
  'fjac': array([[-1.]]),
  'r': array([0.00376258]),
  'qtf': array([-1.80443749e-09]),
  'fvec': 0.0},
 1,
 'The solution converged.')

In [14]:
# fsolve started at 150; in the saved output it stops near 1.59e11 with flag 5
# ("not making good progress") rather than at the ~91.49 root.
fsolve(score_function, 150, full_output=True)

(array([1.59367819e+11]),
 {'nfev': 83,
  'fjac': array([[-1.]]),
  'r': array([0.]),
  'qtf': array([1.08420217e-19]),
  'fvec': -1.0842021724855044e-19},
 5,
 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.')

In [39]:
# Bracketed root search on (1e-4, 1e4) with x0=150; saved output: root ~91.4946.
# NOTE(review): this cell carries execution count 39, so it was run out of
# order relative to its neighbours; x0 also appears to have no effect when a
# bracket is supplied (identical output for every x0 tried) — confirm.
root_scalar(f=score_function, x0=150, bracket=(0.0001,10000))

      converged: True
           flag: 'converged'
 function_calls: 17
     iterations: 16
           root: 91.49459202124932

In [15]:
# fsolve started at 175; in the saved output it stops near 1.07e11 with flag 5
# ("not making good progress") rather than at the ~91.49 root.
fsolve(score_function, 175, full_output=True)

(array([1.07352155e+11]),
 {'nfev': 89,
  'fjac': array([[-1.]]),
  'r': array([-1.04711528e-26]),
  'qtf': array([2.16840434e-19]),
  'fvec': -2.168404344971009e-19},
 5,
 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.')

In [16]:
# Bracketed root search on (1e-4, 1e4) with x0=175; saved output: root ~91.4946.
# NOTE(review): x0 appears to have no effect when a bracket is supplied
# (identical output for every x0 tried in this notebook) — confirm.
root_scalar(f=score_function, x0=175, bracket=(0.0001,10000))

      converged: True
           flag: 'converged'
 function_calls: 17
     iterations: 16
           root: 91.49459202124932

In [17]:
# fsolve started at 160. NOTE: the saved output reports flag 1 ("The solution
# converged.") but at ~3.63e11, far from the ~91.49 root — the success flag
# alone is therefore not a reliable indicator here.
fsolve(score_function, 160, full_output=True)

(array([3.62569712e+11]),
 {'nfev': 75,
  'fjac': array([[-1.]]),
  'r': array([-6.8601629e-31]),
  'qtf': array([8.13151629e-20]),
  'fvec': 0.0},
 1,
 'The solution converged.')

In [18]:
# Bracketed root search on (1e-4, 1e4) with x0=160; saved output: root ~91.4946.
# NOTE(review): x0 appears to have no effect when a bracket is supplied
# (identical output for every x0 tried in this notebook) — confirm.
root_scalar(f=score_function, x0=160, bracket=(0.0001,10000))

      converged: True
           flag: 'converged'
 function_calls: 17
     iterations: 16
           root: 91.49459202124932

In [19]:
# fsolve started at 155; in the saved output it stops near 2.32e11 with flag 5
# ("not making good progress") rather than at the ~91.49 root.
fsolve(score_function, 155, full_output=True)

(array([2.31858051e+11]),
 {'nfev': 89,
  'fjac': array([[-1.]]),
  'r': array([-9.92917198e-23]),
  'qtf': array([2.71050543e-20]),
  'fvec': -2.710505431213761e-20},
 5,
 'The iteration is not making good progress, as measured by the \n  improvement from the last ten iterations.')

In [20]:
# Bracketed root search on (1e-4, 1e4) with x0=155; saved output: root ~91.4946.
# NOTE(review): x0 appears to have no effect when a bracket is supplied
# (identical output for every x0 tried in this notebook) — confirm.
root_scalar(f=score_function, x0=155, bracket=(0.0001,10000))

      converged: True
           flag: 'converged'
 function_calls: 17
     iterations: 16
           root: 91.49459202124932

#### score function evaluated at some values

In [22]:
# Score at 50: positive (~0.897) in the saved output, i.e. on the low side of
# the ~91.49 root.
score_function(50)

0.8972832314902917

In [23]:
# Score at 100: slightly negative (~-0.0247) in the saved output, so the sign
# change lies between 50 and 100.
score_function(100)

-0.024706540018087253

In [24]:
# Probe just above 100. NOTE(review): the later offsets double (0.3125, 0.625,
# 1.25, 2.5, 5, 10, 20); 0.10625 breaks that pattern — possibly 0.15625 was
# intended.
score_function(100.10625)

-0.02493790560401976

In [25]:
# Probe at 100 + 0.3125; saved output ~-0.0254.
score_function(100.3125)

-0.025382514810189605

In [26]:
# Probe at 100 + 0.625; saved output ~-0.0260.
score_function(100.625)

-0.02604495780542493

In [27]:
# Probe at 100 + 1.25; saved output ~-0.0273.
score_function(101.25)

-0.027330252050887793

In [28]:
# Probe at 100 + 2.5; saved output ~-0.0297.
score_function(102.5)

-0.029749218316283077

In [29]:
# Probe at 100 + 5; saved output ~-0.0340.
score_function(105)

-0.034030697657726705

In [30]:
# Probe at 100 + 10; saved output ~-0.0407, still decreasing.
score_function(110)

-0.040713346359552816

In [31]:
# Probe at 100 + 20; saved output ~-0.0486, the most negative value among the
# points evaluated in this section.
score_function(120)

-0.04864478114177473

In [32]:
# Score at 200: ~-0.0414 in the saved output, i.e. less negative than at 120,
# so the score has started rising back toward zero somewhere in between.
score_function(200)

-0.04139854936511256

In [33]:
# Score at 20000: ~-8e-6 in the saved output, approaching zero from below.
# Presumably this flat tail is why fsolve can drift to ~1e11 with a near-zero
# residual when started above the root — hedged interpretation, verify.
score_function(20000)

-8.14231771073537e-06

Hm, so this seems to have the same shape as when $\zeta_C=1$, except that the curve is much shallower (less steep), which presumably reflects how much more difficult it is to home in on the correct value.

## Does this really work in general?

In [34]:
# Repeat the estimation on the file at index 17. The displayed return value is
# 0; the next cell loads 'simulation.018...', so presumably index 17 maps to
# simulation 018 — verify against the contents of `filenames`.
get_DM_MLE(filenames[17])

0

In [35]:
# Load the stored estimates for simulation 018 from a hard-coded path; this
# rebinds `npzfile`, which earlier cells used for other archives.
npzfile = np.load('concentration_ML_estimates/simulation.018.compositional.npz')

In [36]:
# Entry stored under 'whole_sim'; a single value (~102.41) in the saved output.
npzfile['whole_sim']

array([102.41137757])

In [37]:
# Entries stored under 'medium_batches'. The saved output contains inf values
# and values of order 1e10-1e11 alongside values in the tens to hundreds.
npzfile['medium_batches']

array([           inf, 1.00404512e+02, 2.36957313e+10,            inf,
       1.01065302e+02, 1.39049554e+02, 3.93718979e+01, 5.30393926e+01,
       8.79323611e+11, 1.96092853e+02, 1.90109334e+02, 1.19229740e+02,
       1.21026909e+02, 8.06011844e+01, 5.38672243e+01, 7.94434926e+01,
       3.25675627e+01, 1.17135019e+02, 7.68533608e+01, 6.38216894e+01,
       2.54083098e+02, 4.87343768e+01, 1.03115993e+02, 8.79596094e+01,
       5.51119041e+01, 6.25290278e+01, 1.96075000e+02, 1.29371506e+02,
       4.81058600e+02, 6.03377721e+01])

In [38]:
# Entries stored under 'small_batches'; the saved output includes inf values
# among finite values around 20-25.
npzfile['small_batches']

array([22.57935542,         inf,         inf, ..., 24.78715696,
       21.06964914, 20.87074693])