In [21]:
import numpy as np
import scipy as scipy
from scipy.optimize import minimize
import timeit

# Try Solving 3 Ancestry problem, multiple SNPs

In this notebook we attempt to solve the following constrained, quadratic optimization problem:

$$\min_{\pi \in \mathbb{R}^3} f(\pi)=\sum_{i=1}^{N}(a_i\pi_1+b_i\pi_2+c_i\pi_3-d_i)^2$$

$$\text{subject to:} \quad \pi_1+\pi_2+\pi_3=1 \quad \pi_1\geq 0 \quad \pi_2 \geq 0 \quad \pi_3 \geq 0,$$

where $a_i, b_i, c_i, d_i \in \mathbb{R}$, $i=1,\ldots,N$, are quantities obtained from a genetics simulation. The $a_i$'s correspond to the observed allele frequency in ancestry 1; the $b_i$'s correspond to the observed allele frequency in ancestry 2; the $c_i$'s correspond to the observed allele frequency in ancestry 3; the $d_i$'s correspond to the observed total allele frequency in the population. Here, $N$ is the number of SNPs (called `M` in the code below).

In [22]:
# Simulate 10^6 SNPs

# Megan : this is where you will replace the a, b, and c vectors I form with random draws
#         with the data from the SNPS. They should be column vectors!

# Seed the global NumPy RNG so the simulated frequencies, and every number
# computed from them further down, are identical on each Restart & Run All.
SEED = 0
np.random.seed(SEED)

M = 1000000  # number of simulated SNPs

# Allele frequencies for ancestries 1, 2 and 3, each an (M, 1) column vector.
a = np.random.uniform(low=0, high=0.6, size=(M, 1))
b = np.random.uniform(low=0.2, high=0.5, size=(M, 1))
c = np.random.uniform(low=0.1, high=0.5, size=(M, 1))

# Total allele frequency built from known mixing proportions, so the "answer"
# the optimizer should recover is pi_1=.1, pi_2=.3, pi_3=.6.
d = .1*a + .3*b + .6*c

In [23]:
# This is the objective function!

def function(x):
    """Least-squares objective f(x) for candidate mixing proportions x.

    Reads the module-level arrays a, b, c and d and returns the sum over
    axis 0 of (a*x[0] + b*x[1] + c*x[2] - d)**2. Because the data are
    (M, 1) column vectors, the result is a 1-element array, not a bare float.
    """
    predicted = a*x[0] + b*x[1] + c*x[2]
    squared_error = (predicted - d)**2
    return np.sum(squared_error, axis=0)

In [24]:
# Uniform starting guess: all three components are positive and they sum to 1,
# so the point is feasible for the constrained problem.

x_t = np.full(3, 1/3)

# Sanity check: evaluate the objective at the starting point.

print(function(x_t))

[2593.38272323]


In [25]:
# Here is the gradient of the objective function



def gradfun(x):
    """Gradient of the least-squares objective with respect to x.

    Reads the module-level arrays a, b, c and d. Component k is the sum over
    axis 0 of 2 * v_k * residual, where v_k is a, b or c respectively and
    residual = a*x[0] + b*x[1] + c*x[2] - d.

    With (M, 1) column-vector data the three sums are 1-element arrays, so
    the returned array has shape (3, 1).
    """
    # The residual is shared by all three partial derivatives; compute it once
    # instead of re-evaluating the full length-M expression for each one.
    residual = a*x[0] + b*x[1] + c*x[2] - d
    return np.array((
        np.sum(2*a*residual, axis=0),
        np.sum(2*b*residual, axis=0),
        np.sum(2*c*residual, axis=0),
    ))

In [26]:
# Show the gradient at the starting point, then at the mixing proportions
# (.1, .3, .6).
for point in (x_t, (.1, .3, .6)):
    print(gradfun(point))

[[14968.5393493 ]
 [ 1639.15836137]
 [-6148.00369839]]
[[0.]
 [0.]
 [0.]]


## SLSQP

In [27]:
# Equality constraint: the three proportions must sum to 1.
sum_to_one = {'type': 'eq', 'fun': lambda x:  x[0] + x[1] +x[2] -1}

# One inequality constraint per component: x[i] >= 0.
# `i=i` freezes the index at definition time for each lambda.
nonnegative = tuple({'type': 'ineq', 'fun': lambda x, i=i: x[i]} for i in range(3))

cons = (sum_to_one,) + nonnegative

# Box bounds: lower bound 0 and no upper bound on every component.
bnds = ((0, None),) * 3

In [28]:
# Time only the solve itself; printing happens after the clock has stopped.
start = timeit.default_timer()

# Keep the OptimizeResult instead of only printing it, so the solution
# (result.x) and status (result.success) stay available to later cells.
result = minimize(function, x_t, method='SLSQP', jac=gradfun, bounds=bnds, constraints=cons)

stop = timeit.default_timer()

print(result)

print('Time: ', stop - start)

     fun: 1.496343175783729e-12
     jac: array([ 0.00023344, -0.00024889, -0.00018775])
 message: 'Optimization terminated successfully.'
    nfev: 11
     nit: 6
    njev: 6
  status: 0
 success: True
       x: array([0.10000001, 0.3       , 0.6       ])
Time:  1.9427780038786295
