In [1]:
import numpy as np
import scipy as scipy
from scipy.optimize import minimize
import timeit

# Try Solving 4 Ancestry problem, multiple SNPs

In this notebook we attempt to solve the following constrained, quadratic optimization problem:

$$\min_{\pi \in \mathbb{R}^4} f(\pi)=\sum_{i=1}^{N}(a_{1,i}\pi_1+a_{2,i}\pi_2+a_{3,i}\pi_3+a_{4,i}\pi_4-\tilde{a}_i)^2$$

$$\text{subject to:} \quad \pi_1+\pi_2+\pi_3+\pi_4=1 \quad \pi_1\geq 0 \quad \pi_2 \geq 0 \quad \pi_3 \geq 0 \quad \pi_4 \geq 0,$$

where $a_{j,i} \in \mathbb{R}$, $j=1,2,3,4$, $i=1,\ldots,N$, and $\tilde{a}_i \in \mathbb{R}$, $i=1,\ldots,N$, are quantities obtained from a genetics simulation. The $a_{1,i}$'s correspond to the observed allele frequency in ancestry 1; the $a_{2,i}$'s correspond to the observed allele frequency in ancestry 2; the $a_{3,i}$'s correspond to the observed allele frequency in ancestry 3; the $a_{4,i}$'s correspond to the observed allele frequency in ancestry 4; the $\tilde{a}_i$'s correspond to the observed total allele frequency in the population. Here, $N$ is the number of SNPs.

In [2]:
# Simulate 10^6 SNPs

M = 1000000  # number of simulated SNPs (this is N in the problem statement)

# Fix the seed so that a fresh Restart & Run All reproduces the same data.
np.random.seed(0)

# Allele frequencies for each of the four ancestries, stored as (M, 1) columns.
a_1 = np.random.uniform(low=0, high=0.6, size=(M, 1))
a_2 = np.random.uniform(low=0.2, high=0.5, size=(M, 1))
a_3 = np.random.uniform(low=0.1, high=0.5, size=(M, 1))
a_4 = np.random.uniform(low=0, high=0.5, size=(M, 1))

# Total allele frequency built from known mixing proportions, so the "answer"
# the optimizer should recover is pi_1=.1, pi_2=.2, pi_3=.7, pi_4=0.
a_t = .1*a_1 + .2*a_2 + .7*a_3 + 0*a_4

In [3]:
def function(x):
    """Least-squares objective f(pi) from the problem statement.

    x is indexed as x[0]..x[3] (the four mixing proportions). The residual
    a_1*x[0] + a_2*x[1] + a_3*x[2] + a_4*x[3] - a_t is formed from the
    module-level arrays, squared elementwise, and summed along axis 0.
    """
    residual = a_1*x[0] + a_2*x[1] + a_3*x[2] + a_4*x[3] - a_t
    return np.sum(residual**2, axis=0)

In [4]:
# Feasible starting point: four equal, positive weights that sum to 1.
x_t = np.full(4, 1/4)

# Sanity check: evaluate the objective at the starting point.
print(function(x_t))

[4799.17608692]


In [5]:
# Here is the gradient of the objective function

def gradfun(x):
    """Gradient of the least-squares objective with respect to x[0]..x[3].

    Component j is the sum over SNPs of 2 * a_j * residual, where
    residual = a_1*x[0] + a_2*x[1] + a_3*x[2] + a_4*x[3] - a_t.

    x only needs to support indexing x[0]..x[3] (an array or a tuple both work).
    Each component is reduced with axis=0, so with the (M, 1) column arrays
    built in the simulation cell the result has shape (4, 1).
    """
    # The residual is shared by all four components: compute it once instead
    # of rebuilding the same length-M array for every partial derivative.
    residual = a_1*x[0] + a_2*x[1] + a_3*x[2] + a_4*x[3] - a_t
    return np.array((np.sum(2*a_1*residual, axis=0),
                     np.sum(2*a_2*residual, axis=0),
                     np.sum(2*a_3*residual, axis=0),
                     np.sum(2*a_4*residual, axis=0)))

In [7]:
# Print the gradient at the starting point, then at the mixing proportions
# that were used to build a_t (where every component should be zero).
for point in (x_t, (.1, .2, .7, 0)):
    print(gradfun(point))

[[  3014.06720177]
 [ -6264.53388109]
 [-18002.94914112]
 [  5432.56669653]]
[[0.]
 [0.]
 [0.]
 [0.]]


## SLSQP

In [8]:
# Equality constraint: the four proportions must sum to 1.
sum_to_one = {'type': 'eq', 'fun': lambda x: x[0] + x[1] + x[2] + x[3] - 1}

# One inequality constraint per component, each requiring x[k] >= 0.
# `k=k` binds the index at definition time so each lambda keeps its own k.
cons = (sum_to_one,) + tuple(
    {'type': 'ineq', 'fun': (lambda x, k=k: x[k])} for k in range(4)
)

# Box bounds: every component has lower bound 0 and no upper bound.
bnds = ((0, None),) * 4

In [9]:
# Time the SLSQP solve; the measured span covers both the solve and the
# printing of the result.
start = timeit.default_timer()

slsqp_result = minimize(
    function,
    x_t,
    method='SLSQP',
    jac=gradfun,
    bounds=bnds,
    constraints=cons,
)
print(slsqp_result)

stop = timeit.default_timer()

print('Time: ', stop - start)

     fun: 4.633992294367328e-08
     jac: array([-0.05692527,  0.01636823,  0.03068833,  0.00805313])
 message: 'Optimization terminated successfully.'
    nfev: 15
     nit: 10
    njev: 10
  status: 0
 success: True
       x: array([0.09999889, 0.20000033, 0.70000078, 0.        ])
Time:  4.024079567888673
