In [5]:
import numpy as np
import scipy as scipy
from scipy.optimize import minimize
import timeit

# Try Solving Many Ancestries problem, multiple SNPs

In this notebook we attempt to solve the following constrained, quadratic optimization problem:

$$\min_{\pi \in \mathbb{R}^k} f(\pi)=\sum_{i=1}^{N}\left(\sum_{j=1}^k a_{j,i}\pi_j-\tilde{a}_i\right)^2$$

$$\text{subject to:} \quad \sum_{j=1}^k \pi_j=1, \quad \pi_j \geq 0, \; j=1,\ldots,k,$$

where $a_{j,i} \in \mathbb{R}$, $j=1,\ldots,k$, $i=1,\ldots,N$, and $\tilde{a}_i \in \mathbb{R}$, $i=1,\ldots,N$, are quantities obtained from a genetics simulation. The $a_{j,i}$'s correspond to the observed allele frequency in ancestry $j$ at SNP $i$, and $\tilde{a}_i$ is the total allele frequency at SNP $i$. There are $k$ ancestries and $N$ SNPs.

In [197]:
N = 70000  # number of SNPs
k = 10     # number of ancestries
SEED = 0   # fixed seed so that Restart & Run All reproduces the same data

# Draw all N x k simulated allele frequencies in one call from a local, seeded
# generator, instead of growing the array column by column with hstack.
rng = np.random.RandomState(SEED)
A = rng.uniform(low=0, high=1, size=(N, k))

# Time for some abbreviations! a_j is column j of A, kept as an (N, 1) slice
# because other cells refer to these ten names directly.
a_1, a_2, a_3, a_4, a_5, a_6, a_7, a_8, a_9, a_10 = (A[:, j:j + 1] for j in range(10))

# And form an answer, taf = total allele frequency.
# NOTE(review): these generating weights sum to 0.99, not 1, so they do not
# satisfy the sum-to-one constraint of the optimization problem; a constrained
# solver therefore cannot recover them exactly. Confirm the intended values.
true_pi = np.array([0.1, 0.15, 0.2, 0.25, 0.2, 0.05, 0.02, 0.01, 0.005, 0.005])
taf = A[:, :true_pi.size] @ true_pi.reshape(-1, 1)

print(np.shape(A), np.shape(taf))

(70000, 10) (70000, 1)


In [198]:
# This is the objective function!

def function(x):
    """Sum of squared residuals f(x) = sum_i (sum_j a_{j,i} * x_j - taf_i)^2.

    Works for any number of ancestries: the columns of the module-level
    matrix A are combined with the entries of x, rather than spelling out
    ten named columns.

    Parameters
    ----------
    x : array_like
        Candidate ancestry proportions, one entry per column of A. Both a
        flat vector of shape (k,) and a column of shape (k, 1) are accepted.

    Returns
    -------
    numpy.float64
        The objective value at x.

    Notes
    -----
    Reads the module-level arrays A (one column per ancestry) and taf
    (one row per SNP, single column).
    """
    # Force a (k, 1) column so the product has the same shape as taf
    # regardless of whether x arrived flat or as a column.
    weights = np.reshape(x, (-1, 1))
    residual = A @ weights - taf
    return np.sum(residual ** 2)

In [199]:
# Start from the uniform point: every component equals 1/k, so all entries
# are positive and they add up to 1, which makes the point feasible.

x_t = np.full((k, 1), 1 / k)
print('check shape of x_t:', x_t.shape, x_t.sum(axis=0))

# Sanity-check the objective by evaluating f(x_t)

print('our initial value is', x_t.T)  # shown as a row for readability only
print('which has function value', function(x_t))

check shape of x_t: (10, 1) [1.]
our initial value is [[0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]]
which has function value 464.85034328349195


In [200]:
# Here is the gradient of the objective function

def gradfun(x):
    """Gradient of the sum-of-squares objective with respect to x.

    Component j is sum_i 2 * a_{j,i} * r_i, where r = A x - taf is the
    residual. The residual is computed once and reused for every component,
    and the number of ancestries is taken from the columns of A.

    Parameters
    ----------
    x : array_like
        Candidate ancestry proportions, one entry per column of A. Both a
        flat vector of shape (k,) and a column of shape (k, 1) are accepted.

    Returns
    -------
    numpy.ndarray
        Column vector of shape (k, 1) holding the partial derivatives.

    Notes
    -----
    Reads the module-level arrays A (one column per ancestry) and taf
    (one row per SNP, single column).
    """
    residual = A @ np.reshape(x, (-1, 1)) - taf
    # (k, N) @ (N, 1) -> (k, 1): one partial derivative per ancestry
    return 2.0 * (A.T @ residual)

In [201]:
# Evaluate the gradient at the initial point and show it next to its shape
s = gradfun(x_t)
print(s, s.shape)

[[  307.03790492]
 [ -263.5835205 ]
 [ -840.6678123 ]
 [-1425.11238433]
 [ -855.28445328]
 [  886.57821031]
 [ 1208.79671311]
 [ 1358.28337183]
 [ 1415.05106922]
 [ 1425.8044603 ]] (10, 1)


## SLSQP

In [202]:
# Equality constraint: the proportions must sum to one.
cons = ({'type': 'eq', 'fun': lambda x: np.sum(x, axis=0) - 1},)

# One inequality constraint x[j] >= 0 for each of the k components.
# The index is frozen through the default argument j=j: a bare
# `lambda x: x[i]` looks the loop variable up only when it is called, so
# every constraint would end up testing the same component. range(k) (not
# range(k - 1)) makes sure the last component is constrained as well.
cons = cons + tuple({'type': 'ineq', 'fun': lambda x, j=j: x[j]} for j in range(k))

# Box bounds (0, None), i.e. a lower bound of 0 and no upper bound, for each
# of the k components.
bnds = tuple((0, None) for _ in range(k))

In [203]:
# scipy documents both x0 and the array returned by jac as 1-D with shape
# (n,), while x_t and gradfun(x) are (k, 1) columns here. Flatten both at the
# call site so the call does not depend on scipy tolerating 2-D input.
start = timeit.default_timer()

result = scipy.optimize.minimize(function, np.ravel(x_t), method='SLSQP',
                                 jac=lambda x: np.ravel(gradfun(x)),
                                 bounds=bnds, constraints=cons)

stop = timeit.default_timer()

# Print after stopping the clock so the timing covers the solve only.
print(result)
print('Time: ', stop - start)

     fun: 1.810056003935431
     jac: array([362.00001387, 362.01905584, 361.99823059, 362.02335113,
       362.01330128, 362.00914959, 362.03136237, 362.01496582,
       361.99470427, 362.01138293])
 message: 'Optimization terminated successfully.'
    nfev: 50
     nit: 23
    njev: 23
  status: 0
 success: True
       x: array([0.10106258, 0.15100534, 0.20105102, 0.25081354, 0.20094397,
       0.05110119, 0.02104282, 0.01097375, 0.00602187, 0.00598391])
Time:  2.3339030428733167
