In [1]:
import scipy
import numpy as np
import matplotlib
from matplotlib import pyplot
from scipy.optimize import minimize 
from scipy.optimize import least_squares

In [2]:
numgenes = 10
numtfs = 5

# Seed NumPy's global generator so that "Restart Kernel -> Run All" produces the
# same random matrices (and therefore the same fit results) on every run.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# Per-gene scale (alpha) and offset (beta) applied in getGeneExpression:
# expression[i] = beta[i] + alpha[i] * sum_x tfa[i][x] / (tfa[i][x] + cs[i][x])
alpha = np.ones(numgenes)
beta = np.zeros(numgenes)

# Random numgenes x numtfs parameter matrices, uniform on [0, 1).
cs = np.random.rand(numgenes, numtfs)
tfa = np.random.rand(numgenes, numtfs)

In [3]:
"""
Calculates the predicted gene expression values for the given parameters, assuming enhancers
"""
def getGeneExpression(alpha, beta, cs, tfa):
    """
    Returns a list with one predicted expression value per row of cs.

    For row i the value is
        beta[i] + alpha[i] * sum over x of tfa[i][x] / (tfa[i][x] + cs[i][x])
    where x runs over the columns of cs (taken from the length of cs[0]).
    """
    predictions = []
    for gene in range(len(cs)):
        # Accumulate left to right starting from 0, exactly as sum() does.
        saturation = 0
        for tf in range(len(cs[0])):
            activity = tfa[gene][tf]
            saturation = saturation + activity / (activity + cs[gene][tf])
        predictions.append(beta[gene] + alpha[gene] * saturation)
    return predictions

# Reference expression vector produced by the initial random parameters.
exprs = getGeneExpression(alpha=alpha, beta=beta, cs=cs, tfa=tfa)
#print(exprs)

In [4]:
"""
Calculates the error in the predicted gene expression values given the true values and the parameters
"""
def getErrorFromParameters(exprs, alpha, beta, cs, tfa):
    """
    Sum of squared differences between exprs and the expression values that
    getGeneExpression predicts for (alpha, beta, cs, tfa).

    Values are paired positionally with zip, so any surplus entries on the
    longer side are ignored.
    """
    predicted = getGeneExpression(alpha, beta, cs, tfa)
    return sum((actual - guess) ** 2 for guess, actual in zip(predicted, exprs))

In [5]:
"""
Calculates error given predicted and actual values
"""
def getError(exprs, pred):
    """
    Sum of squared differences between the actual values (exprs) and the
    predicted values (pred).

    Values are paired positionally with zip, so any surplus entries on the
    longer side are ignored; two empty inputs give 0.
    """
    total = 0
    for guess, actual in zip(pred, exprs):
        total = total + (actual - guess) ** 2
    return total

# A second, independently drawn pair of parameter matrices (uniform on [0, 1)).
cs2 = np.random.random_sample(size=(numgenes, numtfs))
tfa2 = np.random.random_sample(size=(numgenes, numtfs))

# print(getErrorFromParameters(exprs, alpha, beta, cs2, tfa2))

In [6]:
"""
Calculates the predicted gene expression values for the given parameters, assuming enhancers
In addition, this simulates transcription factor at index tfdel is deleted, therefore setting
the activity of that transcription factor in the tfa matrix to zero
"""
def getGeneExpressionWithDeletion(alpha: np.ndarray, beta: np.ndarray, cs: np.ndarray, tfa: np.ndarray, tfdel: int) -> list:
    """
    Predicts gene expression with one transcription factor knocked out.

    Parameters:
        alpha, beta, cs: passed through unchanged to getGeneExpression.
        tfa: 2-D matrix (ndarray or array-like); it is never modified.
        tfdel: index of the tfa column that is set to zero before predicting.

    Returns:
        Whatever getGeneExpression returns for the modified tfa (a list with
        one value per row of cs).

    Raises:
        IndexError: if tfa is not 2-D or tfdel is not a valid column index.
    """
    # Work on a copy so the caller's matrix is left untouched.
    knocked_out = np.array(tfa, copy=True)
    # Assigning a scalar zeroes the whole column; no temporary vector needed.
    knocked_out[:, tfdel] = 0
    return getGeneExpression(alpha, beta, cs, knocked_out)

# for i in range(numtfs):
#     print(getGeneExpressionWithDeletion(alpha, beta, cs, tfa, i))

In [7]:
"""
Calculates total error from matrix of gene expressions where column i is the expression vector for
the system with transcription factor i deleted. Thus this is a numgenes * numtfs matrix
"""
def getErrorWithDeletionsFromParameters(expr_matrix, alpha, beta, cs, tfa):
    """
    Total squared error between expr_matrix and the predictions obtained by
    deleting each transcription factor in turn.

    One prediction vector is computed per column of tfa via
    getGeneExpressionWithDeletion; the vectors are stacked and transposed so
    that column i of the prediction matrix belongs to deletion i.
    """
    per_deletion = []
    for tf_index in range(len(tfa[0])):
        per_deletion.append(getGeneExpressionWithDeletion(alpha, beta, cs, tfa, tf_index))
    pred_matrix = np.array(per_deletion).T
    residuals = pred_matrix - expr_matrix
    return np.square(residuals).sum()

In [8]:
"""
Calculates the total squared error between a matrix of observed gene expressions and an
already-computed matrix of predicted gene expressions of the same shape
"""
def getErrorWithDeletions(expr_matrix, pred_matrix):
    """
    Sum over all entries of the squared element-wise difference between
    pred_matrix and expr_matrix.
    """
    residuals = pred_matrix - expr_matrix
    squared = np.square(residuals)
    return squared.sum()


# Random stand-in for a measured deletion-expression matrix (numgenes x numtfs).
del_exprs = np.random.random_sample(size=(numgenes, numtfs))
print(getErrorWithDeletionsFromParameters(expr_matrix=del_exprs, alpha=alpha, beta=beta, cs=cs, tfa=tfa))

100.98553787512503


In [9]:
# Combined parameter matrix: each row holds numtfs cs values followed by numtfs tfa values.
csTFA = np.random.random_sample(size=(numgenes, 2 * numtfs))

In [10]:
"""
Splits csTFA (numgenes x 2*numtfs) into two numgenes x numtfs matrices, so that only the
objective function needs to be rewritten to accommodate a single
csTFA matrix
"""
def splitMat(csTFA, num_genes=None, num_tfs=None):
    """
    Unpacks a combined parameter block into separate cs and tfa matrices.

    csTFA holds, for every gene, num_tfs cs values followed by num_tfs tfa
    values. It may be given either as a (num_genes x 2*num_tfs) matrix or as
    the flattened, row-major one-dimensional version of that matrix, which is
    the form in which the minimize function hands the parameters to the
    objective function.

    Parameters:
        csTFA: array-like with exactly num_genes * 2 * num_tfs entries.
        num_genes: number of rows; defaults to the notebook-level numgenes.
        num_tfs: number of cs (and of tfa) columns; defaults to the
            notebook-level numtfs.

    Returns:
        (cs, tfa): two newly allocated num_genes x num_tfs arrays.

    Raises:
        ValueError: if csTFA does not contain num_genes * 2 * num_tfs entries.
    """
    if num_genes is None:
        num_genes = numgenes
    if num_tfs is None:
        num_tfs = numtfs
    width = 2 * num_tfs

    packed = np.asarray(csTFA)
    if packed.size != num_genes * width:
        raise ValueError(
            f"csTFA has {packed.size} entries; expected "
            f"{num_genes} x {width} = {num_genes * width}"
        )

    # A single reshape covers both the 2-D and the flattened layout, so no
    # exception handling is needed to tell them apart.
    packed = packed.reshape(num_genes, width)

    # Copy so the results never alias the optimiser's parameter vector.
    cs = packed[:, :num_tfs].copy()
    tfa = packed[:, num_tfs:].copy()
    return cs, tfa


"""
This function and the following variable exprs2 are never actually used
(I originally thought they would be), but I'm leaving them in just in
case they could be useful in the future
"""
def GGE2(alpha, beta, csTFA):
    """
    Same prediction as getGeneExpression, but taking the combined csTFA
    block instead of separate cs and tfa matrices.

    csTFA is unpacked with splitMat and the result is handed to
    getGeneExpression, so the expression formula lives in one place only.
    """
    cs, tfa = splitMat(csTFA)
    return getGeneExpression(alpha, beta, cs, tfa)

# Expression vector computed from the combined csTFA block.
exprs2 = GGE2(alpha=alpha, beta=beta, csTFA=csTFA)


"""
Same general functionality as original function. csTFA taken as first
parameter for minimization. This is then split into cs and tfa, and 
at this point the function (and call stack) becomes exactly the same 
as getErrorWithDeletionsFromParameters()
"""
def GEWDFP2(csTFA, expr_matrix, alpha, beta):
    """
    Objective function for minimize: total squared deletion error as a
    function of the combined csTFA parameter block.

    Parameters:
        csTFA: combined parameters, as a matrix or as the flat vector that
            minimize passes in; unpacked with splitMat.
        expr_matrix: expression matrix to fit, one column per deleted
            transcription factor.
        alpha, beta: per-gene scale and offset, held fixed during the fit.

    Returns:
        The value of getErrorWithDeletionsFromParameters for the unpacked
        cs and tfa matrices.
    """
    cs, tfa = splitMat(csTFA)
    # Delegate instead of repeating the error computation here.
    return getErrorWithDeletionsFromParameters(expr_matrix, alpha, beta, cs, tfa)

#cs3, tfa3 = splitMat(csTFA)

In [11]:
#GEWDFP2(csTFA, del_exprs, alpha, beta)

In [12]:
# minimize is specified for a one-dimensional starting vector; newer SciPy
# releases reject a 2-D x0, so flatten the matrix explicitly. splitMat
# restores the matrix layout inside the objective function.
minimize(GEWDFP2, np.ravel(csTFA), args=(del_exprs, alpha, beta))

      fun: 8.41315782329654e-13
 hess_inv: array([[ 9.27014069e-03,  3.26151367e-03,  1.83795279e-02, ...,
         1.41997588e-03, -7.54332464e-04, -5.93819288e-03],
       [ 3.26151367e-03,  1.00968763e+00,  1.36117707e-03, ...,
         7.42758069e-04,  5.45957523e-03, -5.84034257e-03],
       [ 1.83795279e-02,  1.36117707e-03,  9.45729608e-01, ...,
         3.61424903e-03,  4.33549583e-03, -2.97479507e-02],
       ...,
       [ 1.41997588e-03,  7.42758069e-04,  3.61424903e-03, ...,
         6.11685242e-02, -1.82656860e-02, -5.54114451e-02],
       [-7.54332464e-04,  5.45957523e-03,  4.33549583e-03, ...,
        -1.82656860e-02,  2.12670494e-01, -6.87315518e-02],
       [-5.93819288e-03, -5.84034257e-03, -2.97479507e-02, ...,
        -5.54114451e-02, -6.87315518e-02,  8.94932618e-01]])
      jac: array([ 2.11218942e-07, -2.10236896e-08, -2.79787893e-08,  8.04216847e-10,
       -3.07766485e-09,  6.72033142e-06,  9.28820545e-08,  9.45080348e-08,
        1.49785990e-07,  5.02315639e-08

In [13]:
#csTFA turned into 1D array to simulate minimize() behavior for testing

# csTFA2 = [0.27691325, 0.99232507, 0.46976717, 0.85174305, 0.85692299, 0.38080374,
#  0.95512648, 0.02581326, 0.35656195, 0.63976781, 0.64003303, 0.11225564,
#  0.65793692, 0.51733288, 0.69375844, 0.65627974, 0.44047224, 0.41647521,
#  0.99460043, 0.10885262, 0.70226414, 0.50358096, 0.14990523, 0.06267499,
#  0.22671739, 0.20491282, 0.08706544, 0.92807828, 0.83539689, 0.96060127,
#  0.89418744, 0.73340459, 0.15210912, 0.01798124, 0.63627997, 0.54654739,
#  0.15599326, 0.99767363, 0.58029607, 0.86386519, 0.71364172, 0.98920741,
#  0.47985258, 0.56510533, 0.09836879, 0.10551892, 0.97166377, 0.8525087,
#  0.35372028, 0.66794984, 0.89433291, 0.55443761, 0.47713519, 0.94584954,
#  0.5542143,  0.39992421, 0.80914072, 0.78323771, 0.39284604, 0.30620388,
#  0.81800698, 0.24876244, 0.69301389, 0.32659147, 0.84764601, 0.10660941,
#  0.54570602, 0.96241372, 0.35467438, 0.38459114, 0.33987296, 0.64863889,
#  0.89967746, 0.02435541, 0.16194631, 0.21925128, 0.98510521, 0.77268815,
#  0.79491996, 0.40097672, 0.87266788, 0.35881748, 0.20810176, 0.37739577,
#  0.57328298, 0.57598155, 0.61389178, 0.87038058, 0.70061459, 0.29687423,
#  0.93452916, 0.9452804,  0.94259281, 0.51779858, 0.74936858, 0.84605379,
#  0.88559085, 0.43288818, 0.2036713,  0.19473423]

#GEWDFP2(csTFA2, del_exprs, alpha, beta)
#returns same answer as GEWDFP2(csTFA,...), success

In [16]:
# Ground-truth ("T") parameters used to synthesise data for a recovery test.
alphaT = np.ones(numgenes)
betaT = np.zeros(numgenes)
csTFAT = np.random.rand(numgenes, 2 * numtfs)
csT, tfaT = splitMat(csTFAT)

# Simulate every single-TF deletion from the ground-truth parameters, so that
# the fit below is compared against the parameters that generated the data.
exprsT = np.array([getGeneExpressionWithDeletion(alphaT, betaT, csT, tfaT, i) for i in range(len(tfaT[0]))]).transpose()

# Fit from a fresh random start; minimize expects a one-dimensional x0.
res = minimize(GEWDFP2, np.random.rand(numgenes, 2 * numtfs).ravel(), args=(exprsT, alphaT, betaT)).x

# NOTE: tfa / (tfa + cs) is unchanged when a tfa entry and its cs entry are
# scaled by the same factor, so a perfect fit does not have to reproduce
# csTFAT element by element.
print(csTFAT)
print('-------------------------------------------------------')
# Show the fitted vector in the same layout as csTFAT for easier comparison.
print(res.reshape(csTFAT.shape))

[[1.70964661e-01 3.76161806e-01 7.40491502e-01 4.56280333e-01
  6.81844158e-01 3.01560343e-01 4.88038026e-01 8.98857125e-01
  6.64128013e-01 2.66224674e-01]
 [7.10805709e-01 6.41044778e-02 8.26694602e-01 3.63880663e-01
  9.86150717e-01 2.67396559e-02 2.03004101e-01 6.74115871e-01
  7.61355373e-01 6.91910088e-01]
 [9.96986411e-01 6.72491429e-01 9.19989160e-01 7.22718515e-01
  5.50623437e-01 5.28763087e-01 6.91579283e-01 6.18286640e-01
  5.31414048e-01 6.16825188e-01]
 [8.22559917e-01 9.69565244e-02 8.32855895e-01 5.14691457e-01
  5.91527453e-01 2.01536015e-01 8.62525515e-01 1.82835896e-01
  1.55443545e-01 9.25951724e-01]
 [8.66648461e-01 8.94975141e-01 6.21027565e-01 9.64620060e-04
  5.29857550e-01 3.37621214e-01 5.54344491e-02 8.70267443e-01
  3.54249029e-01 4.76081683e-01]
 [1.86025692e-01 2.20559212e-02 7.97755066e-01 9.58986461e-01
  4.04594770e-01 1.91595018e-02 9.80805696e-01 6.20937190e-01
  8.62836660e-01 5.27112617e-01]
 [4.29830554e-01 5.69524176e-01 2.68597077e-01 5.68481198e

In [24]:
#Other solvers
# Maps each scipy.optimize.minimize method name to its outcome: the fitted
# parameter vector on success, or the error message if the solver refused to
# run with this setup (for example because it needs an explicit Jacobian).
solvers = {
    'Nelder-Mead': None,
    'Powell': None,
    'CG': None,
    'BFGS': None,
    'Newton-CG': None,
    'L-BFGS-B': None,
    'TNC': None,
    'COBYLA': None,
    'SLSQP': None,
    'trust-constr': None,  # was misspelled 'trust-const', which is not a valid method
    'dogleg': None,
    'trust-ncg': None,
    'trust-exact': None,
    'trust-krylov': None
}

for solver in solvers.keys():
    try:
        # minimize expects a one-dimensional x0, so flatten the random start.
        solvers[solver] = minimize(GEWDFP2, np.random.rand(numgenes, 2 * numtfs).ravel(), args=(exprsT, alphaT, betaT), method=solver).x
    except ValueError as ve:
        solvers[solver] = str(ve)

In [26]:
# Report every solver's stored outcome, separated by a rule line.
for solver in solvers:
    print(f"{solver}: ", solvers[solver], '-------------------------------------------------------', sep="\n")

Nelder-Mead: 
[ 2.64988644e-01  3.18462531e-01  3.28660217e-02  3.68855629e-01
  4.89025943e-01  5.17635892e-01  2.45710436e-01  2.66772698e-01
  9.60328150e-02  1.47364509e-01  4.38729259e-01  1.55931586e-02
  1.01029657e+00  5.97204064e-01  1.15877431e+00  2.44126154e-01
  4.84546892e-02  4.76518506e-01  1.84452976e-01  6.27036044e-01
  3.29177649e-01  4.52426225e-01  7.92388915e-01  8.60010870e-02
  2.50588591e-01  4.87078531e-02  4.81770973e-02  1.29907322e-01
  7.99119163e-01  3.55098873e-01  2.36289502e-01  1.15795164e+00
  1.41413660e-01  2.86744633e-01  6.04065075e-04  6.54104386e-01
  9.94734389e-02  1.04866189e-01  5.97645484e-02  1.80009186e+00
  1.29244058e+00  1.36341078e+00  5.66623901e-02  6.64207279e-01
  1.16686289e+00  2.77001880e-01  2.30571260e-01  1.00867081e+00
  7.60007368e-01  1.11105957e+00  1.54177379e-02  3.14712085e-01
  2.52025407e-01  3.39088464e-01  2.40674673e-02  6.96485251e-02
  6.77245585e-01  3.50729840e-01  2.63058825e-02  6.02417114e-01
  4.5504099