In [None]:
# Code implemented from : https://github.com/tansey/deep-dose-response/tree/master

"""Dose-Response Modeling in High-Throughput Cancer Drug Screenings: An end-to-end approach
W. Tansey, K. Li, H. Zhang, S. W. Linderman, D. M. Blei, R. Rabadan, and C. H. Wiggins
Preprint, December 2018. https://arxiv.org/abs/1812.05691"""

In [None]:
import os
import sys
import random
import numpy as np
import pandas as pd
import pickle as pkl
import matplotlib.pyplot as plt
import math
import seaborn as sns

from scipy.special import gammaln, gammaincc
from scipy.stats import gamma, poisson
from scipy.optimize import minimize
from scipy.stats import invwishart, poisson, multivariate_normal as mvn

In [None]:
raw_data = pd.read_csv("/content/GDSC2_public_raw_data_27Oct23.csv")
pos_ctrl_priors = np.load("/content/pos-ctrl-priors.npz")
neg_ctrl_priors = np.load("/content/neg-ctrl-priors.npz")

In [None]:
def ilogit(x):
    print("Input to ilogit:", x, "Type:", type(x))  # Debugging line
    return 1. / (1 + np.exp(-x))

"""def monotone_rejection_sampler(m, Sigma):
    beta = np.random.multivariate_normal(m, Sigma)
    while np.any(beta[:-1] > beta[1:]):
        beta = np.random.multivariate_normal(m, Sigma)
    return beta"""

def monotone_rejection_sampler(m, Sigma, index=None):
    attempts = 0
    beta = np.random.multivariate_normal(m, Sigma)
    while np.any(beta[:-1] > beta[1:]):
        beta = np.random.multivariate_normal(m, Sigma)
        attempts += 1
    print(f"Row {index}: Completed after {attempts + 1} attempts")
    return beta

In [None]:
from logging import raiseExceptions
def elliptical_slice(xx, prior, log_like_fn, cur_log_like = None, angle_range = 0, ll_args = None, mu = None):
  xx = np.copy(xx)
  D = np.size(xx)

  if np.size(prior) == D:
    nu = np.reshape(prior, (D,))
  else:
    if np.shape(prior)!=(D,D):
      raise Exception("Prior must be given by a D-element sample")
    nu = np.reshape(np.dot(prior, np.random.randn(D,1)).T, np.shape(xx))
  if mu is None:
    mu = np.zeros(D)
  elif np.size(mu)!=D:
    raise Exception("Speicifed mean does not have the correct shape")

  if (cur_log_like is None):
    cur_log_like = log_like_fn(xx, ll_args)

  init_ll = cur_log_like
  hh = np.log(np.random.rand()) + cur_log_like

  #set up the bracket of angles and pick first proposal
  # phi = theta' - theta is a change in angle
  if angle_range <= 0:
    phi = np.random.rand() * 2 * math.pi
    phi_min = phi - 2*math.pi
    phi_max = phi
  else:
    phi_min = -1 * angle_range * np.random.rand()
    phi_max = phi_min + angle_range
    phi = np.random.rand() * (phi_max - phi_min) + phi_min

  check=0
  while True:
    #compute xx for proposed angle difference and check if it's on the slice
    #Add the offset mu before computing the likelihood
    xx_prop = ((xx-mu)*np.cos(phi)) + nu*np.sin(phi) + mu
    cur_log_like = log_like_fn(xx_prop, ll_args)
    if check%20==0:
      print(f"Proposed beta: {xx_prop}, log likelihood: {cur_log_like}")

    if cur_log_like >= hh:
      print(f"Accepted new beta")
      break

    #shrink slice to rejected point
    if phi>0:
      phi_max = phi
    elif phi < 0:
      phi_min = phi
    else:
      import warnings
      assert np.allclose(xx, xx_prop)
      warnings.warn("Shrug to current position and stil rejected")
      break

    phi = np.random.rand()*(phi_max - phi_min) + phi_min
    check+=1

  return xx_prop, cur_log_like

In [None]:
def posterior_ess(Y, M, Sigma, A, B, C,
                  Beta = None,
                  lam_gridsize = 100,
                  nburn = 1000,
                  nsamples = 1000,
                  nthin = 1,
                  nthreads = 1,
                  print_freq = 100):
  Present = Y>=0
  print("Shape of Y:", Y.shape)
  print("Shape of M:", M.shape)
  print("Shape of A:", A.shape)
  print("Shape of B:", B.shape)
  print("Shape of C:", C.shape)
  if Beta is None:
    # Initialize beta to the approximate MLE where data is not missing
    # prior where data is missing
    #Beta = np.copy(M)*(1-Present) + Present*((Y - C[:,None]) / A[:,None]*B[:,None]).clip(1e-6,1e-6)
    Beta = M * (1 - Present) + Present * ((Y - C[:, None]) / (A[:, None] * B[:, None])).clip(1e-6, 1e-6)
    print("Shape of initialized Beta:", Beta.shape)
    Lam_grid, Lam_weights = [],[]

    for a,b,c in zip(A,B,C):
      grid = np.linspace(gamma.ppf(1e-3, a, scale = b), gamma.ppf(1-1e-3, a, scale=b), lam_gridsize)[np.newaxis,:]
      weights = gamma.pdf(grid, a, scale = b)
      weights /= weights.sum()
      Lam_grid.append(grid)
      Lam_weights.append(weights)

    Lam_grid = np.array(Lam_grid)
    Lam_weights = np.array(Lam_weights)


    Cur_log_likelihood = np.zeros(M.shape[0])
    chol = np.linalg.cholesky(Sigma)
    Beta_samples = np.zeros((nsamples, Beta.shape[0], Beta.shape[1]))
    Loglikelihood_samples = np.zeros(nsamples)


    #create a log_likelihood function

    def log_likelihood_fn(proposal_beta,idx):
      if np.any(proposal_beta[:-1] > proposal_beta[1:]):
        print("Rejected due to monotonicity constraint")
        return -np.inf
      present = Present[idx]
      y = Y[idx][present][:,np.newaxis]
      tau = ilogit(proposal_beta)[present][:,np.newaxis]
      grid = Lam_grid[idx]
      weights = Lam_weights[idx]
      c = C[idx]
      print(f"y: {y}, tau: {tau}, grid: {grid}, weights: {weights}, c: {c}")
      likelihood_values = poisson.pmf(y, grid * tau + c) * weights
      if np.any(likelihood_values < 1e-10):
          print("Likelihood values very small or zero")
      return np.log(likelihood_values.clip(1e-10,np.inf).sum(axis=1)).sum()

    for step in range(nburn + nsamples*nthin):
      if print_freq and step%print_freq==0:
        if step>0:
          sys.stdout.write("\033[F")
        print('MCMC step{}'.format(step))

      for idx, beta in enumerate (Beta):
        cur_ll = None if step == 0 else Cur_log_likelihood[idx]
        print("idx: ", idx, "beta: ", beta)
        print("Shape of Beta[idx]:", Beta[idx].shape)
        print("Shape of chol:", chol.shape)
        print("Shape of mu:", M[idx].shape)
        Beta[idx], Cur_log_likelihood[idx] = elliptical_slice(beta, chol,
                                                              log_likelihood_fn,
                                                              cur_log_like = cur_ll,
                                                              ll_args = idx,
                                                              mu = M[idx])
        if step < nburn or ((step - nburn)%nthin)!=0:
          continue

        sample_idx = (step-nburn)//nthin
        Beta_samples[sample_idx] = Beta
        Loglikelihood_samples[sample_idx] = Cur_log_likelihood.sum()


      return Beta_samples, Loglikelihood_samples


In [None]:
unique_barcodes = raw_data['BARCODE'].unique()

M = np.full((len(unique_barcodes), 7), np.nan)
Y = np.full((len(unique_barcodes), 7), np.nan)

for i, barcode in enumerate(unique_barcodes):
    barcode_data = raw_data[raw_data['BARCODE'] == barcode]

    # Iterate over each dose level (1 to 7)
    for dose_level in range(1, 8):
        # Build the TAG pattern for the current dose level
        tag_pattern = f'D{dose_level}-S'

        dose_data = barcode_data[barcode_data['TAG'].str.contains(tag_pattern, na=False)]

        if not dose_data.empty:
            M[i, dose_level - 1] = dose_data['CONC'].values[0]  # Concentration
            Y[i, dose_level - 1] = dose_data['INTENSITY'].values[0]  # Intensity


In [None]:
M_df = pd.DataFrame(M, index=unique_barcodes, columns=[f'Dose_{i}' for i in range(1, 8)])
Y_df = pd.DataFrame(Y, index=unique_barcodes, columns=[f'Dose_{i}' for i in range(1, 8)])

M_df.index.name = 'BARCODE'
Y_df.index.name = 'BARCODE'

#specific_barcode = unique_barcodes[0]
#M_data_for_barcode = M_df.loc[specific_barcode]
#Y_data_for_barcode = Y_df.loc[specific_barcode]

In [None]:
A = pos_ctrl_priors["a"]
B = pos_ctrl_priors["b"]
C = neg_ctrl_priors["c"]

In [None]:
# subset 100 barcodes to check the code
selected_barcodes = np.random.choice(unique_barcodes, 100, replace=False)
A_full = pd.DataFrame({'BARCODE': pos_ctrl_priors['barcodes'], 'A': pos_ctrl_priors['a']}).set_index('BARCODE')
B_full = pd.DataFrame({'BARCODE': pos_ctrl_priors['barcodes'], 'B': pos_ctrl_priors['b']}).set_index('BARCODE')
C_full = pd.DataFrame({'BARCODE': neg_ctrl_priors['barcodes'], 'C': neg_ctrl_priors['c']}).set_index('BARCODE')

A_subset = A_full.loc[selected_barcodes]
B_subset = B_full.loc[selected_barcodes]
C_subset = C_full.loc[selected_barcodes]

M_subset = M_df.loc[selected_barcodes]
M_subset.applymap(lambda x: round(x, 4))
Y_subset = Y_df.loc[selected_barcodes]


  M_subset.applymap(lambda x: round(x, 4))


In [None]:
N = M_subset.shape[0]
ndoses = 7


In [None]:
n_pos_ctrl = 40
bandwidth, kernel_scale, noise_var = 1., 2., 0.05

In [None]:
Sigma = np.array([kernel_scale*(np.exp(-0.5*(i - np.arange(ndoses))**2 / bandwidth**2)) for i in np.arange(ndoses)]) + noise_var*np.eye(ndoses) # squared exponential kernel

In [None]:
A_subset = A_subset.apply(pd.to_numeric, errors='coerce').fillna(0)
B_subset = B_subset.muapply(pd.to_numeric, errors='coerce').fillna(0)
A_subset = A_subset.to_numpy().flatten()
B_subset = B_subset.to_numpy().flatten()

In [150]:
#Beta = np.array([monotone_rejection_sampler(m, Sigma) for m in M_subset])
#Beta = np.array([monotone_rejection_sampler(m.to_numpy(), Sigma) for _, m in M_subset.iterrows()])
Beta = np.array([monotone_rejection_sampler(m.to_numpy(), Sigma, index=i) for i, (_, m) in enumerate(M_subset.iterrows())])


Row 0: Completed after 271 attempts
Row 1: Completed after 6355 attempts
Row 2: Completed after 5184 attempts
Row 3: Completed after 145 attempts
Row 4: Completed after 3027 attempts
Row 5: Completed after 2136 attempts
Row 6: Completed after 9538 attempts
Row 7: Completed after 89521 attempts
Row 8: Completed after 1448 attempts
Row 9: Completed after 4122 attempts
Row 10: Completed after 4036 attempts


KeyboardInterrupt: 

In [None]:
Tau = ilogit(Beta)
Lam_y = np.array([np.random.gamma(a, b, size=ndoses) for a, b in zip(A_subset, B_subset)])
Lam_r = np.array([np.random.gamma(a, b, size=n_pos_ctrl) for a, b in zip(A_subset, B_subset)])
R = np.random.poisson(Lam_r + np.array(C_subset).flatten()[:,np.newaxis])

In [None]:
colors = ['blue', 'orange', 'green']
[plt.plot(t, color=color) for t,color in zip(Tau, colors)]
[plt.scatter(np.arange(M_subset.shape[1])[y >= 0], ((y[y >= 0] - c) / (r.mean() - c)).clip(0,1), color=color) for y, r, c, color in zip(Y_subset, R, C_subset, colors)]
plt.show()
plt.close()

In [None]:
M_subset

In [None]:
Beta_hat = posterior_ess(np.array(Y_subset), np.array(M_subset), Sigma, np.array(A_subset), np.array(B_subset), np.array(C_subset).flatten())

Shape of Y: (10, 7)
Shape of M: (10, 7)
Shape of A: (10,)
Shape of B: (10,)
Shape of C: (10,)
Shape of initialized Beta: (10, 7)
MCMC step0
idx:  0 beta:  [1.e-06 1.e-06 1.e-06 1.e-06 1.e-06 1.e-06 1.e-06]
Shape of Beta[idx]: (7,)
Shape of chol: (7, 7)
Shape of mu: (7,)
Input to ilogit: [1.e-06 1.e-06 1.e-06 1.e-06 1.e-06 1.e-06 1.e-06] Type: <class 'numpy.ndarray'>
y: [[ 6020.]
 [46830.]
 [62031.]
 [58302.]
 [59162.]
 [76565.]
 [63847.]], tau: [[0.50000025]
 [0.50000025]
 [0.50000025]
 [0.50000025]
 [0.50000025]
 [0.50000025]
 [0.50000025]], grid: [[36091.45369723 36638.83204536 37186.2103935  37733.58874163
  38280.96708977 38828.34543791 39375.72378604 39923.10213418
  40470.48048231 41017.85883045 41565.23717858 42112.61552672
  42659.99387486 43207.37222299 43754.75057113 44302.12891926
  44849.5072674  45396.88561554 45944.26396367 46491.64231181
  47039.02065994 47586.39900808 48133.77735621 48681.15570435
  49228.53405249 49775.91240062 50323.29074876 50870.66909689
  51418.047



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Proposed beta: [1.e-06 1.e-06 1.e-06 1.e-06 1.e-06 1.e-06 1.e-06], log likelihood: -inf
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejected due to monotonicity constraint
Rejecte

In [None]:
Tau_hat = ilogit(Beta_hat)

Input to ilogit: (array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0

TypeError: bad operand type for unary -: 'tuple'

In [None]:


Beta_hat2 = posterior_ess(np.array(Y_subset).flatten(), np.array(M_subset).flatten(), Sigma, np.array(A_subset).flatten(), np.array(B_subset).flatten(), np.array(C_subset).flatten())
Tau_hat2 = ilogit(Beta_hat2)

Beta_hat3 = posterior_ess(np.array(Y_subset).flatten(), np.array(M_subset).flatten(), Sigma, np.array(A_subset).flatten(), np.array(B_subset).flatten(), np.array(C_subset).flatten())
Tau_hat3 = ilogit(Beta_hat3)

with sns.axes_style('white', {'legend.frameon': True}):
    plt.rc('font', weight='bold')
    plt.rc('grid', lw=3)
    plt.rc('lines', lw=2)
    plt.rc('axes', lw=2)

    colors = ['blue', 'orange', 'green']
    fig, axarr = plt.subplots(1,3, sharex=True, sharey=True)
    for ax, y, t, t_hat, t_hat2, t_hat3, t_lower, t_upper, r, c, color in zip(axarr, Y_subset, Tau,
                                                        Tau_hat.mean(axis=0),
                                                        Tau_hat2.mean(axis=0),
                                                        Tau_hat3.mean(axis=0),
                                                        np.percentile(Tau_hat, 5, axis=0),
                                                        np.percentile(Tau_hat, 95, axis=0),
                                                        R, C,
                                                        colors):
        ax.scatter(np.arange(M_subset.shape[1])[y >= 0], ((y[y >= 0] - c) / (r.mean() - c)).clip(0,1), color=color)
        ax.plot(np.arange(M_subset.shape[1]), t, color=color, lw=3, ls='--')
        ax.plot(np.arange(M_subset.shape[1]), t_hat, color=color, lw=3)
        ax.plot(np.arange(M_subset.shape[1]), t_hat2, color=color, lw=3)
        ax.plot(np.arange(M_subset.shape[1]), t_hat3, color=color, lw=3)
        ax.fill_between(np.arange(M_subset.shape[1]), t_lower, t_upper, color=color, alpha=0.5)
        ax.set_xlabel('Dosage level', fontsize=18, weight='bold')
        ax.set_ylabel('Survival percentage', fontsize=18, weight='bold')
plt.show()


InvalidIndexError: (slice(None, None, None), None)