In [None]:
import numpy as np
import scipy.integrate as integrate
import os
import pandas

import random
import scipy.stats

import seaborn as sns
from tqdm import tqdm_notebook as tqdm
import matplotlib. pyplot as plt

import seaborn as sns

import matplotlib as mpl

# Introduction

In the main text (see **SUPPLEMENTARY INFORMATION, E**), we show that the time $t$ until one among $N$ CTLs hits the spheroid is exponentially distributed. Working backwards from there, we can infer the hitting rate $\lambda_{in}$ and the leaving rate $\lambda_{out}$.

# Global arrival and leaving statistics

## Getting the arrival and leaving probabilities

In [None]:
def get_statistics_single_well(single_well_frame):
    
    '''
    
    Prepare the statistics table for a single well.
    
    Enter:
     - single_well_frame: pandas.DataFrame holding the rows of one well,
       with at least the columns 'N contact' and 'time'.
     
     Returns:
      - a new pandas.DataFrame with two extra columns:
        'dN' = 'N contact' of the next row minus 'N contact' of this row,
        'dt' = 'time' of the next row minus 'time' of this row.
        The last row has no successor, so both values are NaN there.
        
    The input frame is left untouched: callers typically hand in a slice
    of a larger table, and writing new columns into such a slice is what
    raises pandas' SettingWithCopyWarning.
    
    '''
    
    contacts = single_well_frame['N contact']
    times = single_well_frame['time']
    
    # assign() returns a fresh frame instead of writing into the argument.
    return single_well_frame.assign(dN = contacts.shift(-1) - contacts,
                                    dt = times.shift(-1) - times)

def running_mean(x, N):
    
    '''
    
    Moving average of x over a window of N consecutive samples,
    truncated to integers.
    
    Enter:
     - x: object exposing its data through `.values` (e.g. pandas.Series)
     - N: window length
     
     Returns:
      - numpy array of integers with len(x) - N + 1 entries.
    
    '''
    
    # A leading zero makes every window sum a difference of two prefix sums.
    padded = np.insert(x.values, 0, 0)
    prefix_sums = padded.cumsum()
    window_sums = prefix_sums[N:] - prefix_sums[:-N]
    
    return (window_sums / float(N)).astype(int)

def get_statistics(data_frame):
    
    '''
    
    Prepare the statistics table.
    
    Enter:
     - data_frame: pandas.DataFrame with at least the columns
       'ID', 'frame', 'time', 'N' and 'N contact'. Rows sharing an 'ID'
       are treated as one well, in the order in which they appear.
     
     Returns:
      - pandas.DataFrame restricted to the columns
        ['ID', 'frame', 'time', 'N', 'N contact', 'N gel', 'dN', 'dt'] where,
        per well:
          'dN'    = 'N contact' of the next row minus 'N contact' of this row
          'dt'    = 'time' of the next row minus 'time' of this row
          'N'     = largest 'N' seen in the well
          'N gel' = that largest 'N' minus 'N contact'
        The last row of every well has NaN in 'dN' and 'dt'.
        
    As before, the computed columns are also written into data_frame itself.
    
    '''
    
    # One group-wise pass replaces the per-well boolean masks, which scanned
    # the whole table several times for every well.
    per_well = data_frame.groupby('ID', sort = False)
    
    # Everything is derived from the original columns before any of them is
    # overwritten below.
    next_contact = per_well['N contact'].shift(-1)
    next_time = per_well['time'].shift(-1)
    total_cells = per_well['N'].transform('max')
    
    # .values keeps the assignment positional, so repeated index labels
    # cannot scramble the rows.
    data_frame['dN'] = (next_contact - data_frame['N contact']).values
    data_frame['dt'] = (next_time - data_frame['time']).values
    data_frame['N'] = total_cells.values
    
    # NOTE(review): cast to float so the column has the same dtype as
    # 'dN'/'dt'; callers pivot on 'N gel', so its dtype ends up in their
    # index labels - confirm float is what downstream code expects.
    data_frame['N gel'] = (total_cells - data_frame['N contact']).astype(float).values
        
    return data_frame[['ID', 'frame', 'time', 'N', 'N contact', 'N gel', 'dN', 'dt']]



In [None]:
def get_probability_arrival(AnalysisFrame):
    
    '''
    
    Average number of arrivals per frame, tabulated against the number
    of CTLs in the gel ('N gel').
    
    Enter:
     - AnalysisFrame: pandas.Dataframe, passed on to get_statistics
     
     Returns:
      - properties: pandas.DataFrame indexed by 'N gel' with the columns
        'probability' (mean of dN, negative dN counted as 0) and
        'counts' (number of observations behind that mean).
    
    '''
    
    # Rows with any missing value (e.g. the last frame of each well) are dropped.
    stats = get_statistics(AnalysisFrame).dropna()
    
    arrivals = stats.assign(
        # unique row key for the pivot: "<ID> : <frame>"
        ID_time = stats['ID'] + ' : ' + stats['frame'].astype(int).astype(str),
        # departures (dN < 0) count as "no arrival"
        dN = stats['dN'].clip(lower = 0),
    )
    
    # One column per 'N gel' value, one row per (well, frame).
    by_gel_count = arrivals.pivot(index = 'ID_time', columns = 'N gel', values = 'dN')
    
    properties = pandas.concat([by_gel_count.mean(axis = 0),
                                by_gel_count.count(axis = 0)], axis = 1)
    properties.columns = ['probability', 'counts']
    
    return properties

def get_lambda_arrival(prob_arrive, 
                       minN = 0, 
                       maxN = 20,
                       min_counts = 25):
    
    '''
    
    From the probability to hit the spheroid infer lambda_arrival.
    
    For every row the rate -log(1 - probability) / index is computed, and
    the rows are then averaged with 'counts' as weights.
    
    Enter:
     - prob_arrive: pandas.Dataframe with the columns 'probability' and
       'counts', whose index is the number the rate is normalised by
     - minN, maxN: bounds of the row slice prob_arrive[minN:maxN]
     - min_counts: the weighted average is only returned when the usable
       rows carry strictly more than this many counts in total
     
     Returns:
      - lambda_in: float, or NaN when there is not enough data
    
    '''
    
    # NOTE(review): this is a plain row slice, so whether minN/maxN act as
    # positions or as index labels depends on the index dtype - confirm the
    # intended meaning.
    prob_arrive = prob_arrive[minN:maxN]
    
    probability = np.asarray(prob_arrive['probability'], dtype = float)
    normalisation = np.asarray(prob_arrive.index, dtype = float)
    counts = np.asarray(prob_arrive['counts'], dtype = float)
    
    # log(0) and division by a zero index are expected here; the resulting
    # inf/NaN entries are filtered right below, so silence the warnings.
    with np.errstate(divide = 'ignore', invalid = 'ignore'):
        rates = -np.log(1 - probability) / normalisation
    
    # Keep finite rates only: filtering NaN alone lets a row with
    # probability == 1 through as +inf, which turns the whole weighted
    # average into inf.
    usable = np.isfinite(rates)
    
    # we require a minimum number of counts to assure the statistical
    # validity of the estimated figure.
    
    if counts[usable].sum() > min_counts:
    
        return np.average(rates[usable], weights = counts[usable])
    
    return np.nan


In [None]:

def get_probability_leave(AnalysisFrame):
    
    '''
    
    Average number of departures per frame, tabulated against the number
    of CTLs on the spheroid ('N contact').
    
    Enter:
     - AnalysisFrame: pandas.Dataframe, passed on to get_statistics
     
     Returns:
      - properties: pandas.DataFrame indexed by 'N contact' with the columns
        'probability' (absolute value of the mean of dN, positive dN
        counted as 0) and 'counts' (number of observations behind it).
    
    '''
    
    # Rows with any missing value (e.g. the last frame of each well) are dropped.
    stats = get_statistics(AnalysisFrame).dropna()
    
    departures = stats.assign(
        # unique row key for the pivot: "<ID> : <frame>"
        ID_time = stats['ID'] + ' : ' + stats['frame'].astype(int).astype(str),
        # arrivals (dN > 0) count as "no departure"
        dN = stats['dN'].clip(upper = 0),
    )
    
    # One column per 'N contact' value, one row per (well, frame).
    by_contact_count = departures.pivot(index = 'ID_time', columns = 'N contact', values = 'dN')
    
    # dN is <= 0 here, hence the absolute value.
    properties = pandas.concat([by_contact_count.mean(axis = 0).abs(),
                                by_contact_count.count(axis = 0)], axis = 1)
    properties.columns = ['probability', 'counts']
                
    return properties

def get_lambda_leave(prob_leave, minN = 0, maxN = 20):
    
    '''
    
    From the probability to leave the spheroid infer lambda_leave.
    
    Enter:
     - prob_leave: pandas.Dataframe with the columns 'probability' and
       'counts'; the index is the number the rate is divided by
     - minN, maxN: bounds of the row slice prob_leave[minN:maxN]
     
     Returns:
      - float: average of -log(1 - probability) / index weighted by
        'counts', or NaN when no row survives the filtering.
    
    '''
    
    window = prob_leave[minN:maxN]
    
    # Rows with probability >= 1 or index <= 0 are discarded before the
    # logarithm and the division are evaluated.
    keep = (window['probability'] < 1) & (window.index > 0)
    usable = window[keep]
    
    if usable.empty:
        return np.nan
    
    rates = -np.log(1 - usable['probability']) / usable.index
    
    return np.average(rates.values, weights = usable['counts'].values)


## Bootstrapping

This part of the script combines the different elements from above and conducts the bootstrapping procedure.

The bootstrapping procedure works as follows: we draw $N_{samples}$ experiments with replacement from the full set of experiments, estimate the lambdas on that resample, and repeat the procedure $N_{tirages}$ times.

In [None]:
import math

def bootstrap_scheme(AnalysisFrame, N_samples, N_tirages):
    
    '''
    
    Bootstrap the arrival and leaving rates.
    
    For each of the N_tirages draws, N_samples well IDs are picked with
    replacement, their rows are stacked into one table, and
    lambda_arrival / lambda_leave are estimated on that table.
    
    Enter:
     - AnalysisFrame: pandas.Dataframe with an 'ID' column identifying wells
     - N_samples: number of wells per draw
     - N_tirages: number of draws
     
     Returns:
      - pandas.DataFrame with one row per draw and the columns
        'lambda_arrival' and 'lambda_leave'. The row labels are the values
        of the running copy counter, so they are increasing but not
        consecutive.
    
    '''
    
    well_ids = list(AnalysisFrame['ID'].unique())
    
    labels = []
    rows = []
    j = 0
    
    for _ in range(N_tirages):            
    
        experiments = np.random.choice(well_ids, N_samples, replace = True)
        
        # Every drawn copy gets a distinct ID suffix so that a well drawn
        # twice is treated as two separate wells downstream.
        copies = []
        
        for ID in experiments:
            loc_frame = AnalysisFrame[AnalysisFrame['ID'] == ID]
            # assign() builds a new frame instead of writing into the slice.
            copies.append(loc_frame.assign(ID = loc_frame['ID'] + ' : ' + str(j)))
            j += 1
        
        # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
        # ignore_index gives the stacked table unique row labels even when a
        # well was drawn more than once.
        loc_analysis = pandas.concat(copies, ignore_index = True) if copies else pandas.DataFrame()
                        
        probabilities = get_probability_arrival(loc_analysis)
        lambda_arrival = get_lambda_arrival(probabilities)
        
        if np.isnan(lambda_arrival):
            
            # Diagnostic output for draws where no arrival rate could be estimated.
            print(probabilities)
            
            print((-np.log(1 - probabilities['probability'])/probabilities.index).values)
        
        probabilities = get_probability_leave(loc_analysis)
        
        if probabilities.empty:
            lambda_leave = np.nan
        else:
            lambda_leave = get_lambda_leave(probabilities)
        
        labels.append(j)
        rows.append({'lambda_arrival': lambda_arrival, 'lambda_leave': lambda_leave})
        j += 1
        
    return pandas.DataFrame(rows, index = labels, columns = ['lambda_arrival', 'lambda_leave'])
    
def get_max_N(AnalysisFrame):
    
    '''
    
    Replace 'N' by its per-well maximum.
    
    Enter:
     - AnalysisFrame: pandas.Dataframe with the columns 'ID' and 'N'
     
     Returns:
      - the same AnalysisFrame object, modified in place, where every row
        of a well carries the largest 'N' found in that well.
    
    '''
    
    well_ids = AnalysisFrame['ID'].unique()
    
    for well in tqdm(well_ids):
        
        in_well = AnalysisFrame['ID'] == well
        AnalysisFrame.loc[in_well, 'N'] = AnalysisFrame.loc[in_well, 'N'].max()
        
    return AnalysisFrame

From the experimental dataframe containing the accumulation statistics `AnalysisFrame` we use the `bootstrap_scheme` function to get statistics on the hitting and leaving rates.

In [None]:
# Each of the 50 draws resamples 70 % of the available wells.
bs_analysis = bootstrap_scheme(
    AnalysisFrame,
    N_samples = int(0.7*len(AnalysisFrame['ID'].unique())),
    N_tirages = 50,
)

## Measuring the accumulation acceleration

We monitor here the differences in T-cell accumulation, but the model doesn't take into account the probability of coming into contact as a function of the number of T-cells already on the target. We can access this information with the following functions:

In [None]:
def get_lambda(prob, minN = 0, maxN = 20, min_counts = 100):
    
    '''
    
    Infer a rate from a probability table.
    
    For every row the rate -log(1 - probability) / index is computed, and
    the rows are then averaged with 'counts' as weights.
    
    Enter:
     - prob: pandas.Dataframe with the columns 'probability' and 'counts',
       whose index is the number the rate is normalised by
     - minN, maxN: bounds of the row slice prob[minN:maxN]
     - min_counts: the weighted average is only returned when the usable
       rows carry strictly more than this many counts in total
     
     Returns:
      - float, or NaN when there is not enough data
    
    '''
        
    # NOTE(review): this is a plain row slice, so whether minN/maxN act as
    # positions or as index labels depends on the index dtype - confirm the
    # intended meaning.
    prob = prob[minN:maxN]
    
    probability = np.asarray(prob['probability'], dtype = float)
    normalisation = np.asarray(prob.index, dtype = float)
    counts = np.asarray(prob['counts'], dtype = float)
    
    # log(0) and division by a zero index are expected here; the resulting
    # inf/NaN entries are filtered right below, so silence the warnings.
    with np.errstate(divide = 'ignore', invalid = 'ignore'):
        rates = -np.log(1 - probability) / normalisation
    
    # Keep finite rates only: filtering NaN alone lets a row with
    # probability == 1 through as +inf, which turns the whole weighted
    # average into inf.
    usable = np.isfinite(rates)
    
    # We only keep the statistics if there are enough unique data points
    if counts[usable].sum() > min_counts:
    
        return np.average(rates[usable], weights = counts[usable])
    
    return np.nan


def get_statistics(data_frame):
    
    '''
    
    Prepare the statistics table.
    
    Enter:
     - data_frame: pandas.DataFrame with at least the columns
       'ID', 'frame', 'time', 'N' and 'N contact'. Rows sharing an 'ID'
       are treated as one well, in the order in which they appear.
     
     Returns:
      - pandas.DataFrame restricted to the columns
        ['ID', 'frame', 'time', 'N', 'N contact', 'N gel', 'dN', 'dt'] where,
        per well:
          'dN'    = 'N contact' of the next row minus 'N contact' of this row
          'dt'    = 'time' of the next row minus 'time' of this row
          'N'     = largest 'N' seen in the well
          'N gel' = that largest 'N' minus 'N contact'
        The last row of every well has NaN in 'dN' and 'dt'.
        
    As before, the computed columns are also written into data_frame itself.
    
    '''
    
    # One group-wise pass replaces the per-well boolean masks, which scanned
    # the whole table several times for every well.
    per_well = data_frame.groupby('ID', sort = False)
    
    # Everything is derived from the original columns before any of them is
    # overwritten below.
    next_contact = per_well['N contact'].shift(-1)
    next_time = per_well['time'].shift(-1)
    total_cells = per_well['N'].transform('max')
    
    # .values keeps the assignment positional, so repeated index labels
    # cannot scramble the rows.
    data_frame['dN'] = (next_contact - data_frame['N contact']).values
    data_frame['dt'] = (next_time - data_frame['time']).values
    data_frame['N'] = total_cells.values
    
    # NOTE(review): cast to float so the column has the same dtype as
    # 'dN'/'dt'; callers pivot on 'N gel', so its dtype ends up in their
    # index labels - confirm float is what downstream code expects.
    data_frame['N gel'] = (total_cells - data_frame['N contact']).astype(float).values
            
    return data_frame[['ID', 'frame', 'time', 'N', 'N contact', 'N gel', 'dN', 'dt']]

def get_probability_arrival_contact(AnalysisFrame, N_contact):
    
    '''
    
    Arrival probability as a function of the number of CTLs in the gel,
    restricted to frames where exactly N_contact CTLs are on the spheroid.
    
    Enter:
     - AnalysisFrame: pandas.Dataframe, passed on to get_statistics; the
       'ID' strings are expected to contain at least two ' : ' separated
       fields, the first two of which identify the well
     - N_contact: value of 'N contact' to condition on
     
     Returns:
      - properties: pandas.DataFrame indexed by 'N gel' with the columns
        'probability' and 'counts'. A single all-NaN row is returned
        instead when no observation is left or when fewer than ten
        distinct wells contribute.
    
    '''
    
    def no_estimate():
        # Placeholder table handed back when the data cannot support an estimate.
        return pandas.DataFrame(data = [[np.nan, np.nan]],
                                columns = ['probability', 'counts'])
    
    stats_frame = get_statistics(AnalysisFrame)
    stats_frame = stats_frame[stats_frame['N contact'] == N_contact].dropna()
    
    # departures (dN < 0) count as "no arrival"
    stats_frame = stats_frame.assign(dN = stats_frame['dN'].clip(lower = 0))
    
    # remove cases where the number change is beyond 1
    # (a boolean mask is used rather than drop-by-label, which would also
    # remove unrelated rows whenever row labels repeat, as they can after
    # bootstrap resampling)
    stats_frame_arrival = stats_frame[stats_frame['dN'] <= 1]
    
    # Same guard as in get_probability_leave_contact: with nothing left,
    # the ID split below has no columns to pick from.
    if stats_frame_arrival.empty:
        return no_estimate()
    
    # The first two ' : ' separated fields of the ID identify the well,
    # regardless of any suffix appended afterwards.
    id_fields = stats_frame_arrival['ID'].str.split(' : ', expand = True)
    well_ids = id_fields[0] + ' : ' + id_fields[1]
    
    # We only keep the probabilities if at least ten unique technical replicas are sampled
    if len(well_ids.unique()) < 10:
        return no_estimate()
    
    stats_frame_arrival = stats_frame_arrival.assign(
        ID_time = stats_frame_arrival['ID'] + ' : ' + stats_frame_arrival['frame'].astype(int).astype(str))
    
    # One column per 'N gel' value, one row per (well, frame).
    by_gel_count = stats_frame_arrival.pivot(index = 'ID_time', columns = 'N gel', values = 'dN')
    
    properties = pandas.concat([by_gel_count.mean(axis = 0),
                                by_gel_count.count(axis = 0)], axis = 1)
    properties.columns = ['probability', 'counts']
    
    return properties

def get_probability_leave_contact(AnalysisFrame, N_contact):
    
    '''
    
    Leaving probability restricted to frames where exactly N_contact CTLs
    are on the spheroid.
    
    Enter:
     - AnalysisFrame: pandas.Dataframe, passed on to get_statistics; the
       'ID' strings are expected to contain at least two ' : ' separated
       fields, the first two of which identify the well
     - N_contact: value of 'N contact' to condition on
     
     Returns:
      - properties: pandas.DataFrame indexed by 'N contact' with the columns
        'probability' and 'counts'. A single all-NaN row is returned
        instead when no observation is left or when fewer than ten
        distinct wells contribute.
    
    '''
    
    def no_estimate():
        # Placeholder table handed back when the data cannot support an estimate.
        return pandas.DataFrame(data = [[np.nan, np.nan]],
                                columns = ['probability', 'counts'])
    
    stats_frame = get_statistics(AnalysisFrame)
    stats_frame = stats_frame[stats_frame['N contact'] == N_contact].dropna()
    
    # arrivals (dN > 0) count as "no departure"
    stats_frame = stats_frame.assign(dN = stats_frame['dN'].clip(upper = 0))
    
    # remove cases where the number change is beyond 1
    # (a boolean mask is used rather than drop-by-label, which would also
    # remove unrelated rows whenever row labels repeat, as they can after
    # bootstrap resampling)
    stats_frame_leave = stats_frame[stats_frame['dN'] >= -1]
    
    if stats_frame_leave.empty:
        return no_estimate()
    
    # The first two ' : ' separated fields of the ID identify the well,
    # regardless of any suffix appended afterwards.
    id_fields = stats_frame_leave['ID'].str.split(' : ', expand = True)
    well_ids = id_fields[0] + ' : ' + id_fields[1]
    
    # We only keep the probabilities if at least ten unique wells are sampled
    if len(well_ids.unique()) < 10:
        return no_estimate()
    
    stats_frame_leave = stats_frame_leave.assign(
        ID_time = stats_frame_leave['ID'] + ' : ' + stats_frame_leave['frame'].astype(int).astype(str))
    
    # One column per 'N contact' value, one row per (well, frame).
    by_contact_count = stats_frame_leave.pivot(index = 'ID_time', columns = 'N contact', values = 'dN')
    
    # dN is <= 0 here, hence the absolute value.
    properties = pandas.concat([by_contact_count.mean(axis = 0).abs(),
                                by_contact_count.count(axis = 0)], axis = 1)
    properties.columns = ['probability', 'counts']
    
    return properties

def bootstrap_scheme_per_n(AnalysisFrame, N_samples, N_tirages, N_contact):
    
    '''
    
    Bootstrap the arrival and leaving rates conditioned on the number of
    CTLs already in contact.
    
    For each of the N_tirages draws, N_samples well IDs are picked with
    replacement, their rows are stacked into one table, and the rates are
    estimated from the frames where 'N contact' equals N_contact.
    
    Enter:
     - AnalysisFrame: pandas.Dataframe with an 'ID' column identifying wells
     - N_samples: number of wells per draw
     - N_tirages: number of draws
     - N_contact: number of CTLs in contact to condition on
     
     Returns:
      - pandas.DataFrame with one row per draw and the columns
        'lambda_arrival' and 'lambda_leave' (NaN where the estimate
        failed). The row labels are the values of the running copy
        counter, so they are increasing but not consecutive.
    
    '''
    
    well_ids = list(AnalysisFrame['ID'].unique())
    
    labels = []
    rows = []
    j = 0
    
    for _ in range(N_tirages):            
    
        # np.random.choice samples with replacement by default.
        experiments = np.random.choice(well_ids, N_samples)
        
        # Every drawn copy gets a distinct ID suffix so that a well drawn
        # twice is treated as two separate wells downstream.
        copies = []
        
        for ID in experiments:
            loc_frame = AnalysisFrame[AnalysisFrame['ID'] == ID]
            # assign() builds a new frame instead of writing into the slice.
            copies.append(loc_frame.assign(ID = loc_frame['ID'] + ' : ' + str(j)))
            j += 1
        
        # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
        # ignore_index gives the stacked table unique row labels even when a
        # well was drawn more than once.
        loc_analysis = pandas.concat(copies, ignore_index = True) if copies else pandas.DataFrame()
                                    
        probabilities = get_probability_arrival_contact(loc_analysis, N_contact)
        
        # A failed estimate is recorded as NaN (best effort), but unlike a
        # bare `except:` this still lets KeyboardInterrupt/SystemExit through.
        try:
            lambda_arrival = get_lambda(probabilities)
        except Exception:
            lambda_arrival = np.nan
        
        probabilities = get_probability_leave_contact(loc_analysis, N_contact)
        
        try:
            lambda_leave = get_lambda(probabilities)
        except Exception:
            lambda_leave = np.nan
        
        labels.append(j)
        rows.append({'lambda_arrival': lambda_arrival, 'lambda_leave': lambda_leave})
        j += 1
        
    return pandas.DataFrame(rows, index = labels, columns = ['lambda_arrival', 'lambda_leave'])

def lambda_per_contact_all(AnalysisFrame, N_contact_max,  N_samples, N_tirages):
    
    '''
    
    Run bootstrap_scheme_per_n for every N_contact in range(N_contact_max)
    and stack the results.
    
    Enter:
     - AnalysisFrame: pandas.Dataframe, passed on to bootstrap_scheme_per_n
     - N_contact_max: N_contact runs over 0 .. N_contact_max - 1
     - N_samples, N_tirages: passed on to bootstrap_scheme_per_n
     
     Returns:
      - pandas.DataFrame: the per-N_contact bootstrap tables stacked on top
        of each other, each tagged with its value in the 'N_contact'
        column. Empty when N_contact_max is 0.
    
    '''
    
    per_contact = []
    
    for N_contact in tqdm(range(N_contact_max)):
                        
        bs_stats = bootstrap_scheme_per_n(AnalysisFrame, N_samples, N_tirages, N_contact)
                
        bs_stats['N_contact'] = N_contact
                
        per_contact.append(bs_stats)
    
    # pandas.concat refuses an empty list.
    if not per_contact:
        return pandas.DataFrame()
    
    # DataFrame.append no longer exists in pandas >= 2.0; concatenate once
    # at the end, keeping the row labels of the individual tables.
    return pandas.concat(per_contact)

def get_max_N(AnalysisFrame):
    
    '''
    
    Replace 'N' by its per-well maximum.
    
    Enter:
     - AnalysisFrame: pandas.Dataframe with the columns 'ID' and 'N'
     
     Returns:
      - the same AnalysisFrame object, modified in place, where every row
        of a well carries the largest 'N' found in that well.
    
    '''
    
    well_ids = AnalysisFrame['ID'].unique()
    
    for well in tqdm(well_ids):
        
        in_well = AnalysisFrame['ID'] == well
        AnalysisFrame.loc[in_well, 'N'] = AnalysisFrame.loc[in_well, 'N'].max()
        
    return AnalysisFrame



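From the experimental dataframe containing the accumulation statistics `AnalysisFrame` we use the `lambda_per_contact_all` function to get statistics on the hitting and leaving rates for each number of CTLs already in contact with the spheroid; the result is stored in `lambda_per_n`.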

In [None]:
# N_contact runs over 0..5; each of the 50 draws resamples 70 % of the wells.
lambda_per_n = lambda_per_contact_all(
    AnalysisFrame,
    N_contact_max = 6,
    N_samples = int(0.7*len(AnalysisFrame['ID'].unique())),
    N_tirages = 50,
)