In [None]:
# Importing essential libraries for the script

# NumPy for numerical operations and array manipulations
import numpy as np
# Pandas for handling and manipulating dataframes
import pandas as pd
# Time module for time-related functions
import time
# Import the time() function to measure execution time (note: this rebinds the name `time`, shadowing the module imported just above)
from time import time
# Pickle for serializing and saving Python objects (like dictionaries)
import pickle
# Multiprocessing Pool for parallel processing
from multiprocessing import Pool
# Joblib for parallel processing using 'Parallel' and 'delayed' functions
from joblib import Parallel, delayed

In [None]:
def Allele(MTD=48,LIF=10,prop=0.75,C=25,B=120,A=15,T=np.array(range(0,195,15))):
    '''
    Simulates transcription initiation events at one allele and classifies
    every event at each requested time point.

    Initiation times are cumulative sums of exponentially distributed waiting
    times. Pre-treatment events use mean waiting time C and are shifted by -B;
    post-treatment events use mean waiting time A and start from time 0.

    Input:
    MTD: Mean Transcription Duration (integer) - average duration of one transcription event.
    LIF: Mean of the exponential lifetime added after MTD before the mRNA is considered degraded (float).
    prop: Proportion of the gene length to be transcribed for visibility (float).
    C: Mean time between two successive transcriptions before treatment (float).
    B: Offset subtracted from the pre-treatment initiation times, i.e. how long before treatment transcription began (integer).
    A: Mean time between two successive transcriptions after treatment (float).
    T: Time points at which the transcripts are classified (array of integers).

    Output:
    incomp: Dictionary keyed by str(time point). Each value is the list of event
            indices j that have initiated but are not yet visible at that time
            (t[j] <= k < t[j] + prop*MTD).
    comp:   Dictionary keyed by str(time point). Each value is the list of event
            indices j that are visible and not yet degraded at that time
            (t[j] + prop*MTD <= k < t[j] + MTD + lifetime[j]).
    Indices refer to the combined event array (pre-treatment events first,
    followed by post-treatment events).
    '''
    # Delay between initiation and the moment the transcript becomes visible.
    MTV = prop * MTD
    # Last time of interest: latest recorded time point plus one transcription duration and a margin of 5.
    TERM = np.max(T) + MTD + 5
    # Number of waiting times drawn for each phase; derived from A for both phases.
    # NOTE(review): with exactly this many draws the post-treatment events reach
    # TERM + MTD only on average - confirm that occasional truncation is acceptable.
    size = int((TERM + MTD) / A)

    # Pre-treatment initiation times: running total of exponential waits (mean C), shifted by -B.
    # np.cumsum replaces the original quadratic "sum of every prefix" loop.
    t0 = np.cumsum(np.random.exponential(scale=C, size=size)) - B
    # Keep only events with shifted time below 1.
    # NOTE(review): the cutoff is 1, not 0 - confirm this is intended.
    t0 = t0[t0 < 1]

    # Post-treatment initiation times: running total of exponential waits (mean A).
    t1 = np.cumsum(np.random.exponential(scale=A, size=size))

    # Combined event times; the first STOP entries are the ones at or before TERM.
    t = np.append(t0, t1)
    STOP = np.count_nonzero(t <= TERM)

    # Exponential lifetimes (mean LIF), one per retained event.
    S = np.random.exponential(scale=LIF, size=STOP)
    start = t[:STOP]
    EMERGE = start + MTV      # time each transcript becomes visible
    D = start + MTD + S       # time each mRNA is degraded

    incomp = {}
    comp = {}
    for k in T:
        # Initiated, but the visible fraction of the gene is not transcribed yet.
        hidden = (start <= k) & (k < EMERGE)
        # Visible and not yet degraded (mutually exclusive with `hidden`).
        visible = (EMERGE <= k) & (k < D)
        incomp[str(k)] = np.flatnonzero(hidden).tolist()
        comp[str(k)] = np.flatnonzero(visible).tolist()
    return incomp, comp

In [None]:
# Parameter grids and global settings for the simulation sweep.

# mRNA lifetime (LIF): 7 evenly spaced values from 19 to 22 inclusive.
gridLIF = np.linspace(19, 22, num=7)
# Time between successive transcriptions after treatment (A): 41 evenly spaced values from 5 to 25 inclusive.
gridA = np.linspace(5, 25, num=41)
# Time between successive transcriptions before treatment (C): integers 10, 11, ..., 35.
gridC = np.arange(10, 36)
# Time before treatment at which transcription began (B): 50, 55, ..., 120.
gridB = np.arange(50, 125, 5)

# Mean transcription duration (MTD): integers 40, 41, ..., 46.
gridMTD = np.arange(40, 47)

# `VALUE`: every (lif, a, c, b, mtd) combination drawn from the grids above,
# keeping only the `c` values that satisfy c > a + 5.
# Loop nesting, outermost to innermost: `gridLIF`, `gridA`, `gridC`, `gridB`, `gridMTD`.
VALUE = [
    (lif, a, c, b, mtd)
    for lif in gridLIF
    for a in gridA
    for c in gridC[gridC > a + 5]
    for b in gridB
    for mtd in gridMTD
]

# `sim`: labels 1..10000, one per simulation run.
sim = np.arange(1, 10001)
# `ST`: time points at which mRNAs are recorded: 0, 15, ..., 120.
ST = np.arange(0, 135, 15)

# `viz`: visibility threshold (set to 2).
viz = 2
# `prop`: proportion of the gene length that must be transcribed for visibility (75%).
prop = 0.75

In [None]:
def NF_simulation(val,sim,number,viz=2,prop=0.75,T=None):
    '''
    Estimates, for one parameter set, the fraction of simulation runs in which
    at least `viz` transcripts are reported in Allele's second output (`comp`)
    at each time point, and saves the result to a CSV file.

    Input:
    val: A tuple containing parameters (LIF, A, C, B, MTD) for the simulation.
    sim: Sized iterable of simulation runs; Allele is called once per element
         and len(sim) is the normalising denominator.
    number: Identifier for saving the CSV file.
    viz: Minimum number of transcripts required at a time point for a run to be counted (default 2).
    prop: Proportion of the gene length passed on to Allele (default 0.75).
    T: Time points passed to Allele and used as CSV columns; when None the
       module-level `ST` is used.

    Output:
    None. A CSV file named "nfsim<number>.csv" is written to the current
    directory: one row labelled with the parameter string, one column per time
    point, values rounded to 2 decimal places.
    '''
    if T is None:
        T = ST  # fall back to the notebook-wide time grid
    T = np.asarray(T)

    lif, a, c, b, mtd = val
    # Row label describing the parameter set.
    name='LIF'+str(lif)+'A'+str(a)+'C'+str(c)+'B'+str(b)+'MTD'+str(mtd)

    # hits[i] = number of runs with at least `viz` transcripts at time point T[i].
    hits = np.zeros(len(T), dtype=int)
    for _ in sim:
        # Only the second output of Allele is used; the first is discarded.
        _unused, visible = Allele(MTD=mtd,LIF=lif,A=a,C=c,B=b,prop=prop,T=T)
        hits += np.array([len(visible[str(k)]) >= viz for k in T], dtype=int)

    # Convert counts to fractions of runs, rounded to 2 decimal places.
    simulation = (pd.DataFrame([hits], index=[name], columns=T) / len(sim)).round(2)
    # Save the simulation results to a CSV file
    simulation.to_csv("nfsim"+str(number)+".csv")

In [None]:
# Record the start time of the simulation
ti=time()
# Run the NF_simulation function in parallel for different sets of parameters
# `Parallel` is used to parallelize the execution across multiple CPU cores
# `n_jobs=4` specifies that 4 parallel jobs will be used
# `delayed(NF_simulation)` creates a delayed version of the `NF_simulation` function to be executed in parallel
# `enumerate(VALUE)` provides an index and the parameter tuple from `VALUE` for each call;
# `sim` is the array of simulation runs and `i+1` becomes the number in the output file name
Parallel(n_jobs=4)(delayed(NF_simulation)(val,sim,i+1) for i,val in enumerate(VALUE))
# Print the total time (in seconds) taken for the parallel simulations to complete
print(time()-ti)