In [None]:
# Importing essential libraries for the script
# NumPy for numerical operations and array manipulations
import numpy as np
# Pandas for handling and manipulating dataframes
import pandas as pd
# Time module for time-related functions
import time
# Import the `time()` function directly to measure execution time; note that this rebinds the name `time`, shadowing the module imported on the previous line
from time import time
# Pickle for serializing and saving Python objects (like dictionaries)
import pickle
# Multiprocessing Pool for parallel processing
from multiprocessing import Pool
# Joblib for parallel processing using 'Parallel' and 'delayed' functions
from joblib import Parallel, delayed

In [None]:
def Allele(MTD=48,A=15,LIF=10,prop=0.75,T=np.array(range(0,195,15))):
    '''
    Simulate transcription initiations on one allele and classify the transcripts at each sampling time.

    Initiation times are the cumulative sums of exponential waiting times with mean `A`.
    Each transcript `j`, initiated at `t[j]`, gets two further time stamps:
        EMERGE[j] = t[j] + prop * MTD
        D[j]      = t[j] + MTD + S[j],   with S[j] drawn from an exponential of mean `LIF`
    At a sampling time `k` the transcript is listed in
        `incomp` when t[j] <= k < EMERGE[j]
        `comp`   when EMERGE[j] <= k < D[j]
    and in neither list otherwise.

    Random numbers come from the global NumPy generator (`np.random.exponential`), so the
    result is reproducible only if the caller seeds it with `np.random.seed`.

    Parameters:
    MTD (float): Transcription duration added to every initiation time when computing `D`.
    A (float): Mean waiting time between two successive initiations. Must be positive.
    LIF (float): Mean of the exponential extra time `S` a transcript stays listed after `t + MTD`.
    prop (float): Fraction of `MTD` after which a transcript moves from `incomp` to `comp`.
    T (array-like): Sampling times at which transcripts are classified.

    Returns:
    tuple: Two dictionaries `(incomp, comp)`. Keys are `str(k)` for each `k` in `T`;
        values are ascending lists of integer transcript indices (0 = first initiation).

    Raises:
    ValueError: if `A` is not a positive number.
    '''
    # A non-positive mean gap would never advance the clock, so the draw loop below would not terminate
    if not A > 0:
        raise ValueError("A (mean time between transcriptions) must be positive")

    # Offset after initiation at which a transcript switches from `incomp` to `comp`
    MTV=prop * MTD
    # Simulation horizon: last sampling time plus one transcription duration and a 5-unit buffer
    TERM=np.max(T)+MTD+5
    # Expected number of initiations over TERM+MTD; used as the batch size for drawing waiting times
    size=max(int((TERM+MTD)/A),1)

    # Draw waiting times in batches until the cumulative time passes the horizon.
    # A single fixed-size draw can, by chance, sum to less than the horizon, which would
    # silently stop all initiations early and under-count transcripts at late sampling times.
    t=np.cumsum(np.random.exponential(scale=A,size=size))
    while t[-1] <= TERM:
        extra=np.random.exponential(scale=A,size=size)
        t=np.concatenate((t,t[-1]+np.cumsum(extra)))
    # Keep only the initiations that fall inside the horizon
    t=t[t <= TERM]
    STOP=t.size

    # Exponential extra time each transcript remains listed after t + MTD
    S=np.random.exponential(scale=LIF,size=STOP)
    EMERGE=t+MTV      # time at which the transcript enters `comp`
    D=t+MTD+S         # time at which the transcript leaves `comp`

    incomp={}
    comp={}
    for k in T:
        key=str(k)
        # The two windows are disjoint (k < EMERGE vs. EMERGE <= k), so they can be evaluated independently
        incomp[key]=np.flatnonzero((t <= k) & (k < EMERGE)).tolist()
        comp[key]=np.flatnonzero((EMERGE <= k) & (k < D)).tolist()
    return incomp , comp

In [None]:
# Parameter grids and run settings shared by the simulation cells below.

# Values passed to Allele as LIF: 7 evenly spaced points from 19 to 22 inclusive (step 0.5).
gridLIF = np.linspace(19, 22, num=7)
# Values passed to Allele as A (mean time between successive transcriptions):
# 41 evenly spaced points from 5 to 25 inclusive (step 0.5).
gridA = np.linspace(5, 25, num=41)
# Values passed to Allele as MTD (mean transcription duration): the integers 40..46.
gridMTD = np.arange(40, 47)

# Full Cartesian product of the three grids. Each tuple is ordered (lif, a, mtd),
# which is the order FV_simulation reads it in (val[0], val[1], val[2]).
# `mtd` varies fastest, then `a`, then `lif`.
VALUE = list(
    (lif, a, mtd)
    for lif in gridLIF
    for a in gridA
    for mtd in gridMTD
)

# Simulation identifiers 1..10000; one Allele run is performed per identifier.
nsim = np.arange(1, 10001)
# Sampling times at which transcripts are counted: 0, 15, ..., 120.
ST = np.arange(0, 135, 15)

# Minimum number of listed transcripts for a run to be counted at a sampling time.
viz = 2
# Fraction of MTD handed to Allele as `prop`.
prop = 0.75

In [None]:
def FV_simulation(val,sim,number):
    """
    Run one Allele simulation per entry of `sim` and write, for every sampling time in `ST`,
    the fraction of runs that list at least `viz` transcripts to a one-row CSV file.

    The transcripts counted are those in the SECOND dictionary returned by Allele (`comp`),
    i.e. the ones with EMERGE <= time < D.
    NOTE(review): presumably these are the transcripts considered visible - confirm with the model description.

    Relies on the module-level names `Allele`, `ST`, `prop` and `viz`.

    Parameters:
    val: tuple of (LIF, A, MTD)
        LIF: forwarded to Allele as `LIF`
        A: forwarded to Allele as `A` (mean time between two successive transcriptions)
        MTD: forwarded to Allele as `MTD` (mean transcription duration)
    sim: sized iterable
        One Allele run is performed per element; `len(sim)` is the normalising denominator.
    number: int
        Identifier for this parameter set; the output file is "fvsim<number>.csv"
        in the current working directory.

    Returns:
    None
    """
    lif,a,mtd=val[0],val[1],val[2]
    # Row label of the output table, built from the parameter values
    name='A'+str(a)+'LIF'+str(lif)+'MTD'+str(mtd)

    # hits[idx] = number of runs with at least `viz` listed transcripts at ST[idx].
    # Tallying as each run finishes avoids keeping every run's result in memory.
    hits=[0]*len(ST)
    for _run in sim:
        _,listed=Allele(MTD=mtd,A=a,LIF=lif,prop=prop,T=ST)
        for idx,k in enumerate(ST):
            # Allele keys its dictionaries by str(time point)
            if viz<=len(listed[str(k)]):
                hits[idx]+=1

    # One row (labelled `name`), one column per sampling time; columns come from ST itself
    # so the table cannot drift out of step with the times actually simulated
    simulation=pd.DataFrame([hits],index=[name],columns=ST)
    # Convert counts to fractions of runs, rounded to 2 decimal places
    simulation=(simulation / (len(sim))).round(2)
    # Save the result to a CSV file
    simulation.to_csv("fvsim"+str(number)+".csv")

In [None]:
# Wall-clock timestamp taken before the sweep starts
# (`time` here is the function brought in by `from time import time`).
ti = time()

# Dispatch one FV_simulation call per parameter tuple in VALUE through joblib,
# using 4 parallel jobs. Parameter sets are numbered from 1, and that number
# becomes part of the CSV file name written by FV_simulation.
Parallel(n_jobs=4)(
    delayed(FV_simulation)(params, nsim, run_id)
    for run_id, params in enumerate(VALUE, start=1)
)

# Report how long the whole sweep took, in seconds
print(time() - ti)