# Simulation initialisation functions

### Imports

In [1]:
import tempfile
import numpy
import dask as dd
import pandas as pd
import coloredlogs, logging
import uuid
import random
from coolname import generate_slug
from functools import wraps
import inspect

### Create logger

In [2]:
# Module-level logger named after the current module; initSim.initT() reports
# parameter mismatches through it.
logger = logging.getLogger(__name__)
# Enable coloredlogs output at DEBUG level.
coloredlogs.install(level='DEBUG')

### Function definitions

These functions deal with the initialisation of simulations based on the given parameters.
The parameters are:
* Transposon count at start (tcount, int, default = 1)
* Transposon type (ttype, list, [1 or 2], default = [1])
* Population size (popsize, int, default = 1000)
* Number of insertion sites (insize, int, default = 2000)
* Genome size (cM) (gensize, int, default = 1000)
* Number of individuals with transposons (incount, int, default = 1)
* Transposition rate for transposons (trate, list, default = [0.02])
* Selection penalty for transposons (tpenalty, list, default = [-0.02])
* Parent of origin for transposons (tparent, list, default = ["Mother"])

In [3]:
# StackOverflow snippet #1
###########################################################################################################################
#//////|   Decorator   |//////////////////////////////////////////////////////////////////////////////////////////////////#
###########################################################################################################################


def auto_assign_arguments(function):
    """Decorate a method so its arguments are stored as instance attributes.

    Before the wrapped method runs, ``_assign_args`` is called with the
    instance, the positional arguments, the keyword arguments and the wrapped
    function, so that the method body can already read the values from
    ``self``.

    Args:
        function: The method to wrap; its first parameter must be the
            instance (``self``).

    Returns:
        A wrapper with the metadata of ``function`` (via ``functools.wraps``)
        that assigns the arguments and then returns whatever ``function``
        returns.
    """

    @wraps(function)
    def wrapped(self, *args, **kwargs):
        # Pass a list copy of the positional arguments: the assignment helper
        # works on a mutable list, while ``function`` needs the original tuple.
        _assign_args(self, list(args), kwargs, function)
        # Propagate the result so decorating a method other than ``__init__``
        # does not silently turn its return value into ``None``.
        return function(self, *args, **kwargs)

    return wrapped


###########################################################################################################################
#//////|   Utils   |//////////////////////////////////////////////////////////////////////////////////////////////////////#
###########################################################################################################################


def _assign_args(instance, args, kwargs, function):

    def set_attribute(instance, parameter, default_arg):
        if not (parameter.startswith("_")):
            setattr(instance, parameter, default_arg)

    def assign_keyword_defaults(parameters, defaults):
        for parameter, default_arg in zip(reversed(parameters),
                                          reversed(defaults)):
            set_attribute(instance, parameter, default_arg)

    def assign_positional_args(parameters, args):
        for parameter, arg in zip(parameters, args.copy()):
            set_attribute(instance, parameter, arg)
            args.remove(arg)

    def assign_keyword_args(kwargs):
        for parameter, arg in kwargs.items():
            set_attribute(instance, parameter, arg)

    def assign_keyword_only_defaults(defaults):
        return assign_keyword_args(defaults)

    def assign_variable_args(parameter, args):
        set_attribute(instance, parameter, args)

    POSITIONAL_PARAMS, VARIABLE_PARAM, _, KEYWORD_DEFAULTS, _, KEYWORD_ONLY_DEFAULTS, _ = inspect.getfullargspec(
        function)
    POSITIONAL_PARAMS = POSITIONAL_PARAMS[1:]  # remove 'self'

    if (KEYWORD_DEFAULTS):
        assign_keyword_defaults(parameters=POSITIONAL_PARAMS,
                                defaults=KEYWORD_DEFAULTS)
    if (KEYWORD_ONLY_DEFAULTS):
        assign_keyword_only_defaults(defaults=KEYWORD_ONLY_DEFAULTS)
    if (args): assign_positional_args(parameters=POSITIONAL_PARAMS, args=args)
    if (kwargs): assign_keyword_args(kwargs=kwargs)
    if (VARIABLE_PARAM):
        assign_variable_args(parameter=VARIABLE_PARAM, args=args)


###########################################################################################################################

In [9]:
class initSim:
    """Class to initialize the progenitor (F0) population.

    initSim primarily creates an object which stores information about the
    initial population. Moreover the inbuilt methods will return pandas
    dataframes for direct modification as desired.

    Attributes set from the constructor arguments:
        tcount: Number of transposons created by ``initT`` and number of
            individuals that receive one in ``initPG``.
        ttype: Per-transposon value for the ``Class`` column.
        popsize: Number of individuals created by ``initPG``.
        insize: Upper bound (exclusive) for the sampled insertion positions.
        gensize: Value written to the ``Size`` column of every genome.
        incount: Stored on the instance; not read by any method of this class.
        trate: Per-transposon value for the ``TraRate`` column.
        tpenalty: Per-transposon value for the ``SelCo`` column.
        tparent: Per-transposon value for the ``Parent`` column
            ("Mother" or "Father").

    Frames:
        TranspFrame: One row per transposon.
        PopFrame: One row per individual.
        GenFrame: One row per genome; shares its ``GID`` with the
            individual's ``Genome`` column.
    """

    @auto_assign_arguments
    def __init__(self,
                 tcount=1,
                 ttype=[1],
                 popsize=1000,
                 insize=2000,
                 gensize=1000,
                 incount=1,
                 trate=[0.02],
                 tpenalty=[-0.02],
                 tparent=["Mother"]):
        # The decorator has already stored every argument on ``self``. The
        # list-valued defaults are single objects shared by all calls, so keep
        # a private copy per instance to rule out cross-instance mutation.
        for name in ("ttype", "trate", "tpenalty", "tparent"):
            setattr(self, name, list(getattr(self, name)))

        # Dataframe definitions
        self.TranspFrame = pd.DataFrame(columns=[
            'TID', 'Position', 'SelCo', 'Name', 'Class', 'Traceback',
            'Generation', "Parent", "TraRate"
        ])
        self.PopFrame = pd.DataFrame(columns=[
            'PID', 'Fitness', 'Name', 'Sex', 'Lineage', 'Generation', 'TE',
            'Genome'
        ])
        self.GenFrame = pd.DataFrame(columns=[
            'GID', 'Size', 'Insertion_Father', 'Insertion_Mother', 'Recomob'
        ])

    @staticmethod
    def _append_rows(frame, rows):
        """Return ``frame`` extended by ``rows`` (a list of column->value dicts).

        Replaces the per-row ``DataFrame.append`` calls, which were removed in
        pandas 2.0 and copied the whole frame on every call.
        """
        if not rows:
            return frame
        addition = pd.DataFrame(rows, columns=list(frame.columns))
        if frame.empty:
            # Nothing to keep from the old frame; skipping the concat also
            # avoids mixing an all-empty frame into dtype inference.
            return addition
        return pd.concat([frame, addition], ignore_index=True)

    # Init transposons
    def initT(self):
        """Create ``tcount`` transposons and add them to ``TranspFrame``.

        If any per-transposon list (``tpenalty``, ``trate``, ``ttype``,
        ``tparent``) is shorter than ``tcount``, it is replaced by the default
        value repeated ``tcount`` times and an info message is logged.

        Returns:
            The updated ``TranspFrame``.

        Raises:
            ValueError: From ``random.sample`` when ``tcount`` exceeds the
                number of available insertion sites (``insize - 1``).
        """
        per_transposon_defaults = (
            ("tpenalty", -0.02, "selection penalties"),
            ("trate", 0.02, "transposition rates"),
            ("ttype", 1, "transposon types"),
            ("tparent", "Mother", "transposon parent"),
        )
        for attribute, default, label in per_transposon_defaults:
            if self.tcount > len(getattr(self, attribute)):
                logger.info(
                    "Mismatch between transposon count and %s. "
                    "Using default for each transposon count!", label)
                setattr(self, attribute, [default] * self.tcount)

        # Create random filled insertion sites (distinct positions).
        positions = random.sample(range(1, self.insize), self.tcount)

        # zip stops after ``tcount`` entries because ``positions`` has exactly
        # that length; the parameter lists may be longer.
        rows = [{
            'TID': uuid.uuid4().hex,
            'Position': position,
            'TraRate': rate,
            'SelCo': penalty,
            'Name': generate_slug(),
            'Class': kind,
            'Traceback': ['0'],
            'Generation': 1,
            'Parent': parent
        } for position, rate, penalty, kind, parent in zip(
            positions, self.trate, self.tpenalty, self.ttype, self.tparent)]

        self.TranspFrame = self._append_rows(self.TranspFrame, rows)
        return self.TranspFrame

    # Init population and genome
    def initPG(self):
        """Create ``popsize`` individuals with one genome each.

        ``tcount`` randomly chosen individuals each receive one transposon
        from ``TranspFrame`` (in frame order, assigned by ascending individual
        index). The transposon's position is recorded in the genome column
        matching its ``Parent`` value.

        Returns:
            Tuple ``(PopFrame, GenFrame)``.

        Raises:
            ValueError: From ``random.sample`` when ``tcount`` exceeds
                ``popsize - 1``.
            IndexError: When ``TranspFrame`` holds fewer than ``tcount``
                transposons (e.g. ``initT`` was not called first).
        """
        # Create transposon insertions in randomly selected individuals.
        # NOTE(review): sampling starts at 1, so the individual with index 0
        # can never carry a transposon - confirm this is intended.
        carriers = set(random.sample(range(1, self.popsize), self.tcount))

        # Read the needed transposon columns once instead of filtering the
        # frame several times per carrier.
        transposons = self.TranspFrame[['TID', 'Parent',
                                        'Position']].to_dict('records')
        next_transposon = 0

        pop_rows = []
        gen_rows = []
        for i in range(self.popsize):
            # Reset for every individual so that a carrier never inherits the
            # insertion position recorded for the previous carrier.
            te = '0'
            insertion_father = 0
            insertion_mother = 0

            # In case this (un)lucky individual has transposon insertion
            if i in carriers:
                transposon = transposons[next_transposon]
                next_transposon += 1
                te = transposon['TID']
                # NOTE(review): the transposon's SelCo is not applied to the
                # carrier's Fitness below - confirm whether it should be.
                if transposon['Parent'] == "Mother":
                    insertion_mother = transposon['Position']
                elif transposon['Parent'] == "Father":
                    insertion_father = transposon['Position']

            # Create unique GID to be shared between individual and genome
            gid = uuid.uuid4().hex

            # Populate the population!
            pop_rows.append({
                'PID': uuid.uuid4().hex,
                'Fitness': random.uniform(0.6, 1.0),
                'Name': generate_slug(),
                'Sex': 'H',
                'Lineage': '0',
                'Generation': 1,
                'TE': te,
                'Genome': gid
            })
            gen_rows.append({
                'GID': gid,
                'Size': self.gensize,
                'Insertion_Father': insertion_father,
                'Insertion_Mother': insertion_mother,
                'Recomob': [0]
            })

        self.PopFrame = self._append_rows(self.PopFrame, pop_rows)
        self.GenFrame = self._append_rows(self.GenFrame, gen_rows)
        return (self.PopFrame, self.GenFrame)

    def createSim(self):
        """Run ``initT`` and ``initPG`` in that order.

        Returns:
            List ``[transposon frame, population frame, genome frame]``.
        """
        transposon = self.initT()
        population, genome = self.initPG()
        return [transposon, population, genome]

In [8]:
# Build a simulation with 40 starting transposons; every other parameter
# keeps its default.
k = initSim(tcount=40)
# createSim() returns the list [transposon frame, population frame, genome frame].
t = k.createSim()

2019-11-26 14:05:44 siddharth-OptiPlex-7070 __main__[9626] INFO Mismatch between transposon count and selection penalties. Using default for each transposon count!
2019-11-26 14:05:44 siddharth-OptiPlex-7070 __main__[9626] INFO Mismatch between transposon count and transposition rates. Using default for each transposon count!
2019-11-26 14:05:44 siddharth-OptiPlex-7070 __main__[9626] INFO Mismatch between transposon count and transposon types. Using default for each transposon count!
2019-11-26 14:05:44 siddharth-OptiPlex-7070 __main__[9626] INFO Mismatch between transposon count and transposon parent. Using default for each transposon count!


In [48]:
# Show the genome frame (third element of the list returned by createSim()).
t[2]

Unnamed: 0,GID,Size,Insertion_Father,Insertion_Mother,Recomob
0,f2a23a446f4f43bb84fff378ab41ceae,1000,0,0,[0]
1,fea6f52c7bf7465a93c2aed1e0f227ef,1000,0,0,[0]
2,860afc3e37324495b041a14f43bffd4a,1000,0,0,[0]
3,d22482b522964f85bd42539290f7ecae,1000,0,0,[0]
4,3939000b9dc042cd9822543081fa9aa3,1000,0,514,[0]
...,...,...,...,...,...
95,e9790f93bdd3423f8171fbdf406f26c0,1000,0,0,[0]
96,352c2366daaf43d595202e13aa2e1a7f,1000,0,995,[0]
97,70c37c45f427488b87a87cd83b843ae6,1000,0,0,[0]
98,6237c6437c0a4d54955e2a10f13cfd82,1000,0,0,[0]


In [8]:
# Print `t` with pandas' row and column display limits lifted (None) for the
# duration of the block.
with pd.option_context('display.max_rows', None, 'display.max_columns',
                       None):  # more options can be specified also
    print(t)

                                TID Position  SelCo  \
0  65873038ff3541d6962fdc4696327658      962  -0.02   
1  67783ed3a80144c88ae4a74f6ae8d9c4     1036  -0.02   
2  ca1744086d854a548b9dd4055ecce918      118  -0.02   
3  b8238da1780b40998b158b02816321bf      727  -0.02   

                                    Name Class Traceback Generation  Parent  
0    divergent-famous-trogon-of-infinity     1       [0]          1  Mother  
1     authentic-upbeat-perch-of-judgment     1       [0]          1  Mother  
2          wise-spiritual-tapir-of-pluck     1       [0]          1  Mother  
3  rustling-hungry-reindeer-of-fortitude     1       [0]          1  Mother  


In [None]:
# Print `t` with pandas' row and column display limits lifted (None) for the
# duration of the block.
with pd.option_context('display.max_rows', None, 'display.max_columns',
                       None):  # more options can be specified also
    print(t)