In [None]:
# Imports
import inspect
import random
import tempfile
import uuid
from functools import wraps
import collections
import seaborn as sns
import matplotlib.pyplot as plt
import coloredlogs
import dask as dd
import logging
import numpy as np
import pandas as pd
from coolname import generate_slug
import warnings

warnings.filterwarnings("ignore")


# Logger
logger = logging.getLogger(__name__)
coloredlogs.install(level="DEBUG")

# StackOverflow snippet #1
########################################################################################################################
# /////|   Decorator   |////////////////////////////////////////////////////////////////////////////////////////////////
########################################################################################################################


def auto_assign_arguments(function):
    @wraps(function)
    def wrapped(self, *args, **kwargs):
        _assign_args(self, list(args), kwargs, function)
        function(self, *args, **kwargs)

    return wrapped


########################################################################################################################
# /////|   Utils   |////////////////////////////////////////////////////////////////////////////////////////////////////
########################################################################################################################


def _assign_args(instance, args, kwargs, function):
    def set_attribute(instance, parameter, default_arg):
        if not (parameter.startswith("_")):
            setattr(instance, parameter, default_arg)

    def assign_keyword_defaults(parameters, defaults):
        for parameter, default_arg in zip(
            reversed(parameters), reversed(defaults)
        ):
            set_attribute(instance, parameter, default_arg)

    def assign_positional_args(parameters, args):
        for parameter, arg in zip(parameters, args.copy()):
            set_attribute(instance, parameter, arg)
            args.remove(arg)

    def assign_keyword_args(kwargs):
        for parameter, arg in kwargs.items():
            set_attribute(instance, parameter, arg)

    def assign_keyword_only_defaults(defaults):
        return assign_keyword_args(defaults)

    def assign_variable_args(parameter, args):
        set_attribute(instance, parameter, args)

    (
        POSITIONAL_PARAMS,
        VARIABLE_PARAM,
        _,
        KEYWORD_DEFAULTS,
        _,
        KEYWORD_ONLY_DEFAULTS,
        _,
    ) = inspect.getfullargspec(function)
    POSITIONAL_PARAMS = POSITIONAL_PARAMS[1:]  # remove 'self'

    if KEYWORD_DEFAULTS:
        assign_keyword_defaults(
            parameters=POSITIONAL_PARAMS, defaults=KEYWORD_DEFAULTS
        )
    if KEYWORD_ONLY_DEFAULTS:
        assign_keyword_only_defaults(defaults=KEYWORD_ONLY_DEFAULTS)
    if args:
        assign_positional_args(
            parameters=POSITIONAL_PARAMS, args=args
        )
    if kwargs:
        assign_keyword_args(kwargs=kwargs)
    if VARIABLE_PARAM:
        assign_variable_args(parameter=VARIABLE_PARAM, args=args)


########################################################################################################################

In [None]:
########################################################################################################################
# /////|   Class   |////////////////////////////////////////////////////////////////////////////////////////////////////
########################################################################################################################
class initSim:
    """Class to initialize the progenitor (F0) population.

    initSim primarily creates an object which stores information about the 
    initial population. Moreover the inbuilt methods will return pandas
    dataframe for direct modification as desired.
    
    :return: Object
    :rtype: object type initSim
    """

    @auto_assign_arguments
    def __init__(
        self,
        tcount=1,
        ttype=[1],
        popsize=100,
        insize=2000,
        trate=[0.02],
        tparent=["Mother"],
        nChr=50,
    ):
        """Class constructor
        :param tcount: Number of transposons to be present in initial population
        :param ttype: Type of transposons (class 1 or 2)
        :param popsize: Population size
        :param insize: Number of insertion sites
        :param trate: Transposition rates
        :param tparent: Parent carrying the transposon insertion (Mother/Father)
        :param nChr: Number of chromosomes
        :type tcount: int
        :type ttype: list [int]
        :type popsize: int
        :type insize: int
        :type trate: list [float]
        :type tparent: list [string]
        :type nChr: int
        """
        # Dataframe definations
        self.TranspFrame = pd.DataFrame(
            columns=[
                "TID",
                "Name",
                "Class",
                "Traceback",
                "Generation",
                "Parent",
                "TraRate",
            ]
        )
        self.PopFrame = pd.DataFrame(
            columns=[
                "PID",
                "Fitness",
                "Name",
                "Sex",
                "Lineage",
                "Generation",
                "TEfather",
                "TEmother",
                "Insertion_Father",
                "Insertion_Mother",
            ]
        )
        self.GenFrame = pd.DataFrame(
            columns=[
                "InsertionSiteID",
                "InsertionProbability",
                "RecombinationRate",
                "SelectionCoef",
                "Filled",
            ]
        )

    def generateGenome(self):
        NumberInsertionSites = self.insize
        NumberChromosomes = self.nChr
        ###-----------Function main-----------###
        genome = list(range(1, NumberInsertionSites))
        InsertionSiteColumn = list(range(1, NumberInsertionSites))
        RecombinationRate = [0.01] * len(genome)
        chrLocation = np.random.choice(
            list(range(1, NumberInsertionSites - 10)),
            NumberChromosomes,
            replace=False,
        )
        SelectionCoef = np.random.normal(-0.02, 0.01, len(genome))
        insertionProbability = np.random.uniform(
            0.01, 0.99, len(genome)
        )

        for i in chrLocation:
            RecombinationRate[i] = 0.5

        genomeDict = {
            "InsertionSiteID": InsertionSiteColumn,
            "InsertionProbability": insertionProbability,
            "RecombinationRate": RecombinationRate,
            "SelectionCoef": SelectionCoef,
            "Filled": False,
        }
        genome = pd.DataFrame(genomeDict)

        # ###-----------Function plot-----------###
        # ### Plot the simulated variables
        # sns.set(style="ticks", palette="muted", color_codes=True)
        # # Set up the matplotlib figure
        # f, axes = plt.subplots(1, 3, figsize=(15, 7))
        # chartRecombinationRates = sns.distplot(
        #     RecombinationRate,
        #     ax=axes[0],
        #     kde=False,
        #     axlabel="Histogram - Recombination rate",
        # )
        # chartInsertionProbability = sns.distplot(
        #     insertionProbability,
        #     ax=axes[1],
        #     kde=False,
        #     axlabel="Histogram - Insertion probability",
        # )
        # chartSelectionCoef = sns.distplot(
        #     SelectionCoef,
        #     ax=axes[2],
        #     kde=False,
        #     axlabel="Histogram - Selection coefficient",
        # )

        ###---------------Return--------------###
        self.GenFrame = genome
        return genome

    # Init transposons
    def initT(self):
        """Method to dataframe containing initial transposon population
        
        :return: Dataframe containing Transposon information
        :rtype: Dataframe
        """
        if self.tcount > len(self.trate):
            logger.info(
                "Mismatch between transposon count and transposition rates. Using default for each transposon count!"
            )
            self.trate = [0.02] * self.tcount
        if self.tcount > len(self.ttype):
            logger.info(
                "Mismatch between transposon count and transposon types. Using default for each transposon count!"
            )
            self.ttype = [1] * self.tcount
        if self.tcount > len(self.tparent):
            logger.info(
                "Mismatch between transposon count and transposon parent. Using default for each transposon count!"
            )
            self.tparent = ["Mother"] * self.tcount

        # Create random filled insertion sites
        inSiteArray = random.sample(
            range(1, self.insize - 10), self.tcount
        )

        for i in range(0, self.tcount):
            self.GenFrame.loc[
                self.GenFrame["InsertionSiteID"]
                == int(inSiteArray[i]),
                "Filled",
            ] = True
            row = pd.Series(
                {
                    "TID": uuid.uuid4().hex,
                    "InsertionSite": inSiteArray[i],
                    "TraRate": self.trate[i],
                    "Name": generate_slug(),
                    "Class": self.ttype[i],
                    "Traceback": ["0"],
                    "Generation": 1,
                    "Parent": self.tparent[i],
                }
            )
            self.TranspFrame = self.TranspFrame.append(
                row, ignore_index=True
            )
            self.TranspFrame["InsertionSite"] = self.TranspFrame[
                "InsertionSite"
            ].astype(int)
        return self.TranspFrame

    # Init population
    def initPG(self):
        """Method to create initial population and their respective genomes
        
        :return: tuple(population,genome)
            WHERE
            population is population dataframe
            genome is genome dataframe
        :rtype: DataFrame
        """

        # Create transposon insertions in randomly selected individuals
        IndividualToInsert = random.sample(
            list(range(1, self.popsize)), self.tcount
        )
        TIDlist = self.TranspFrame.TID.tolist()
        TIDcounter = 0
        Parent = "0"
        insertion_Father = 0
        insertion_Mother = 0
        FitnessPen = 0
        TEfather = "0"
        TEmother = "0"
        for i in range(self.popsize):
            # In case this (un)lucky individual has transposon insertion
            if i in IndividualToInsert:
                TE = TIDlist[TIDcounter]
                TIDcounter += 1
                insertionSiteID = self.TranspFrame[
                    self.TranspFrame["TID"] == TE
                ]["InsertionSite"].values[0]
                Parent = self.TranspFrame[
                    self.TranspFrame["TID"] == TE
                ]["Parent"].values[0]
                FitnessPen = self.GenFrame[
                    self.GenFrame["InsertionSiteID"]
                    == insertionSiteID
                ]["SelectionCoef"].values[0]

                if Parent == "Mother":
                    insertion_Mother = self.TranspFrame[
                        self.TranspFrame["TID"] == TE
                    ]["InsertionSite"].values[0]
                    TEmother = TE

                if Parent == "Father":
                    insertion_Father = self.TranspFrame[
                        self.TranspFrame["TID"] == TE
                    ]["InsertionSite"].values[0]
                    TEfather = TE

            else:
                TE = "0"
                Parent = "0"
                insertion_Father = 0
                insertion_Mother = 0
                FitnessPen = 0
                TEmother = 0
                TEfather = 0

            # Populate the population!
            # Define intial fitness
            fitness = random.uniform(0.6, 1.0)
            rowPop = pd.Series(
                {
                    "PID": uuid.uuid4().hex,
                    "Fitness": fitness,
                    "NetFitness": fitness + FitnessPen,
                    "Name": generate_slug(),
                    "Sex": "H",
                    "Lineage": ["0"],
                    "Generation": 1,
                    "Insertion_Father": [insertion_Father],
                    "Insertion_Mother": [insertion_Mother],
                    "TEmother": [TEmother],
                    "TEfather": [TEfather],
                }
            )
            self.PopFrame = self.PopFrame.append(
                rowPop, ignore_index=True
            )

        self.PopFrame["Lineage"] = self.PopFrame["Lineage"].astype(
            "object"
        )
        self.PopFrame["Insertion_Father"] = self.PopFrame[
            "Insertion_Father"
        ].astype("object")
        self.PopFrame["Insertion_Mother"] = self.PopFrame[
            "Insertion_Mother"
        ].astype("object")
        return self.PopFrame

    def createSim(self):
        """Method to generate the initial simulation dataset
        
        :return: List
            WHERE 
            index 0 is transposon dataframe
            index 1 is population dataframe
            index 2 is genome dataframe
        :rtype: list
        """

        genome = self.generateGenome()
        transposon = self.initT()
        population = self.initPG()
        return [transposon, population, genome]


########################################################################################################################

In [None]:
def recombination(genomeFrame, genomeOrg):
    # Create insertion list for the progeny
    TEprogeny = []
    TEid = []
    # Create a copy of genomeFrame
    genomeCopy = genomeFrame.copy(deep=True)
    TEid_Father = genomeOrg["TEfather"].copy()
    TEid_Mother = genomeOrg["TEmother"].copy()
    Insertion_Father = genomeOrg["Insertion_Father"]
    Insertion_Mother = genomeOrg["Insertion_Mother"]
    if Insertion_Father[0] == 0 and Insertion_Mother[0] == 0:
        return (
            [0],
            random.choice(
                [
                    genomeOrg["Insertion_Father"],
                    genomeOrg["Insertion_Mother"],
                ]
            ),
        )
    else:
        initParent = random.choice(["M", "F"])
        surrogateParent = "M" if ("M" != initParent) else "F"
        RandArray = np.random.uniform(0, 1.0, genomeFrame.shape[0])
        switch = genomeCopy["RecombinationRate"] > RandArray
        # counter = collections.Counter(switch)
        # print(counter)
        genomeCopy["Progeny"] = np.where(
            switch.cumsum() % 2 == 0, initParent, surrogateParent
        )
        if all(v == 0 for v in Insertion_Father):
            pass
        else:
            for i in Insertion_Father:
                se = genomeCopy[genomeCopy["InsertionSite"] == i][
                    "Progeny"
                ].values[0]
                if se == "M":
                    TEprogeny.append(i)
                    TEid.append(TEid_Father.pop(0))
        if all(v == 0 for v in Insertion_Mother):
            pass
        else:
            for i in Insertion_Mother:
                se = genomeCopy[genomeCopy["InsertionSite"] == i][
                    "Progeny"
                ].values[0]
                if se == "F":
                    TEprogeny.append(i)
                    TEid.append(TEid_Mother.pop(0))
        # print(genomeCopy['Progeny'].value_counts())
    if not TEprogeny:
        TEprogeny.append(0)
    if not TEid:
        TEid.append(0)
    return (TEprogeny, TEid)

In [None]:
def fitness(genomeFrame, populationFrame, function=1):
    TEcontent = (
        populationFrame["Insertion_Father"]
        + populationFrame["Insertion_Mother"]
    )
    selectionCoef = []
    for i in TEcontent:
        selectionCoef.append(
            genomeFrame[genomeFrame["InsertionSiteID"] == i][
                "SelectionCoef"
            ].values[0]
        )
    if function == 1:
        fitnessValue = np.exp(sum(selectionCoef))
    if function == 2:
        fitnessValue = 1 + sum(selectionCoef)
    if function == 3:
        selectionCoef = np.array(selectionCoef)
        fitnessValue = np.prod(1 + selectionCoef)
    return fitnessValue

In [None]:
def transposition(transposonFrame, GenomeFrame, TIDm, TIDf):
    if TIDm != [0] * len(TIDm):
        occupiedSitesM = transposonFrame[
            transposonFrame["TID"].isin(TIDm)
        ].InsertionSite.tolist()
        InsertionSitesAndProbM = GenomeFrame[
            ~GenomeFrame.InsertionSiteID.isin(occupiedSitesM)
        ][["InsertionSiteID", "InsertionProbability"]].copy()
    if TIDf != [0] * len(TIDf):
        occupiedSitesF = transposonFrame[
            transposonFrame["TID"].isin(TIDf)
        ].InsertionSite.tolist()
        InsertionSitesAndProbF = GenomeFrame[
            ~GenomeFrame.InsertionSiteID.isin(occupiedSitesF)
        ][["InsertionSiteID", "InsertionProbability"]].copy()

    for i in TIDm:
        if transposonFrame[transposonFrame["TID"] == i][
            "TraRate"
        ].values[0] > np.random.uniform(0, 1.0):
            row = pd.Series(
                {
                    "TID": uuid.uuid4().hex,
                    "InsertionSite": InsertionSitesAndProbM.sample(
                        n=1, weights="InsertionProbability"
                    )["Generation"].values[0],
                    "TraRate": self.trate[i],
                    "Name": generate_slug(),
                    "Class": self.ttype[i],
                    "Traceback": ["0"],
                    "Generation": 1,
                    "Parent": self.tparent[i],
                }
            )

In [26]:
def transposition(TEfather, TEmother, genomeFrame, transposonFrame, generation):
    if TEfather != [0] * len(TEfather):
        genomeFrameCopy = genomeFrame.loc[
            genomeFrame["Filled"] == False
        ].copy()
        unFilledSites = pd.Series(
            genomeFrameCopy.InsertionProbability.values,
            index=genomeFrameCopy.InsertionSiteID,
        ).to_dict()
        for i in TEfather:
            trprate = transposonFrame.loc[transposonFrame["TID"] == i]["TraRate"].values[0]
            if trprate > np.random.uniform(0, 1.0):
                row = pd.Series(
                    {
                        "TID": uuid.uuid4().hex,
                        "InsertionSite": np.choice(
                            list(unFilledSites.values()),
                            p=list(unFilledSites.keys()),
                        ),
                        "TraRate": trprate,
                        "Name": generate_slug(),
                        "Class": transposonFrame.loc[transposonFrame["TID"] == i]["Class"].values[0],
                        "Traceback": ["0"],
                        "Generation": generation,
                        "Parent": "Father",
                    }
                )

SyntaxError: invalid syntax (<ipython-input-26-0b7a25683b72>, line 6)

In [33]:
k = initSim(tcount=4, insize=200000)
k = k.createSim()

2019-12-13 15:06:06 siddharth-OptiPlex-7070 __main__[17857] INFO Mismatch between transposon count and transposition rates. Using default for each transposon count!
2019-12-13 15:06:06 siddharth-OptiPlex-7070 __main__[17857] INFO Mismatch between transposon count and transposon types. Using default for each transposon count!
2019-12-13 15:06:06 siddharth-OptiPlex-7070 __main__[17857] INFO Mismatch between transposon count and transposon parent. Using default for each transposon count!


In [31]:
z = k[0].loc[k[0]["TID"] == 'bf2e05fcbf764b47bf7d77e999811b73']["Class"].values[0]
z

1

In [None]:
pd.Series(
    k[2].InsertionProbability.values, index=k[2].InsertionSiteID
).to_dict()

In [None]:
k[1].sample(n=1, weights="NetFitness")["Generation"].values[0]

In [35]:
k[0]

Unnamed: 0,TID,Name,Class,Traceback,Generation,Parent,TraRate,InsertionSite
0,a4c606e13ae9486db41823b84a7f40cf,industrious-warm-kittiwake-of-amplitude,1,[0],1,Mother,0.02,152469
1,5035030df4b240418264b09d50ec2e09,heretic-esoteric-gibbon-of-kindness,1,[0],1,Mother,0.02,83543
2,8b69e2e716a24e33824987c29868b18e,neon-caribou-of-immortal-romance,1,[0],1,Mother,0.02,98059
3,04a21ec7f27744ddb83f3390307edbc8,devout-charcoal-narwhal-of-typhoon,1,[0],1,Mother,0.02,19639


In [36]:
k[2]

Unnamed: 0,InsertionSiteID,InsertionProbability,RecombinationRate,SelectionCoef,Filled
0,1,0.149591,0.01,-0.021344,False
1,2,0.176226,0.01,-0.021851,False
2,3,0.456080,0.01,-0.013382,False
3,4,0.154443,0.01,-0.013464,False
4,5,0.590876,0.01,-0.017587,False
...,...,...,...,...,...
199994,199995,0.484160,0.01,-0.000041,False
199995,199996,0.912018,0.01,-0.021252,False
199996,199997,0.692379,0.01,-0.006624,False
199997,199998,0.988449,0.01,-0.021382,False


In [37]:
k[1]

Unnamed: 0,PID,Fitness,Name,Sex,Lineage,Generation,TEfather,TEmother,Insertion_Father,Insertion_Mother,NetFitness
0,d91598d76ca349aaab112d007191c399,0.969122,wine-sambar-of-hypothetical-swiftness,H,[0],1,[0],[0],[0],[0],0.969122
1,09fc657e87184fce8f55b2378b432c09,0.888701,resilient-flat-chupacabra-of-rain,H,[0],1,[0],[0],[0],[0],0.888701
2,580e5cd77816494695e552c02359d2fa,0.660988,pristine-celadon-degu-of-chemistry,H,[0],1,[0],[0],[0],[0],0.660988
3,9112f146628244c4b7d4dc19bd83dae5,0.923524,axiomatic-bouncy-weasel-of-courage,H,[0],1,[0],[0],[0],[0],0.923524
4,fe2310d6374544d8aa48acd4aebce4cd,0.829293,truthful-energetic-boar-of-endurance,H,[0],1,[0],[0],[0],[0],0.829293
...,...,...,...,...,...,...,...,...,...,...,...
95,9e67ba7a845b4d73a53c3242d1bd67c5,0.740657,light-witty-lori-of-coffee,H,[0],1,[0],[0],[0],[0],0.740657
96,f8b214f67a8145c8b91c69abf51219fc,0.971741,true-smart-pronghorn-of-finesse,H,[0],1,[0],[0],[0],[0],0.971741
97,25c53f57e40c417bb1c4a1a5476c4a7c,0.641225,discerning-mature-monkey-of-feminism,H,[0],1,[0],[0],[0],[0],0.641225
98,55a3d3cda69d459580ff29b594ff7bf9,0.885103,aboriginal-naughty-crab-of-vigor,H,[0],1,[0],[0],[0],[0],0.885103
