In [90]:
# Imports
import inspect
import random
import tempfile
import uuid
from functools import wraps
import collections

import coloredlogs
import dask as dd
import logging
import numpy as np
import pandas as pd
from coolname import generate_slug

# Logger
logger = logging.getLogger(__name__)
coloredlogs.install(level='DEBUG')

# StackOverflow snippet #1
########################################################################################################################
# /////|   Decorator   |////////////////////////////////////////////////////////////////////////////////////////////////
########################################################################################################################


def auto_assign_arguments(function):

    @wraps(function)
    def wrapped(self, *args, **kwargs):
        _assign_args(self, list(args), kwargs, function)
        function(self, *args, **kwargs)

    return wrapped


########################################################################################################################
# /////|   Utils   |////////////////////////////////////////////////////////////////////////////////////////////////////
########################################################################################################################


def _assign_args(instance, args, kwargs, function):

    def set_attribute(instance, parameter, default_arg):
        if not (parameter.startswith("_")):
            setattr(instance, parameter, default_arg)

    def assign_keyword_defaults(parameters, defaults):
        for parameter, default_arg in zip(reversed(parameters),
                                          reversed(defaults)):
            set_attribute(instance, parameter, default_arg)

    def assign_positional_args(parameters, args):
        for parameter, arg in zip(parameters, args.copy()):
            set_attribute(instance, parameter, arg)
            args.remove(arg)

    def assign_keyword_args(kwargs):
        for parameter, arg in kwargs.items():
            set_attribute(instance, parameter, arg)

    def assign_keyword_only_defaults(defaults):
        return assign_keyword_args(defaults)

    def assign_variable_args(parameter, args):
        set_attribute(instance, parameter, args)

    POSITIONAL_PARAMS, VARIABLE_PARAM, _, KEYWORD_DEFAULTS, _, KEYWORD_ONLY_DEFAULTS, _ = inspect.getfullargspec(
        function)
    POSITIONAL_PARAMS = POSITIONAL_PARAMS[1:]  # remove 'self'

    if (KEYWORD_DEFAULTS):
        assign_keyword_defaults(parameters=POSITIONAL_PARAMS,
                                defaults=KEYWORD_DEFAULTS)
    if (KEYWORD_ONLY_DEFAULTS):
        assign_keyword_only_defaults(defaults=KEYWORD_ONLY_DEFAULTS)
    if (args): assign_positional_args(parameters=POSITIONAL_PARAMS, args=args)
    if (kwargs): assign_keyword_args(kwargs=kwargs)
    if (VARIABLE_PARAM):
        assign_variable_args(parameter=VARIABLE_PARAM, args=args)


########################################################################################################################


########################################################################################################################
# /////|   Class   |////////////////////////////////////////////////////////////////////////////////////////////////////
########################################################################################################################
class initSim:
    """Class to initialize the progenitor (F0) population.

    initSim primarily creates an object which stores information about the 
    initial population. Moreover the inbuilt methods will return pandas
    dataframe for direct modification as desired.
    
    :return: Object
    :rtype: object type initSim
    """

    @auto_assign_arguments
    def __init__(self,
                 tcount=1,
                 ttype=[1],
                 popsize=100,
                 insize=2000,
                 gensize=1000,
                 trate=[0.02],
                 tpenalty=[-0.02],
                 tparent=["Mother"]):
        """Class constructor
        :param tcount: Number of transposons to be present in initial population
        :param ttype: Type of transposons (class 1 or 2)
        :param popsize: Population size
        :param insize: Number of insertion sites
        :param gensize: Size of genome on cM
        :param trate: Transposition rates
        :param tpenalty: Selection penalty for each transposon
        :param tparent: Parent carrying the transposon insertion (Mother/Father)
        :type tcount: int
        :type ttype: list [int]
        :type popsize: int
        :type insize: int
        :type gensize: int
        :type trate: list [float]
        :type tpenalty: list [float]
        :type arg1: tparent [string]
        """
        # Dataframe definations
        self.TranspFrame = pd.DataFrame(columns=[
            'TID', 'Position', 'SelCo', 'Name', 'Class', 'Traceback',
            'Generation', 'Parent', "TraRate"
        ])
        self.PopFrame = pd.DataFrame(columns=[
            'PID', 'Fitness', 'Name', 'Sex', 'Lineage', 'Generation',
            'TEfather', 'TEmother', 'Insertion_Father', 'Insertion_Mother'
        ])

    # Init transposons
    def initT(self):
        """Method to dataframe containing initial transposon population
        
        :return: Dataframe containing Transposon information
        :rtype: Dataframe
        """

        if (self.tcount > len(self.tpenalty)):
            logger.info(
                "Mismatch between transposon count and selection penalties. Using default for each transposon count!"
            )
            self.tpenalty = [-0.02] * self.tcount
        if (self.tcount > len(self.trate)):
            logger.info(
                "Mismatch between transposon count and transposition rates. Using default for each transposon count!"
            )
            self.trate = [0.02] * self.tcount
        if (self.tcount > len(self.ttype)):
            logger.info(
                "Mismatch between transposon count and transposon types. Using default for each transposon count!"
            )
            self.ttype = [1] * self.tcount
        if (self.tcount > len(self.tparent)):
            logger.info(
                "Mismatch between transposon count and transposon parent. Using default for each transposon count!"
            )
            self.tparent = ["Mother"] * self.tcount

        # Create random filled insertion sites
        inSiteArray = random.sample(range(1, self.insize), self.tcount)

        for i in range(0, self.tcount):
            row = pd.Series({
                'TID': uuid.uuid4().hex,
                'Position': inSiteArray[i],
                'TraRate': self.trate[i],
                'SelCo': self.tpenalty[i],
                'Name': generate_slug(),
                'Class': self.ttype[i],
                'Traceback': ['0'],
                'Generation': 1,
                'Parent': self.tparent[i]
            })
            self.TranspFrame = self.TranspFrame.append(row, ignore_index=True)
        return (self.TranspFrame)

    # Init population and genome
    def initPG(self):
        """Method to create initial population and their respective genomes
        
        :return: tuple(population,genome)
            WHERE
            population is population dataframe
            genome is genome dataframe
        :rtype: DataFrame
        """

        # Create transposon insertions in randomly selected individuals
        IndividualToInsert = random.sample(list(range(1, self.popsize)),
                                           self.tcount)
        TIDlist = self.TranspFrame.TID.tolist()
        TIDcounter = 0
        Parent = "0"
        insertion_Father = 0
        insertion_Mother = 0
        FitnessPen = 0
        TEfather = "0"
        TEmother = "0"
        for i in range(self.popsize):
            # In case this (un)lucky individual has transposon insertion
            if (i in IndividualToInsert):
                TE = TIDlist[TIDcounter]
                TIDcounter += 1
                Parent = self.TranspFrame[self.TranspFrame['TID'] ==
                                          TE]['Parent'].values[0]
                FitnessPen = self.TranspFrame[self.TranspFrame['TID'] ==
                                              TE]['SelCo'].values[0]
                if (Parent == "Mother"):
                    insertion_Mother = self.TranspFrame[
                        self.TranspFrame['TID'] == TE]['Position'].values[0]
                    TEmother = TE

                if (Parent == "Father"):
                    insertion_Father = self.TranspFrame[
                        self.TranspFrame['TID'] == TE]['Position'].values[0]
                    TEfather = TE

            else:
                TE = '0'
                Parent = "0"
                insertion_Father = 0
                insertion_Mother = 0
                FitnessPen = 0
                TEmother = "0"
                TEfather = "0"

            # Populate the population!
            # Define intial fitness
            fitness = random.uniform(0.6, 1.0)
            rowPop = pd.Series({
                'PID': uuid.uuid4().hex,
                'Fitness': fitness,
                'NetFitness': fitness + FitnessPen,
                'Name': generate_slug(),
                'Sex': 'H',
                'Lineage': ['0'],
                'Generation': 1,
                'Insertion_Father': [insertion_Father],
                'Insertion_Mother': [insertion_Mother],
                'TEmother': [TEmother],
                'TEfather': [TEfather]
            })
            self.PopFrame = self.PopFrame.append(rowPop, ignore_index=True)

        self.PopFrame['Lineage'] = self.PopFrame['Lineage'].astype('object')
        self.PopFrame['Insertion_Father'] = self.PopFrame[
            'Insertion_Father'].astype('object')
        self.PopFrame['Insertion_Mother'] = self.PopFrame[
            'Insertion_Mother'].astype('object')
        return (self.PopFrame)

    def createSim(self):
        """Method to generate the initial simulation dataset
        
        :return: List
            WHERE 
            index 0 is transposon dataframe
            index 1 is population dataframe
            index 2 is genome dataframe
        :rtype: list
        """

        transposon = self.initT()
        genome = self.initPG()
        return ([transposon, genome])


########################################################################################################################

Genome = pd.DataFrame(
    columns=['Position', 'RecombinationRate', 'InsertionSite'])


def generateGenome(genomeSize, nInsertionSites, nChrom):
    genome = list(range(1, genomeSize + 1))
    InsertionSite = [0] * len(genome)
    RecombinationRate = [0.01] * len(genome)
    insertionLocation = np.random.choice(genomeSize,
                                         nInsertionSites,
                                         replace=False)
    chrLocation = np.random.choice(genomeSize, nChrom, replace=False)

    insertionCounter = 1
    for i in insertionLocation:
        InsertionSite[i] = insertionCounter
        insertionCounter += 1

    insertionCounter = 1
    for i in chrLocation:
        if (InsertionSite[i] != 0):
            RecombinationRate[i + 1] = 0.5
            insertionCounter += 1
        else:
            RecombinationRate[i] = 0.5
            insertionCounter += 1

    genomeDict = {
        'Position': genome,
        'RecombinationRate': RecombinationRate,
        'InsertionSite': InsertionSite
    }
    genome = pd.DataFrame(genomeDict)
    return (genome)

In [91]:
def recombination(genomeFrame, genomeOrg):
    # Create insertion list for the progeny
    TEprogeny = []
    TEid = []
    # Create a copy of genomeFrame
    genomeCopy = genomeFrame.copy(deep=True)
    TEid_Father = genomeOrg['TEfather'].copy()
    TEid_Mother = genomeOrg['TEmother'].copy()
    Insertion_Father = genomeOrg['Insertion_Father']
    Insertion_Mother = genomeOrg['Insertion_Mother']
    if (Insertion_Father[0] == 0 and Insertion_Mother[0] == 0):
        return ([0],
                random.choice([
                    genomeOrg['Insertion_Father'], genomeOrg['Insertion_Mother']
                ]))
    else:
        initParent = random.choice(["M", "F"])
        surrogateParent = "M" if ("M" != initParent) else "F"
        RandArray = np.random.uniform(0, 1.0, genomeFrame.shape[0])
        switch = genomeCopy['RecombinationRate'] > RandArray
        #counter = collections.Counter(switch)
        #print(counter)
        genomeCopy['Progeny'] = np.where(switch.cumsum() % 2 == 0, initParent,
                                         surrogateParent)
        if all(v == 0 for v in Insertion_Father):
            pass
        else:
            for i in Insertion_Father:
                se = genomeCopy[genomeCopy['InsertionSite'] ==
                                i]['Progeny'].values[0]
                if (se == "M"):
                    TEprogeny.append(i)
                    TEid.append(TEid_Father.pop(0))
        if all(v == 0 for v in Insertion_Mother):
            pass
        else:
            for i in Insertion_Mother:
                se = genomeCopy[genomeCopy['InsertionSite'] ==
                                i]['Progeny'].values[0]
                if (se == "F"):
                    TEprogeny.append(i)
                    TEid.append(TEid_Mother.pop(0))
        #print(genomeCopy['Progeny'].value_counts())
    if not TEprogeny:
        TEprogeny.append(0)
    if not TEid:
        TEid.append("0")
    return (TEprogeny, TEid)

In [92]:
genome = generateGenome(100000, 2000, 100)

In [106]:
k = initSim(tcount=80)
t = k.createSim()
t[0]

2019-12-04 17:22:43 siddharth-OptiPlex-7070 __main__[9343] INFO Mismatch between transposon count and selection penalties. Using default for each transposon count!
2019-12-04 17:22:43 siddharth-OptiPlex-7070 __main__[9343] INFO Mismatch between transposon count and transposition rates. Using default for each transposon count!
2019-12-04 17:22:43 siddharth-OptiPlex-7070 __main__[9343] INFO Mismatch between transposon count and transposon types. Using default for each transposon count!
2019-12-04 17:22:43 siddharth-OptiPlex-7070 __main__[9343] INFO Mismatch between transposon count and transposon parent. Using default for each transposon count!


Unnamed: 0,TID,Position,SelCo,Name,Class,Traceback,Generation,Parent,TraRate
0,e0d5e4cadf3f4d58a95e24d659c42169,947,-0.02,vagabond-zircon-ermine-of-tolerance,1,[0],1,Mother,0.02
1,c0a3aa4855c642c0a43a3f625b00b9ea,1483,-0.02,screeching-kind-uakari-of-discussion,1,[0],1,Mother,0.02
2,bed7cf5bf8f746acbc7a6e50fc0e65af,860,-0.02,quaint-mayfly-of-utter-tranquility,1,[0],1,Mother,0.02
3,8082b79ce9124bfa9719163c82866a96,299,-0.02,festive-civet-of-interesting-art,1,[0],1,Mother,0.02
4,2c45c3c98c5d417b92f5302539b02bf8,955,-0.02,quirky-analytic-echidna-of-art,1,[0],1,Mother,0.02
...,...,...,...,...,...,...,...,...,...
75,4bec87a6ebba4e0f8ded7fb90281df6a,895,-0.02,cream-buzzard-of-premium-opposition,1,[0],1,Mother,0.02
76,31f79efe574f4986bc1506eb331f9383,334,-0.02,zippy-delicate-capuchin-of-conversion,1,[0],1,Mother,0.02
77,6d4a1de23fa443c2a80fa230a55d5131,203,-0.02,funky-futuristic-snake-from-wonderland,1,[0],1,Mother,0.02
78,bd2e0234982a4df5be7a84a676124450,1494,-0.02,practical-yellow-chicken-of-reward,1,[0],1,Mother,0.02


In [96]:
t[1].iloc[3]['NetFitness']

0.6466766657424451

In [95]:
print(recombination(genome, t[1].iloc[3].copy()))

([744], ['437a72bd57ce40f3941b43950cbfc74b'])


In [110]:
def selection(transposonFrame, populationFrame, genomeFrame):
    parentFrame = populationFrame.sample(n=2, weights='NetFitness')
    fitness = (parentFrame.iloc[0]['Fitness'] +
               parentFrame.iloc[1]['Fitness']) / 2
    TIDm, alSetM = recombination(genome, parentFrame.iloc[0].copy())
    TIDf, alSetF = recombination(genome, parentFrame.iloc[1].copy())
    rowPop = pd.Series({
        'PID': uuid.uuid4().hex,
        'Fitness': fitness,
        'Name': generate_slug(),
        'Sex': 'H',
        'Lineage': ['0'],
        'Generation': 1,
        'Insertion_Father': alSetF,
        'Insertion_Mother': alSetM,
        'TEmother': TIDm,
        'TEfather': TIDf
    })

    # Check and create a random insertion event
    if (x > np.random.uniform(0, 1.0)):
        l = 1

    return (rowPop)

In [81]:
print(selection(t[1], genome))

[570]
['86c14f168c0f4170b616e156c48a0c75']
[159]
['04582b471f3f475caa27e7d538b6fe96']


NameError: name 'FitnessPen' is not defined

In [105]:
np.random.uniform(0, 1.0, 300)

array([0.98258243, 0.6239294 , 0.67071849, 0.73298315, 0.84276073,
       0.28273368, 0.29324417, 0.8304895 , 0.28059126, 0.93623879,
       0.02839499, 0.73993991, 0.68051414, 0.58861151, 0.34682212,
       0.22981469, 0.47033076, 0.23075537, 0.51657739, 0.34336473,
       0.54397561, 0.56123949, 0.87013298, 0.59463333, 0.84175716,
       0.3871603 , 0.73137642, 0.44012502, 0.61025677, 0.86206187,
       0.92389068, 0.72325832, 0.78632506, 0.25275506, 0.35963005,
       0.60196962, 0.22671567, 0.9576299 , 0.39315037, 0.33285621,
       0.60580803, 0.92783629, 0.18869488, 0.15104556, 0.19670781,
       0.03293804, 0.73215339, 0.50651387, 0.74082274, 0.93159477,
       0.02038116, 0.8342662 , 0.10116806, 0.50974547, 0.46167661,
       0.7662214 , 0.52329072, 0.50534611, 0.97768863, 0.32079169,
       0.08024903, 0.90734147, 0.74543472, 0.44333707, 0.35643321,
       0.3337663 , 0.39741735, 0.84888365, 0.55640055, 0.99591267,
       0.87390043, 0.50648394, 0.78217268, 0.53006326, 0.21034