In [1]:
import simuOpt
simuOpt.setOptions(alleleType='lineage', quiet=True)
import simuPOP as sim
import simuPOP.utils
import simuPOP.lutils
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import math
import pandas as pd
import numpy as np
import random
import datetime as dt
import itertools as ite
import seaborn as sns
# Global seaborn/matplotlib theme for the figures drawn by this notebook.
sns.set(context='paper', style='ticks', palette="Set2", font='sans-serif')
# Allele names passed to sim.Population(alleleNames=...); the position in the
# list is the integer allele code (0-5).
nucleotides = list('ACGTDI')
# Symbol -> integer allele code, numbered in the same 0-5 order.
# NOTE(review): '-' and '+' occupy the same codes as 'D' and 'I' above, so they
# presumably denote deletion and insertion -- confirm against the input files.
nucleotideTranslator = {
    symbol: code for code, symbol in enumerate('ACGT-+')
}
from wgs import breed, operators, parameterizer, parser
# Shared helper objects from the local `wgs` package. The functions defined in
# the following cells read these as module-level globals.
parse = parser.Parser()  # map-file parsing (mapParser, endPtFinder)
param = parameterizer.Parameterizer()  # genotypeSetter: loads founder genotypes
f1Crosses = breed.fOne()  # provides fOneCross
f2Crosses = breed.fTwo()  # provides fTwoCross
qtl = parameterizer.QTL()  # provides segChooser
ae = parameterizer.AE()  # provides exponential
ld = parameterizer.LD()  # NOTE(review): not referenced in the cells shown here
# NOTE(review): later cells read `f2.dvars().mavs`, not this module-level frame;
# confirm whether this empty DataFrame is still needed.
mavs = pd.DataFrame()
def preparePopulation(map_file='hapmap3.txt', genotype_file='neat.txt',
                      female_idx=1, male_idx=6, f2_size=2000):
    '''Build the F2 population that the rest of the notebook evolves.

    Simulation uses genetic maps from the NAM population.

    Steps: parse the genetic map, create the 26-founder population, load the
    founder genotypes, cross two founders into an F1, expand the F1 into an
    F2, choose QTL and allele effects, and store those on the F2's variables.

    Parameters (all optional; the defaults reproduce the original hard-coded
    behaviour):
        map_file      -- genetic map file handed to parse.mapParser.
        genotype_file -- founder genotype file handed to param.genotypeSetter.
        female_idx    -- founder index used as the female parent of the F1.
        male_idx      -- founder index used as the male parent of the F1.
        f2_size       -- number of individuals in the F2 population.

    Returns:
        A clone of the F2 population with `qtl`, `properQTL`, `alleleEffects`
        and `plottingAlleleEffects` stored in its variables and with pandas,
        itertools, datetime and matplotlib.pyplot imported into its variable
        namespace under the aliases pd, ite, dt and plt.
    '''
    n_founders = 26 # There are 26 founders in the NAM population
    # hapmap3.txt (the default map_file) is not an actual 'HapMap'. Poor naming choice
    # Only the map subset and the per-chromosome locus counts are used below.
    _header, _mapdata, mapsubset = parse.mapParser(map_file)
    _ends, chromdistances = parse.endPtFinder(mapsubset)
    founder_population = sim.Population(
        n_founders,
        loci=chromdistances,
        alleleNames=nucleotides,
        infoFields=['ind_id',
                    'father_id', 'mother_id',
                    'ge', 'pe', 'fitness'])
    # neat.txt (the default genotype_file) is a formatted output of hapmap3.txt.
    # It is redundant and I need to clean this up some day.
    param.genotypeSetter(founder_population, genotype_file)
    # defaults to founders 1 and 6
    f1 = f1Crosses.fOneCross(founder_population, size=1,
                             female_idx=female_idx, male_idx=male_idx)

    f2 = f2Crosses.fTwoCross(f1, f2_size)
    # defaults to choosing qtl from segregating sites only
    # NOTE(review): 20 is presumably the number of QTL to choose -- confirm.
    absQTL, propQTL = qtl.segChooser(f2, 20)
    f2.dvars().qtl = absQTL
    f2.dvars().properQTL = propQTL
    # NOTE(review): the meaning of the second argument (1) is not visible here.
    alleleEffects, plotAlleleEffects = ae.exponential(f2, 1)
    f2.dvars().alleleEffects = alleleEffects
    f2.dvars().plottingAlleleEffects = plotAlleleEffects
    sim.tagID(f2, reset=True)
    # Selection-intensity bookkeeping is intentionally not stored here;
    # configureEvolution derives it from its own arguments.

    # Make these modules available inside the population's variable namespace
    # under the aliases shown.
    for import_statement in ('import pandas as pd',
                             'import itertools as ite',
                             'import datetime as dt',
                             'import matplotlib.pyplot as plt'):
        exec(import_statement, f2.vars(), f2.vars())
    # clone() has to be used here because it is a local variable
    return f2.clone()

In [2]:
# Build the base F2 population once. The configureEvolution call in a later
# cell evolves this object in place, so re-run this cell to start over.
f2 = preparePopulation()

In [3]:
def configureEvolution(pop, gens_to_evolve, initial_pop_size, proportion_selected,
            overshoot_as_proportion,
            individuals_per_breeding_subpop=5):
    """Evolve ``pop`` in place using selection and grouped breeding.

    The notation ``n_*`` means "number of".

    Parameters
    ----------
    pop : sim.Population
        Population to evolve; ``pop.evolve`` is called on it directly.
    gens_to_evolve : int
        Evolves the population for this number of generations.
    initial_pop_size : int
        Size of ``pop`` when evolution starts; evolution usually begins with
        the output of an F2 cross / random mating.
    proportion_selected : float
        Proportion of individuals to save at the top of the phenotypic
        distribution (flexibility will be added in later versions).
    overshoot_as_proportion : float
        The current breeding scheme involves randomly selecting a subset of
        individuals from a larger pool; this is the size of the excess,
        expressed as a proportion of ``initial_pop_size``.
    individuals_per_breeding_subpop : int
        The selected individuals are split into groups of this many.

    Derived quantities (computed here so they need not be specified)
    ----------------------------------------------------------------
    n_of_breeding_individuals    = int(proportion_selected * initial_pop_size)
    n_of_breeding_subpops        = number of complete groups of
                                   ``individuals_per_breeding_subpop`` that
                                   the selected individuals fill
    n_offspring_per_female, n_bred_per_subpop
                                 = offspring counts giving a total of about
                                   initial_pop_size * (1 + overshoot)
    n_of_nonbreeding_individuals = everyone not placed in a breeding group
    n_of_individuals_removed     = int(overshoot_as_proportion * initial_pop_size)

    ``proportion_selected``, ``individuals_per_breeding_subpop`` and the
    derived ``n_of_breeding_subpops``, ``n_of_nonbreeding_individuals``,
    ``n_bred_per_subpop`` and ``n_of_individuals_removed`` are stored on
    ``pop.dvars()`` before evolution starts.

    Raises
    ------
    ValueError
        If ``individuals_per_breeding_subpop`` is smaller than 1, or if the
        selected individuals cannot fill a single breeding group.
    """
    if individuals_per_breeding_subpop < 1:
        raise ValueError(
            "individuals_per_breeding_subpop must be at least 1, got %r"
            % (individuals_per_breeding_subpop,))

    n_of_breeding_individuals = int(proportion_selected * initial_pop_size)
    total_n_individuals_bred = int(initial_pop_size * (1 + overshoot_as_proportion))
    # Number of complete breeding groups formed by the selected individuals.
    # (Dividing initial_pop_size by n_of_breeding_individuals, as before,
    # gives 1/proportion_selected, which is the right count only by
    # coincidence for some argument combinations.)
    n_of_breeding_subpops = int(
        n_of_breeding_individuals // individuals_per_breeding_subpop)
    if n_of_breeding_subpops < 1:
        raise ValueError(
            "proportion_selected * initial_pop_size selects %d individual(s), "
            "which is fewer than one breeding group of %d"
            % (n_of_breeding_individuals, individuals_per_breeding_subpop))
    n_offspring_per_female = int(
        total_n_individuals_bred
        / (individuals_per_breeding_subpop * n_of_breeding_subpops))
    n_bred_per_subpop = int(n_offspring_per_female * individuals_per_breeding_subpop)
    # Everyone outside a complete breeding group, so that the sizes handed to
    # SplitSubPops below always add up to initial_pop_size.
    n_of_nonbreeding_individuals = int(
        initial_pop_size
        - individuals_per_breeding_subpop * n_of_breeding_subpops)
    n_of_individuals_removed = int(overshoot_as_proportion * initial_pop_size)

    pop.dvars().proportion_selected = proportion_selected
    pop.dvars().individuals_per_breeding_subpop = individuals_per_breeding_subpop
    pop.dvars().n_of_breeding_subpops = n_of_breeding_subpops
    pop.dvars().n_of_nonbreeding_individuals = n_of_nonbreeding_individuals
    pop.dvars().n_bred_per_subpop = n_bred_per_subpop
    pop.dvars().n_of_individuals_removed = n_of_individuals_removed

    pc = breed.PC(individuals_per_breeding_subpop, n_offspring_per_female)

    pop.evolve(
        initOps=[
        operators.InitGenerationToZero(),
        operators.InitializeMultiGenDataMatrix([
                                      'smean_ge','svar_ge','smean_pe',
                                      'svar_pe','umean_ge','uvar_ge',
                                      'umean_pe','uvar_pe']),
        operators.ParameterizationWriter(dt.datetime.now(),
                                         'allelic-effects',
                                         'for-plot-allelic-effects',
                                         'improper-qtl',
                                         'proper-qtl'),
        sim.Stat(popSize=True),
        # Column header for the per-generation report printed by preOps.
        sim.PyEval(r"'Gen\tMeanGe\tVarGe\tMeanPe\tVarPe\tSubPopulations\n'"),
        ],
        preOps=[
            operators.GenotypicEffectCalculator(g_effect_field='ge'),
            # NOTE(review): the meaning of the literal 0.7 is not visible in
            # this file -- confirm and consider naming it.
            operators.TruncationSelection(0.7,proportion_selected, infoFields=['ge','pe']),
            sim.Stat(meanOfInfo=['ge','pe'], varOfInfo=['ge','pe']),
            sim.Stat(alleleFreq=sim.ALL_AVAIL),
            #sim.Stat(numOfSegSites=sim.ALL_AVAIL),
            #sim.Stat(effectiveSize=sim.ALL_AVAIL),
            #sim.Stat(neutrality=sim.ALL_AVAIL),
            sim.Stat(structure=sim.ALL_AVAIL, vars=['F_st']),
            operators.GenerateMinorAlleleList(),
            operators.MinorAlleleCounter(),
            # One report row per generation, split over two PyEval calls.
            sim.PyEval(r"'\n%d\t%.3f\t%.3f\t' % (gen, meanOfInfo['ge'], varOfInfo['ge'])"),
            sim.PyEval(r"'%.3f\t%.3f\t%s\n' % (meanOfInfo['pe'], varOfInfo['pe'], subPopSize)"),
            #operators.UniqueFileWriter(dt.datetime.now(), 'alleleFrequencies'),
            operators.Sorter('pe'),
            # Breeding groups come first, followed by one sub-population
            # holding everybody else; the split keeps the existing order.
            sim.SplitSubPops(sizes=[individuals_per_breeding_subpop]*n_of_breeding_subpops\
                            +[n_of_nonbreeding_individuals], randomize=False),
            operators.CalcSplitMavs('ge','pe','smean_ge','svar_ge','smean_pe','svar_pe',
                'umean_ge','uvar_ge','umean_pe','uvar_pe'),
            ],
        matingScheme=
                sim.HomoMating(
                    sim.PyParentsChooser(pc.recursivePairwiseParentChooser),
                    sim.OffspringGenerator(ops=[
                        sim.Recombinator(rates=0.01),
                        sim.IdTagger(),
                        sim.PedigreeTagger()],
                        numOffspring=1),
                    # Each breeding group yields n_bred_per_subpop offspring;
                    # the trailing non-breeding sub-population yields none.
                    subPopSize=[n_bred_per_subpop]*n_of_breeding_subpops+[0],
                    # NOTE(review): range(1, n) leaves out sub-population 0
                    # (the first breeding group). Left unchanged because its
                    # effect cannot be confirmed from this file -- verify.
                    subPops=list(range(1,n_of_breeding_subpops)),
                    ),
        postOps=[
            sim.MergeSubPops(),
            operators.SelectRandomSeed(n_of_individuals_removed),
            ],
    gen=gens_to_evolve)

In [5]:
# Evolve the F2 in place for five generations. Re-running this cell continues
# from the already-evolved population rather than starting from the F2 again.
configureEvolution(
    pop=f2,
    gens_to_evolve=5,
    initial_pop_size=2000,
    proportion_selected=0.05,
    overshoot_as_proportion=0.50,
    individuals_per_breeding_subpop=5,
)

Gen	MeanGe	VarGe	MeanPe	VarPe	SubPopulations

0	133.701	47.173	132.934	466.782	[2000]

1	138.500	41.553	138.424	347.596	[2000]

2	142.171	29.846	142.168	195.011	[2000]

3	146.127	18.108	146.190	76.408	[2000]

4	149.801	12.402	149.714	39.428	[2000]


In [6]:
# Show the `mavs` table kept in the population's variables: one row per
# generation with the s*/u* mean and variance columns for 'ge' and 'pe'.
# NOTE(review): presumably filled by operators.CalcSplitMavs during evolution -- confirm.
f2.dvars().mavs

Unnamed: 0,smean_ge,svar_ge,smean_pe,svar_pe,umean_ge,uvar_ge,umean_pe,uvar_pe
0,138.585875,44.497755,195.70341,31.003474,138.850342,114.582525,189.009337,0.432937
1,142.139838,19.995224,188.694422,6.262716,145.622255,1.942393,184.331668,0.555896
2,148.396361,6.474507,181.342447,1.359502,145.424744,10.855235,178.865578,0.542155
3,151.799147,7.37974,173.375101,3.826812,150.8072,17.754763,169.547915,0.92218
4,154.393234,7.492218,168.398954,2.135985,152.969812,4.564765,165.795816,0.231533
