In [1]:
import simuOpt
simuOpt.setOptions(quiet=True, optimized=True, numThreads=4)
import simuPOP as sim
import os, numpy as np, pandas as pd, collections as col
from saegus import analyze, simulate, parameters, breed, operators
from scipy import stats
import random

In [2]:
tuson = sim.loadPopulation('tuson.pop')

In [3]:
artemis = analyze.Study('artemis')

In [4]:
sim.stat(tuson, numOfSegSites=sim.ALL_AVAIL, vars=['numOfSegSites', 'segSites', 'numOfFixedSites', 'fixedSites'])
parameters.randomly_convert_fixed_sites(tuson, tuson.dvars().fixedSites)
sim.stat(tuson, numOfSegSites=sim.ALL_AVAIL, vars=['numOfSegSites', 'segSites', 'numOfFixedSites', 'fixedSites'])

In [5]:
tuson.dvars().numOfFixedSites

0

In [6]:
sim.stat(tuson, alleleFreq=sim.ALL_AVAIL)

In [7]:
sim.stat(tuson, homoFreq=sim.ALL_AVAIL)

In [8]:
sim.stat(tuson, heteroFreq=sim.ALL_AVAIL)

In [9]:
alleles  = np.array([list(tuson.dvars().alleleFreq[locus].keys()) for locus in range(tuson.totNumLoci())], dtype=np.int8)

In [10]:
alleles

array([[1, 2],
       [2, 3],
       [2, 3],
       ..., 
       [1, 2],
       [1, 3],
       [1, 3]], dtype=int8)

## Alleles for Fixed Sites chosen at random.

In [11]:
np.savetxt('alleles_of_tuson_founders.txt', alleles, fmt='%d', delimiter='\t')

In [12]:
af = analyze.allele_data(tuson, alleles, range(tuson.totNumLoci()))

In [13]:
def expanded_allele_data(pop, allele_data_structure):
    sim.stat(pop, heteroFreq=sim.ALL_AVAIL)
    hetero_frqs = np.array(list(pop.dvars().heteroFreq.values()))
    hetero_column = pd.DataFrame(hetero_frqs, columns=['heterozygote_frequency'])
    return allele_data_structure.join(hetero_column)

In [14]:
eaf = expanded_allele_data(tuson, af)

In [15]:
formed_mia = np.array(af['minor_allele'], dtype=np.int8)
formed_maj = np.array(af['major_allele'], dtype=np.int8)

In [16]:
eaf['minor_allele'] = formed_mia
eaf['major_allele'] = formed_maj

In [17]:
eaf.to_csv('expanded_tuson_founder_allele_frqs.txt', sep='\t')

In [18]:
tuson.addInfoFields(['generation', 'replicate', 'g', 'p'])

In [19]:
tuson.save('working_tuson.pop')

In [20]:
tuson.asPedigree()

In [21]:
tuson.save("tuson_pedigree.txt", infoFields=['g', 'p'], loci=sim.ALL_AVAIL)

In [22]:
tuson.popSize()

105

In [23]:
tuson.asPopulation()

## The Tuson Genetic Map

In [24]:
def parse_recombination_rates(genetic_map_filename):
    """
    Returns a list of crossover probabilities from a genetic map measured in centimorgans.
    """
    genetic_map = pd.read_csv(genetic_map_filename, sep='\t', index_col=None)
    genetic_map.drop(['locus', 'agpv2', 'namZmPRDA', 'namZmPRDS'], axis=1, inplace=True)
    genetic_map = np.array(genetic_map)
    recombination_rates = col.OrderedDict()
    for i in range(1, len(genetic_map), 1):
        if genetic_map[i-1][0] == genetic_map[i][0]:
            recombination_rates[i] = np.divide(np.abs(genetic_map[i][1] - genetic_map[i-1][1]), 100)
        elif genetic_map[i-1][0] != genetic_map[i][0]:
            recombination_rates[i] = 0.0
    recombination_rates[len(genetic_map)] = 0.0
    return list(recombination_rates.values())


In [25]:
recom_rates = parse_recombination_rates('raw_genetic_map.txt')

### Using the parameters.PopulationStructure class

In [26]:
popst = parameters.PopulationStructure(tuson, 'population_structure_matrix.xlsx', 0.01, 1.0)

In [27]:
struct_mating_probs = popst.generate_population_structure()

In [28]:
def format_mating_pmfs(population_structure_dict):
    mating_pmfs = {}
    for ind, probabilities in population_structure_dict.items():
        for i, prob in enumerate(probabilities):
            values = []
            probabilites = []
            for i, prob in enumerate(struct_mating_probs[ind]):
                values.append(i)
                probabilites.append(prob)
            pmf_values = (values, probabilites)
            mating_pmfs[ind] = stats.rv_discrete(values=pmf_values)
    return mating_pmfs

In [29]:
formed_mating_pmfs = format_mating_pmfs(struct_mating_probs)

In [30]:
def assign_primary_subpopulation(pop, struct_mating_probabilities):
    primary_subpop = {}
    for ind_id, inheritance_proportions in struct_mating_probabilities.items():
        primary_subpop[ind_id] = float(np.argmax(inheritance_proportions))
    for ind in pop.individuals():
        ind.primary = primary_subpop[ind.ind_id]

In [31]:
assign_primary_subpopulation(tuson, struct_mating_probs)

In [32]:
tuson.dvars().mating_pmfs = formed_mating_pmfs

In [33]:
pop_struct_expansion = breed.ForcedPopulationStructureParentChooser(10000, formed_mating_pmfs)

In [34]:
primary_subpopulation_splitter = sim.InfoSplitter(field='primary',
                                                  values=[0.0, 1.0, 2.0, 3.0,
                                                          4.0, 5.0])
tuson.setVirtualSplitter(primary_subpopulation_splitter)


In [35]:
tuson.numVirtualSubPop()

6

In [36]:
sim.tagID(tuson, reset=False)

In [37]:
multi_son = sim.Simulator(tuson, rep=5)

In [38]:
multi_son.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(pop_struct_expansion.forced_structure_parent_chooser),
        sim.OffspringGenerator(ops=[sim.IdTagger(), sim.ParentsTagger(), sim.PedigreeTagger(),
                                   sim.Recombinator(recom_rates)], numOffspring=1),
            subPopSize=1000),
    gen=1
)

(1, 1, 1, 1, 1)

In [39]:
multi_son.evolve(
    matingScheme=sim.RandomMating(ops=[sim.IdTagger(), sim.ParentsTagger(), sim.PedigreeTagger(),
                                   sim.Recombinator(recom_rates)], numOffspring=1,
            subPopSize=1000),
    gen=1,
)

(1, 1, 1, 1, 1)

In [40]:
qtl = random.sample(range(100), 10)

In [41]:
additive_trait = parameters.Trait()

In [42]:
allele_effects = additive_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=1)

In [None]:
import importlib as imp
imp.reload(simulate)
imp.reload(operators)

In [43]:
sampling_generations = [i for i in range(2, 10, 2)]

In [44]:
sampling_generations

[2, 4, 6, 8]

In [46]:
sample_sizes = {i: 100 for i in range(11)}

In [47]:
meta_populations = {rep: [] for rep in range(5)}

In [48]:
trun = simulate.Truncation(10, 1, 1000, 0.05, 0.50, 5, 0.7, sample_sizes, 1)

In [49]:
def print_pop_sizes(multi_pop):
    for pop in multi_pop.populations():
        print(pop.popSize())

In [50]:
print_pop_sizes(multi_son)

1000
1000
1000
1000
1000


In [51]:
sample_sizes

{0: 100,
 1: 100,
 2: 100,
 3: 100,
 4: 100,
 5: 100,
 6: 100,
 7: 100,
 8: 100,
 9: 100,
 10: 100}

In [52]:
trun.replicate_selection(multi_son, meta_populations, qtl, allele_effects, recom_rates)

Initial: Sampled 100 individuals from generation 0 Replicate: 0.
Initial: Sampled 100 individuals from generation 0 Replicate: 1.
Initial: Sampled 100 individuals from generation 0 Replicate: 2.
Initial: Sampled 100 individuals from generation 0 Replicate: 3.
Initial: Sampled 100 individuals from generation 0 Replicate: 4.
Generation: 0
Generation: 0
Generation: 0
Generation: 0
Generation: 0
Generation: 1
Generation: 1
Generation: 1
Generation: 1
Generation: 1
Generation: 2
Generation: 2
Generation: 2
Generation: 2
Generation: 2
Generation: 3
Generation: 3
Generation: 3
Generation: 3
Generation: 3
Generation: 4
Generation: 4
Generation: 4
Generation: 4
Generation: 4
Generation: 5
Generation: 5
Generation: 5
Generation: 5
Generation: 5
Generation: 6
Generation: 6
Generation: 6
Generation: 6
Generation: 6
Generation: 7
Generation: 7
Generation: 7
Generation: 7
Generation: 7
Generation: 8
Generation: 8
Generation: 8
Generation: 8
Generation: 8
Generation: 9
Generation: 9
Generation: 9
Gen

In [91]:
meta_populations

{0: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 1: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 2: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 3: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 4: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>]}

In [76]:
sample_pop.sortIndividuals('p')

In [87]:
g_and_p = np.array((sample_pop.indInfo('ind_id'),
                    sample_pop.indInfo('replicate'),
                    sample_pop.indInfo('generation'),
                    sample_pop.indInfo('g'), 
                    sample_pop.indInfo('p'))).T

In [90]:
selected_inds = pd.DataFrame(g_and_p, columns=['ind_id', 'replicate', 'generation', 'g', 'p'])

In [92]:
for rep in range(5):
    for i in range(6):
        sim.stat(meta_populations[rep][i], alleleFreq=sim.ALL_AVAIL)

In [95]:
meta_populations[0][0].dvars().alleleFreq

{0: defdict({1: 0.345, 2: 0.655}),
 1: defdict({2: 0.19, 3: 0.81}),
 2: defdict({2: 0.94, 3: 0.06}),
 3: defdict({1: 0.06, 3: 0.94}),
 4: defdict({1: 0.73, 3: 0.27}),
 5: defdict({1: 0.96, 3: 0.04}),
 6: defdict({1: 0.185, 3: 0.815}),
 7: defdict({1: 0.12, 3: 0.88}),
 8: defdict({1: 0.13, 3: 0.87}),
 9: defdict({2: 0.85, 3: 0.15}),
 10: defdict({1: 0.55, 3: 0.45}),
 11: defdict({1: 0.45, 2: 0.55}),
 12: defdict({1: 0.08, 3: 0.92}),
 13: defdict({1: 0.88, 3: 0.12}),
 14: defdict({1: 0.99, 3: 0.01}),
 15: defdict({0: 0.005, 3: 0.995}),
 16: defdict({0: 0.01, 1: 0.99}),
 17: defdict({1: 0.95, 2: 0.05}),
 18: defdict({2: 0.125, 3: 0.875}),
 19: defdict({1: 0.01, 2: 0.99}),
 20: defdict({1: 0.28, 3: 0.72}),
 21: defdict({1: 0.61, 3: 0.39}),
 22: defdict({1: 0.195, 3: 0.805}),
 23: defdict({1: 0.015, 3: 0.985}),
 24: defdict({1: 0.995, 3: 0.005}),
 25: defdict({1: 0.28, 3: 0.72}),
 26: defdict({1: 0.215, 3: 0.785}),
 27: defdict({1: 0.005, 3: 0.995}),
 28: defdict({1: 0.09, 3: 0.91}),
 29: d

In [98]:
tuson

<simuPOP.Pedigree>

In [111]:
import importlib as imp
imp.reload(analyze)

<module 'saegus.analyze' from 'C:\\Anaconda3\\lib\\site-packages\\saegus\\analyze.py'>

In [106]:
meta_populations[0][0].dvars().alleleFreq

{0: defdict({1: 0.345, 2: 0.655}),
 1: defdict({2: 0.19, 3: 0.81}),
 2: defdict({2: 0.94, 3: 0.06}),
 3: defdict({1: 0.06, 3: 0.94}),
 4: defdict({1: 0.73, 3: 0.27}),
 5: defdict({1: 0.96, 3: 0.04}),
 6: defdict({1: 0.185, 3: 0.815}),
 7: defdict({1: 0.12, 3: 0.88}),
 8: defdict({1: 0.13, 3: 0.87}),
 9: defdict({2: 0.85, 3: 0.15}),
 10: defdict({1: 0.55, 3: 0.45}),
 11: defdict({1: 0.45, 2: 0.55}),
 12: defdict({1: 0.08, 3: 0.92}),
 13: defdict({1: 0.88, 3: 0.12}),
 14: defdict({1: 0.99, 3: 0.01}),
 15: defdict({0: 0.005, 3: 0.995}),
 16: defdict({0: 0.01, 1: 0.99}),
 17: defdict({1: 0.95, 2: 0.05}),
 18: defdict({2: 0.125, 3: 0.875}),
 19: defdict({1: 0.01, 2: 0.99}),
 20: defdict({1: 0.28, 3: 0.72}),
 21: defdict({1: 0.61, 3: 0.39}),
 22: defdict({1: 0.195, 3: 0.805}),
 23: defdict({1: 0.015, 3: 0.985}),
 24: defdict({1: 0.995, 3: 0.005}),
 25: defdict({1: 0.28, 3: 0.72}),
 26: defdict({1: 0.215, 3: 0.785}),
 27: defdict({1: 0.005, 3: 0.995}),
 28: defdict({1: 0.09, 3: 0.91}),
 29: d

In [107]:
alleles

array([[1, 2],
       [2, 3],
       [2, 3],
       ..., 
       [1, 2],
       [1, 3],
       [1, 3]], dtype=int8)

In [228]:
loci = range(44445)

In [178]:
minor_alleles = []
minor_frqs = []

In [171]:
revaf

{0: {0.345: 1, 0.655: 2},
 1: {0.19: 2, 0.81: 3},
 2: {0.06: 3, 0.94: 2},
 3: {0.06: 1, 0.94: 3},
 4: {0.27: 3, 0.73: 1},
 5: {0.04: 3, 0.96: 1},
 6: {0.185: 1, 0.815: 3},
 7: {0.12: 1, 0.88: 3},
 8: {0.13: 1, 0.87: 3},
 9: {0.15: 3, 0.85: 2},
 10: {0.45: 3, 0.55: 1},
 11: {0.45: 1, 0.55: 2},
 12: {0.08: 1, 0.92: 3},
 13: {0.12: 3, 0.88: 1},
 14: {0.01: 3, 0.99: 1},
 15: {0.005: 0, 0.995: 3},
 16: {0.01: 0, 0.99: 1},
 17: {0.05: 2, 0.95: 1},
 18: {0.125: 2, 0.875: 3},
 19: {0.01: 1, 0.99: 2},
 20: {0.28: 1, 0.72: 3},
 21: {0.39: 3, 0.61: 1},
 22: {0.195: 1, 0.805: 3},
 23: {0.015: 1, 0.985: 3},
 24: {0.005: 3, 0.995: 1},
 25: {0.28: 1, 0.72: 3},
 26: {0.215: 1, 0.785: 3},
 27: {0.005: 1, 0.995: 3},
 28: {0.09: 1, 0.91: 3},
 29: {0.005: 0, 0.995: 3},
 30: {0.1: 1, 0.9: 3},
 31: {0.105: 1, 0.895: 3},
 32: {0.28: 1, 0.72: 3},
 33: {0.18: 1, 0.82: 3},
 34: {0.275: 1, 0.725: 3},
 35: {0.045: 1, 0.955: 2},
 36: {0.37: 3, 0.63: 1},
 37: {0.41: 3, 0.59: 1},
 38: {0.365: 1, 0.635: 3},
 39: {0.0

In [179]:
for locus in loci:
    min_index = np.argmin(list(revaf[locus].keys()))
    min_allele = alleles[locus, np.argmin(list(example_pop.dvars().alleleFreq[locus].values()))]
    allelefr = example_pop.dvars().alleleFreq[locus][min_allele]
    minor_alleles.append(revaf[locus][example_pop.dvars().alleleFreq[locus][min_allele]])
    minor_frqs.append(allelefr)

In [180]:
max(minor_frqs)

1.0

In [167]:
minor = np.array((minor_alleles, minor_frqs)).T

In [149]:
example_pop.dvars().alleleFreq[1]

defdict({2: 0.19, 3: 0.81})

In [155]:
min_index = np.argmin(list(revaf[2].keys()))

In [156]:
min_index

1

In [151]:
np.argmin(list(example_pop.dvars().alleleFreq[1].values()))

0

In [153]:
alleles[1, np.argmin(list(example_pop.dvars().alleleFreq[1].values()))]

2

In [None]:
bnp.argmin()

In [134]:
for locus in loci:
    temp_frq = []
    for allele in alleles[locus]:
        temp_frq.append(example_pop.dvars().alleleFreq[locus][allele])
    
    allele_frq[0, locus] = temp_frq

ValueError: setting an array element with a sequence.

In [126]:
revaf

{0: {0.345: 1, 0.655: 2},
 1: {0.19: 2, 0.81: 3},
 2: {0.06: 3, 0.94: 2},
 3: {0.06: 1, 0.94: 3},
 4: {0.27: 3, 0.73: 1},
 5: {0.04: 3, 0.96: 1},
 6: {0.185: 1, 0.815: 3},
 7: {0.12: 1, 0.88: 3},
 8: {0.13: 1, 0.87: 3},
 9: {0.15: 3, 0.85: 2},
 10: {0.45: 3, 0.55: 1},
 11: {0.45: 1, 0.55: 2},
 12: {0.08: 1, 0.92: 3},
 13: {0.12: 3, 0.88: 1},
 14: {0.01: 3, 0.99: 1},
 15: {0.005: 0, 0.995: 3},
 16: {0.01: 0, 0.99: 1},
 17: {0.05: 2, 0.95: 1},
 18: {0.125: 2, 0.875: 3},
 19: {0.01: 1, 0.99: 2},
 20: {0.28: 1, 0.72: 3},
 21: {0.39: 3, 0.61: 1},
 22: {0.195: 1, 0.805: 3},
 23: {0.015: 1, 0.985: 3},
 24: {0.005: 3, 0.995: 1},
 25: {0.28: 1, 0.72: 3},
 26: {0.215: 1, 0.785: 3},
 27: {0.005: 1, 0.995: 3},
 28: {0.09: 1, 0.91: 3},
 29: {0.005: 0, 0.995: 3},
 30: {0.1: 1, 0.9: 3},
 31: {0.105: 1, 0.895: 3},
 32: {0.28: 1, 0.72: 3},
 33: {0.18: 1, 0.82: 3},
 34: {0.275: 1, 0.725: 3},
 35: {0.045: 1, 0.955: 2},
 36: {0.37: 3, 0.63: 1},
 37: {0.41: 3, 0.59: 1},
 38: {0.365: 1, 0.635: 3},
 39: {0.0

In [127]:
allele_frq = np.zeros((5, 44445))

In [None]:
for locus in loci:
    

In [120]:
example_pop = meta_populations[0][0]

In [229]:

af = analyze.allele_data(meta_populations[0][0], alleles, range(44445))

In [234]:
generations = [i for i in range(0, 11, 2)]

In [235]:
af_storage = pd.HDFStore('tuson_01_allele_frequencies.h5')

In [238]:
for rep in range(5):
    for i, gen in enumerate(generations):
        af_storage.put('/'+str(rep)+'/'+str(gen), analyze.allele_data(meta_populations[rep][i], alleles, range(44445)))



In [239]:
af_storage.close()