In [1]:
import pytest
import simuOpt
simuOpt.setOptions(alleleType='short', quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
np.set_printoptions(suppress=True, precision=3)

In [2]:

# Open the three on-disk shelves used throughout the notebook. No close() call
# appears in the visible cells, and `uniparams` is both read and written later,
# so the handles are intentionally kept open here.
pf_map = shelve.open('pf_map')
misc_gmap = shelve.open('misc_gmap')
uniparams = shelve.open('uniparams')

# Locus/allele naming metadata stored in the `uniparams` shelf.
(locus_names,
 pos_column,
 allele_names,
 snp_to_integer,
 integer_to_snp) = (uniparams[key] for key in (
    'locus_names',
    'pos_column',
    'allele_names',
    'snp_to_integer',
    'integer_to_snp',
))

# Genetic-map quantities stored in the `misc_gmap` shelf.
(alleles,
 chr_cM_positions,
 cM_positions,
 integral_valued_loci,
 relative_integral_valued_loci,
 recombination_rates) = (misc_gmap[key] for key in (
    'alleles',
    'chr_cM_positions',
    'cM_positions',
    'integral_valued_loci',
    'relative_integral_valued_loci',
    'recombination_rates',
))






In [3]:
list(uniparams.keys())

['meta_pop_sample_sizes',
 'allele_effect_parameters',
 'snp_to_integer',
 'proportion_of_individuals_saved',
 'number_of_qtl',
 'generations_of_random_mating',
 'number_of_replicates',
 'heritability',
 'integer_to_snp',
 'generations_of_selection',
 'founders',
 'allele_names',
 'prefounder_file_name',
 'individuals_per_breeding_subpop',
 'overshoot_as_proportion',
 'operating_population_size',
 'locus_names',
 'pos_column']

In [4]:
# Four founder pairs, each written as [first_id, second_id]. `uniparams` is the
# object returned by shelve.open('uniparams'), so this assignment goes into the
# shelf rather than into a plain in-memory dict.
# NOTE(review): the F_1 checks further down expect the first id of each pair
# as mother_id and the second as father_id — confirm against generate_f_one.
uniparams['founders'] = [[1, 2], [3, 4], [5, 6], [7, 8]]

In [5]:
uniparams['founders']

[[1, 2], [3, 4], [5, 6], [7, 8]]

In [6]:
uniparams['operating_population_size']

2000

In [7]:
# Offspring contributed per individual founder. Derived from the stored
# parameters instead of the literals 2000 and 8 so it cannot drift if the
# population size or the founder list changes (evaluates to 250.0 for the
# values displayed above).
offspring_per_founder = (
    uniparams['operating_population_size']
    / sum(len(founder_pair) for founder_pair in uniparams['founders'])
)

In [8]:
# Load the prefounder population, re-tag every individual's ID, and name the
# single sub-population.
nam = sim.loadPopulation(uniparams['prefounder_file_name'])
sim.tagID(nam, reset=True)
nam.setSubPopName('maize_nam_prefounders', 0)

# Planned layout for selection statistics (not used yet). Kept as a comment
# instead of a bare string statement:
# selection_statistics = {
#     'aggregate': {},
#     'selected': {},
#     'non-selected': {}
# }

# One empty dict per replicate index. This was previously initialised twice in
# this cell with identical code; a single initialisation is sufficient.
ind_names_for_gwas = {
    replicate: {} for replicate in range(uniparams['number_of_replicates'])
}

# Maps every even integer from 0 through generations_of_selection to 100.
# Written back into the `uniparams` shelf.
uniparams['meta_pop_sample_sizes'] = {
    generation: 100
    for generation in range(0, uniparams['generations_of_selection'] + 1, 2)
}

# Positional argument order is significant; it mirrors the original call.
s = simulate.Truncation(
    uniparams['generations_of_selection'],
    uniparams['generations_of_random_mating'],
    uniparams['operating_population_size'],
    uniparams['proportion_of_individuals_saved'],
    uniparams['overshoot_as_proportion'],
    uniparams['individuals_per_breeding_subpop'],
    uniparams['heritability'],
    uniparams['meta_pop_sample_sizes'],
    uniparams['number_of_replicates'],
)

founders = uniparams['founders']

# Work on an extracted replicate so `nam` itself is left untouched
# (stealPops=False).
replicated_nam = sim.Simulator(nam, rep=2, stealPops=False)
pop = replicated_nam.extract(0)

# The check is an equality, so the message must not claim only "too large".
assert pop.popSize() == 26, \
    "Prefounder population should contain exactly 26 individuals."

# Offspring per founder pair, derived from the stored parameters rather than a
# literal 500 (2000 // 4 == 500 for the values shown above).
offspring_per_founder_pair = uniparams['operating_population_size'] // len(founders)
s.generate_f_one(pop, recombination_rates, founders, offspring_per_founder_pair)


Generation: 0


In [16]:
# Query the individual IDs once; the original called pop.indInfo() again for
# every one of the 4000 draws even though the population does not change.
candidate_parent_ids = pop.indInfo('ind_id')

# Parents are sampled with replacement, mothers first and then fathers, so the
# sequence of random draws is the same as before.
mother_ids = [random.choice(candidate_parent_ids) for _ in range(2000)]
father_ids = [random.choice(candidate_parent_ids) for _ in range(2000)]

In [21]:
mother_ids == father_ids

False

In [24]:
# Collects expected/observed breeding values for later comparison. True
# division is used, so the stored value is a float.
breeding_parameters = {
    'offspring_per_founder_pair':
        uniparams['operating_population_size'] / len(founders),
}

In [26]:
breeding_parameters

{'offspring_per_founder_pair': 500.0}

In [34]:
# Expected maternal IDs of the F_1 offspring: 500 consecutive entries for each
# of the IDs 1.0, 3.0, 5.0 and 7.0, in that order.
expected_f_one_mother_ids = [
    mother_id
    for mother_id in (1.0, 3.0, 5.0, 7.0)
    for _ in range(500)
]

In [30]:
# `mothers` is not defined anywhere in the visible notebook (leftover kernel
# state), so this cell fails on a fresh kernel. Compare against the expected
# list that is defined above instead.
expected_f_one_mother_ids == list(pop.indInfo('mother_id'))

True

In [32]:
# Number of positions at which the observed mother_id equals the expected one.
# Uses `expected_f_one_mother_ids` because `mothers` is never defined in the
# visible notebook.
sum(np.equal(expected_f_one_mother_ids, list(pop.indInfo('mother_id'))))

2000

In [33]:
# Snapshot of the mother_id information field of the current population, as a list.
observed_f_one_mother_ids = [*pop.indInfo('mother_id')]

In [35]:
# Store the maternal expectation, the observation, and the count of positions
# where the two agree.
breeding_parameters['expected_f_one_mother_ids'] = expected_f_one_mother_ids
breeding_parameters['observed_f_one_mother_ids'] = observed_f_one_mother_ids
mother_id_agreement = np.equal(expected_f_one_mother_ids,
                               observed_f_one_mother_ids)
breeding_parameters['number_of_matches_f_one_mother_ids'] = sum(mother_id_agreement)

In [37]:
# Expected paternal IDs of the F_1 offspring: 500 consecutive entries for each
# of the IDs 2.0, 4.0, 6.0 and 8.0, in that order.
expected_f_one_father_ids = [
    father_id
    for father_id in (2.0, 4.0, 6.0, 8.0)
    for _ in range(500)
]

In [38]:
# Snapshot of the father_id information field of the current population, as a list.
observed_f_one_father_ids = [*pop.indInfo('father_id')]

In [39]:
# Store the paternal expectation, the observation, and the count of positions
# where the two agree.
#
# Fixed copy-paste errors: this cell previously stored and compared the
# *mother* lists, so the "father" match count only re-tested the mothers. The
# garbled keys '..._f_father_mother_ids' are also renamed to parallel the
# '..._f_one_mother_ids' keys used for the maternal side.
breeding_parameters['expected_f_one_father_ids'] = expected_f_one_father_ids
breeding_parameters['observed_f_one_father_ids'] = observed_f_one_father_ids
breeding_parameters['number_of_matches_f_one_father_ids'] = sum(
    np.equal(expected_f_one_father_ids, observed_f_one_father_ids))

In [40]:
breeding_parameters['number_of_matches_f_one_father_ids']

2000

In [9]:
pop.popSize()

2000

In [43]:
# Query the individual IDs once instead of once per draw; the population is not
# modified while the parents are being sampled.
f_one_ids = pop.indInfo('ind_id')
offspring_count = uniparams['operating_population_size']

# Sampling is with replacement; mothers are drawn before fathers, as before.
expected_f_two_mother_ids = [random.choice(f_one_ids) for _ in range(offspring_count)]
expected_f_two_father_ids = [random.choice(f_one_ids) for _ in range(offspring_count)]

### Split the Offspring for Convenience

In [71]:
pop.splitSubPop(0, [1000] * 2)

(0, 1)

In [72]:
pop.subPopSizes()

(1000, 1000)

In [50]:
# Candidate parent IDs of each sub-population, fetched once rather than once
# per draw.
first_sp_ids = pop.indInfo('ind_id', 0)
second_sp_ids = pop.indInfo('ind_id', 1)

# 1000 mothers and 1000 fathers drawn with replacement from sub-population 0.
first_sp_mothers = [random.choice(first_sp_ids) for _ in range(1000)]
first_sp_fathers = [random.choice(first_sp_ids) for _ in range(1000)]

# 1000 mothers and 1000 fathers drawn with replacement from sub-population 1.
# Fixed: these lists previously sampled sub-population 0 again, so no
# individual of the second sub-population could ever be chosen as a parent.
second_sp_mothers = [random.choice(second_sp_ids) for _ in range(1000)]
second_sp_fathers = [random.choice(second_sp_ids) for _ in range(1000)]

### Merge Offspring After Choosing Parents

In [73]:
pop.mergeSubPops()

0

In [74]:
pop.subPopSizes()

(2000,)

In [56]:
# Concatenate the per-sub-population choices, first sub-population first, so
# the positions line up with the merged population.
expected_f_two_mother_ids = [*first_sp_mothers, *second_sp_mothers]
expected_f_two_father_ids = [*first_sp_fathers, *second_sp_fathers]

In [75]:
# Record the expected F_2 parent IDs alongside the F_1 bookkeeping.
breeding_parameters.update(
    expected_f_two_mother_ids=expected_f_two_mother_ids,
    expected_f_two_father_ids=expected_f_two_father_ids,
)

In [11]:

# Sanity check on the F_1 offspring population. The literal 2000 is compared
# against the population size, while the failure message interpolates
# len(founders) * 500; the two agree only while there are four founder pairs.
assert pop.popSize() == 2000, "Population should have size: {} after the F_1 mating " \
                                           "procedure." \
                                           "".format(len(founders) * 500)

# Disabled earlier attempt to split the offspring into four sub-populations of
# 100 individuals.
# NOTE(review): the ValueError recorded below this cell ("Sum of parameter
# sizes should be 1 or the size of subpopulation 0") is consistent with this
# split, since 4 * 100 does not equal the population size of 2000 — confirm.
#pop.splitSubPop(0, [100] * 4)
#subpop_list = list(range(pop.numSubPop()))

# NOTE(review): `intmd_os_struct` is only assigned by the commented-out line
# below, so on a fresh kernel the SecondOrderPairIDChooser call raises
# NameError. The correct arguments for restructure_offspring cannot be
# determined from this notebook — TODO restore with sizes that match the
# 2000-individual population.
#intmd_os_struct = s.restructure_offspring(pop, 100, 4)
snd_order = breed.SecondOrderPairIDChooser(intmd_os_struct, 1)



ValueError: population.cpp: 1166 Sum of parameter sizes should be 1 or the size of subpopulation 0

In [None]:
# Second round of mating: merge all sub-populations, then produce one
# offspring per parent pair yielded by `snd_order.snd_ord_id_pairs`, for a
# total offspring population of 200. Offspring receive new IDs, parent tags
# and pedigree tags, and are recombined using `recombination_rates`.
pop.evolve(
    preOps=[sim.MergeSubPops()],
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(snd_order.snd_ord_id_pairs),
        sim.OffspringGenerator(
            ops=[
                sim.IdTagger(),
                sim.ParentsTagger(),
                sim.PedigreeTagger(),
                sim.Recombinator(rates=recombination_rates),
            ],
            numOffspring=1),
        subPopSize=[200],
    ),
    gen=1,
)

# Fixed: the check previously expected a size of 1, which can never hold after
# mating with subPopSize=[200] and contradicts the 100 x 2 restructuring below.
assert pop.popSize() == 200, "Population does not have correct size after second round of mating."

# Regroup the 200 offspring for the next round (arguments 100 and 2).
# NOTE(review): assumed to mean two groups of 100 — confirm against
# Truncation.restructure_offspring.
second_intmd_os_struct = s.restructure_offspring(pop, 100, 2)
third_order = breed.SecondOrderPairIDChooser(second_intmd_os_struct, 1)

# Third round of mating: same operators as above, parents taken from
# `third_order`, offspring population of 100.
pop.evolve(
    preOps=[sim.MergeSubPops()],
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(third_order.snd_ord_id_pairs),
        sim.OffspringGenerator(
            ops=[
                sim.IdTagger(),
                sim.ParentsTagger(),
                sim.PedigreeTagger(),
                sim.Recombinator(rates=recombination_rates),
            ],
            numOffspring=1),
        subPopSize=[100],
    ),
    gen=1,
)

assert pop.popSize() == 100, "Second merge of breeding sub-populations. Offspring population does not have " \
                             "correct size"


In [None]:
intmd_os_struct[0][0]

In [None]:
pop.popSize()

In [None]:
pop.numSubPop()

In [None]:
pop.subPopSizes()

In [None]:
# Flattened concatenation of the [0, 0] and [1, 0] entries of `intmd_os_struct`
# (np.append without an axis flattens both inputs).
# NOTE(review): tuple indexing here assumes `intmd_os_struct` is a NumPy array;
# an earlier cell indexes it as intmd_os_struct[0][0] — confirm the type.
expected_mother_ids = np.append(intmd_os_struct[0, 0], intmd_os_struct[1, 0])

In [None]:
expected_mother_ids.shape

In [None]:
# NOTE(review): only the [0, 1] entry is used here, whereas the mother IDs
# combine the entries at [0, 0] and [1, 0] — presumably the [1, 1] entry should
# be appended as well; verify before comparing against observed father IDs.
expected_father_ids = intmd_os_struct[0, 1]

In [None]:
# Read a tab-separated text file from the current working directory into a
# DataFrame.
# NOTE(review): this dataset is not referenced by any of the breeding cells
# visible above — confirm it belongs in this notebook.
brcancer_data = pd.read_csv('hedenfalk_brcancer_data.txt', sep='\t')

In [None]:
# First row of the table, all columns. `.ix` was deprecated and has been
# removed from pandas; `.iloc` selects the same row because read_csv was called
# without an index column, so position 0 and label 0 coincide.
brcancer_data.iloc[0, :]