In [1]:
import simuOpt
simuOpt.setOptions(quiet=True, optimized=True, numThreads=4)
import simuPOP as sim
import os, numpy as np, pandas as pd, collections as col
from saegus import analyze, simulate, parameters, breed
from scipy import stats
import random

In [2]:
# Load the Tuson founder population from its saved simuPOP population file.
tuson = sim.loadPopulation('tuson.pop')

In [3]:
# Create the saegus Study object named 'artemis'.
# NOTE(review): the same object is constructed again from run_id near the end of the notebook.
artemis = analyze.Study('artemis')

In [4]:
# Count segregating and fixed sites so that tuson.dvars().fixedSites is available.
sim.stat(tuson, numOfSegSites=sim.ALL_AVAIL, vars=['numOfSegSites', 'segSites', 'numOfFixedSites', 'fixedSites'])
# Hand the fixed sites to saegus for random conversion (mutates tuson in place,
# judging by the changed statistic below -- the helper's exact rule is not visible here).
parameters.randomly_convert_fixed_sites(tuson, tuson.dvars().fixedSites)
# Recompute the same statistics so the stored values describe the converted population.
sim.stat(tuson, numOfSegSites=sim.ALL_AVAIL, vars=['numOfSegSites', 'segSites', 'numOfFixedSites', 'fixedSites'])

In [5]:
# Sanity check: number of fixed sites remaining after the conversion (0 in the output below).
tuson.dvars().numOfFixedSites

0

In [6]:
# Compute allele frequencies at every locus; the result is read back from tuson.dvars().alleleFreq.
sim.stat(tuson, alleleFreq=sim.ALL_AVAIL)

In [7]:
# Compute homozygote frequencies at every locus; the result is read back from tuson.dvars().homoFreq.
sim.stat(tuson, homoFreq=sim.ALL_AVAIL)

In [8]:
# Compute heterozygote frequencies at every locus; the result is read back from tuson.dvars().heteroFreq.
sim.stat(tuson, heteroFreq=sim.ALL_AVAIL)

In [13]:
# One row per locus holding the allele states that have a frequency entry at that locus.
# Building a rectangular int8 array requires every locus to list the same number of
# alleles -- presumably two after the fixed-site conversion above (TODO confirm).
alleles  = np.array([list(tuson.dvars().alleleFreq[locus].keys()) for locus in range(tuson.totNumLoci())], dtype=np.int8)

In [14]:
# Display the per-locus allele array (NumPy truncates the repr).
alleles

array([[1, 2],
       [2, 3],
       [2, 3],
       ..., 
       [1, 2],
       [1, 3],
       [1, 3]], dtype=int8)

## Alleles per locus (alleles for fixed sites were chosen at random)

In [15]:
# Persist the allele array as tab-separated integers, one locus per line.
np.savetxt('alleles_of_tuson_founders.txt', alleles, fmt='%d', delimiter='\t')

In [16]:
# Build the allele-frequency table over all loci; as displayed further down it has
# minor/major allele and minor/major frequency columns.
af = analyze.allele_data(tuson, alleles, range(tuson.totNumLoci()))

In [17]:
def expanded_allele_data(pop, allele_data_structure):
    """
    Return ``allele_data_structure`` with a 'heterozygote_frequency' column added.

    Heterozygote frequencies are (re)computed on ``pop`` for all available
    loci, and the values stored in ``pop.dvars().heteroFreq`` are placed in a
    single-column frame with a default 0..n-1 index.  That frame is joined
    onto ``allele_data_structure`` by index, so the input frame itself is not
    modified.

    NOTE(review): the values are taken in the dictionary's iteration order;
    this presumably matches locus order -- confirm.
    """
    sim.stat(pop, heteroFreq=sim.ALL_AVAIL)
    heterozygote_frequencies = pd.DataFrame(
        np.array(list(pop.dvars().heteroFreq.values())),
        columns=['heterozygote_frequency'],
    )
    return allele_data_structure.join(heterozygote_frequencies)

In [18]:
# Allele-frequency table extended with the heterozygote frequency of each locus.
eaf = expanded_allele_data(tuson, af)

In [20]:
# Write the expanded table to a tab-separated text file.
eaf.to_csv("example_expanded_allele_frequency.txt", sep='\t')

In [21]:
# Re-cast the minor/major allele columns as int8 arrays.
formed_mia = np.array(af['minor_allele'], dtype=np.int8)
formed_maj = np.array(af['major_allele'], dtype=np.int8)

In [22]:
# Overwrite the allele columns of af with their int8 versions (mutates af in place).
# NOTE(review): eaf was built before this cast, which is presumably why it still
# displays these two columns as floats.
af['minor_allele'] = formed_mia
af['major_allele'] = formed_maj

In [23]:
# Display the allele-frequency table with integer allele columns.
af

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency
0,1,0.319048,2,0.680952
1,2,0.219048,3,0.780952
2,3,0.061905,2,0.938095
3,1,0.061905,3,0.938095
4,3,0.309524,1,0.690476
5,3,0.052381,1,0.947619
6,1,0.204762,3,0.795238
7,1,0.128571,3,0.871429
8,1,0.133333,3,0.866667
9,3,0.180952,2,0.819048


In [24]:
# Write the founder allele-frequency table to a tab-separated text file.
af.to_csv('tuson_founder_allele_frqs.txt', sep='\t')

In [25]:
# Display the expanded table (allele columns plus heterozygote_frequency).
eaf

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency,heterozygote_frequency
0,1.0,0.319048,2.0,0.680952,0.371429
1,2.0,0.219048,3.0,0.780952,0.266667
2,3.0,0.061905,2.0,0.938095,0.104762
3,1.0,0.061905,3.0,0.938095,0.104762
4,3.0,0.309524,1.0,0.690476,0.619048
5,3.0,0.052381,1.0,0.947619,0.085714
6,1.0,0.204762,3.0,0.795238,0.314286
7,1.0,0.128571,3.0,0.871429,0.200000
8,1.0,0.133333,3.0,0.866667,0.209524
9,3.0,0.180952,2.0,0.819048,0.266667


In [None]:
# Inspect the per-locus homozygote frequencies computed by the earlier sim.stat call.
tuson.dvars().homoFreq

In [26]:
# List the information fields currently attached to the population.
tuson.infoFields()

('ind_id',
 'father_id',
 'mother_id',
 'female_fitness',
 'male_fitness',
 'primary',
 'father_idx',
 'mother_idx')

In [27]:
# Add three more information fields to the population: 'generation', 'g' and 'p'.
# NOTE(review): what 'g' and 'p' hold is not shown in this notebook -- confirm with their consumers.
tuson.addInfoFields(['generation', 'g', 'p'])

In [28]:
# Save the modified population (fixed sites converted, extra info fields added) under a new file name.
tuson.save('working_tuson.pop')

In [29]:
# Number of individuals in the founder population (105 in the output below).
tuson.popSize()

105

## The Tuson Genetic Map

In [30]:
def parse_recombination_rates(genetic_map_filename):
    """
    Derive one crossover probability per row of a tab-separated genetic map.

    The 'locus', 'agpv2', 'namZmPRDA' and 'namZmPRDS' columns are discarded.
    Of the remaining columns, column 0 is compared between neighbouring rows
    and column 1 is differenced (presumably chromosome and centimorgan
    position -- confirm against the map file).

    For each pair of neighbouring rows the rate is the absolute difference of
    column 1 divided by 100 when column 0 matches, and 0.0 when it does not.
    A final 0.0 is appended, so the returned list has one entry per map row.
    """
    map_table = pd.read_csv(genetic_map_filename, sep='\t', index_col=None)
    map_rows = np.array(
        map_table.drop(['locus', 'agpv2', 'namZmPRDA', 'namZmPRDS'], axis=1)
    )
    rates = []
    # Walk over neighbouring (previous, current) row pairs.
    for previous, current in zip(map_rows[:-1], map_rows[1:]):
        if previous[0] == current[0]:
            rates.append(np.divide(np.abs(current[1] - previous[1]), 100))
        else:
            rates.append(0.0)
    # Trailing entry for the last row, which has no following neighbour.
    rates.append(0.0)
    return rates


In [31]:
# Per-locus crossover probabilities parsed from the raw genetic map; passed to sim.Recombinator below.
recom_rates = parse_recombination_rates('raw_genetic_map.txt')

In [32]:
# Show the parsed rates (this prints the entire list).
recom_rates

[0.0020926625899999962,
 2.2615580000007186e-05,
 0.00042822784999999361,
 0.00031254837999999729,
 0.0014689310100000075,
 0.00020776456000000111,
 0.0012046017399999975,
 0.0004001773199999992,
 0.0023329853400000022,
 0.00084844494999999573,
 0.00020627060000000697,
 0.0034117589199999989,
 0.00055784244999999898,
 1.7850339999991859e-05,
 0.0021015491200000016,
 0.0015734830000000023,
 0.0040656016799999993,
 0.00026672478000000055,
 3.0703610000002435e-05,
 0.0001470012899999995,
 3.271269999998161e-06,
 4.3016000000006823e-06,
 5.2572210000003671e-05,
 4.1393219999998899e-05,
 0.005212608379999999,
 0.00023272412999999936,
 0.0047767046199999989,
 0.0042987637000000013,
 2.5242900000002065e-05,
 2.2332239999995896e-05,
 0.0025030401100000032,
 0.00052706154999999867,
 0.001750234850000001,
 0.00018942481999999927,
 0.00069979514999999996,
 0.0046152015499999997,
 3.9796209999999333e-05,
 0.001775915640000001,
 2.8127809999999308e-05,
 8.8994119999999949e-05,
 2.3671659999999762e-

### Using the `parameters.PopulationStructure` class

In [33]:
# Build the population-structure helper from the founders and the structure matrix spreadsheet.
# NOTE(review): the meaning of the trailing 0.01 and 1.0 arguments is not visible here --
# confirm against parameters.PopulationStructure and consider naming them.
popst = parameters.PopulationStructure(tuson, 'population_structure_matrix.xlsx', 0.01, 1.0)

In [34]:
# Mapping consumed below via .items() as {individual id: sequence of probabilities}.
struct_mating_probs = popst.generate_population_structure()

In [35]:
def format_mating_pmfs(population_structure_dict):
    """
    Convert per-individual probability sequences into discrete distributions.

    Parameters
    ----------
    population_structure_dict : mapping
        ``{individual id: sequence of probabilities}``.  Position ``i`` in a
        sequence is the probability attached to outcome ``i``.

    Returns
    -------
    dict
        ``{individual id: scipy.stats.rv_discrete}`` whose support is
        ``0 .. len(probabilities) - 1``.  Individuals with an empty
        probability sequence get no entry.

    Notes
    -----
    The probabilities are read from the argument itself.  An earlier version
    read them from the notebook-global ``struct_mating_probs``, so the result
    silently ignored the argument; it also rebuilt the same distribution once
    per probability.
    """
    mating_pmfs = {}
    for ind, probabilities in population_structure_dict.items():
        probabilities = list(probabilities)
        if not probabilities:
            # Nothing to sample from; leave this individual out.
            continue
        outcomes = list(range(len(probabilities)))
        mating_pmfs[ind] = stats.rv_discrete(values=(outcomes, probabilities))
    return mating_pmfs

In [36]:
# One discrete distribution per individual, keyed like struct_mating_probs.
formed_mating_pmfs = format_mating_pmfs(struct_mating_probs)

In [37]:
def assign_primary_subpopulation(pop, struct_mating_probabilities):
    """
    Set every individual's ``primary`` field to the index of its largest proportion.

    ``struct_mating_probabilities`` maps an individual's ``ind_id`` to a
    sequence of proportions; the position of the maximum, as a float, becomes
    that individual's ``primary`` value.  The population is modified in place
    and nothing is returned.  An individual whose ``ind_id`` is missing from
    the mapping raises ``KeyError``.
    """
    primary_by_id = {
        ind_id: float(np.argmax(proportions))
        for ind_id, proportions in struct_mating_probabilities.items()
    }
    for individual in pop.individuals():
        individual.primary = primary_by_id[individual.ind_id]

In [38]:
# Write each founder's primary subpopulation index into its 'primary' info field.
assign_primary_subpopulation(tuson, struct_mating_probs)

In [39]:
# Keep the mating distributions in the population's own variable namespace.
tuson.dvars().mating_pmfs = formed_mating_pmfs

In [40]:
# Parent chooser driven by the per-individual mating distributions.
# NOTE(review): 10000 equals the subPopSize used in the evolve calls below and is
# presumably the expanded population size -- confirm, and consider a shared constant.
pop_struct_expansion = breed.ForcedPopulationStructureParentChooser(10000, formed_mating_pmfs)

In [41]:
# Define virtual subpopulations on the 'primary' info field, one per listed value (0.0 through 5.0),
# and attach the splitter to the founder population.
primary_subpopulation_splitter = sim.InfoSplitter(field='primary',
                                                  values=[0.0, 1.0, 2.0, 3.0,
                                                          4.0, 5.0])
tuson.setVirtualSplitter(primary_subpopulation_splitter)


In [42]:
# Tag individuals with IDs without resetting simuPOP's ID counter.
# NOTE(review): the mating distributions and 'primary' assignment above are keyed by ind_id;
# confirm that this call leaves the founders' ind_id values unchanged.
sim.tagID(tuson, reset=False)

In [43]:
# Simulator holding five replicates built from the founder population.
multi_son = sim.Simulator(tuson, rep=5)

In [44]:
# Generation 1: expand every replicate to 10000 individuals, with parents supplied by the
# structure-aware chooser. Each mating produces one offspring, which is ID-, parent- and
# pedigree-tagged and recombined using recom_rates.
multi_son.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(pop_struct_expansion.forced_structure_parent_chooser),
        sim.OffspringGenerator(ops=[sim.IdTagger(), sim.ParentsTagger(), sim.PedigreeTagger(),
                                   sim.Recombinator(recom_rates)], numOffspring=1),
            subPopSize=10000),
    gen=1
)

(1, 1, 1, 1, 1)

In [45]:
# Generation 2: one generation of random mating at a constant size of 10000, with the same
# tagging and recombination operators as the previous generation.
multi_son.evolve(
    matingScheme=sim.RandomMating(ops=[sim.IdTagger(), sim.ParentsTagger(), sim.PedigreeTagger(),
                                   sim.Recombinator(recom_rates)], numOffspring=1,
            subPopSize=10000),
    gen=1,
)

(1, 1, 1, 1, 1)

In [46]:
# Confirm every replicate has the expected size (10000 each in the output below).
for pop in multi_son.populations():
    print(pop.popSize())

10000
10000
10000
10000
10000


In [None]:
# Sampling configuration: sample sizes passed to artemis.collect_samples below.
# NOTE(review): number_of_replicates is not used in any cell visible here; it duplicates
# the rep=5 literal passed to sim.Simulator -- confirm and consider sharing one constant.
number_of_replicates = 5
sample_sizes = [250, 500, 750]

In [None]:
# Identifier for this run, used to construct the Study object in the next cell.
run_id = 'artemis'    

In [None]:
# Re-create the Study object from run_id.
# NOTE(review): this rebinds the `artemis` created near the top of the notebook with the same name.
artemis = analyze.Study(run_id)

In [None]:
# Collect samples of each requested size from the replicate populations.
# NOTE(review): the structure of the returned library is not visible here.
sample_library = artemis.collect_samples(multi_son, sample_sizes)

In [None]:
# Inspect the collected sample library.
sample_library

In [None]:
# Persist the sampled populations through the Study object.
artemis.save_sample_populations(sample_library)

In [None]:
# Store allele frequencies for the samples, using the founder allele array built earlier.
artemis.store_allele_frequencies(sample_library, alleles)

In [None]:
# Check the size of the founder population object again after the simulation cells.
tuson.popSize()