In [1]:
import simuOpt
simuOpt.setOptions(quiet=True, optimized=True, numThreads=4)
import simuPOP as sim
import os, numpy as np, pandas as pd, collections as col
from saegus import analyze, simulate, parameters, breed
from scipy import stats
import random

In [2]:
# Load the founder population from disk; every later cell mutates or reads this object.
# NOTE(review): path is relative to the kernel's working directory.
tuson = sim.loadPopulation('tuson.pop')

In [3]:
# Create the saegus Study handle labelled 'artemis'.
# NOTE(review): an identical Study is rebuilt from `run_id` further down the notebook,
# so one of the two cells is redundant.
artemis = analyze.Study('artemis')

In [4]:
# Populate tuson.dvars() with segregating/fixed site counts and locus lists.
sim.stat(tuson, numOfSegSites=sim.ALL_AVAIL, vars=['numOfSegSites', 'segSites', 'numOfFixedSites', 'fixedSites'])
# Hand the fixed loci to the saegus helper; presumably it introduces a second allele at
# each of them (the next cell reports numOfFixedSites == 0 afterwards) -- TODO confirm.
parameters.randomly_convert_fixed_sites(tuson, tuson.dvars().fixedSites)
# Recompute the same statistics so dvars() reflects the modified genotypes.
sim.stat(tuson, numOfSegSites=sim.ALL_AVAIL, vars=['numOfSegSites', 'segSites', 'numOfFixedSites', 'fixedSites'])

In [5]:
tuson.dvars().numOfFixedSites

0

In [6]:
# Compute per-locus allele frequencies; results are read later from tuson.dvars().alleleFreq.
sim.stat(tuson, alleleFreq=sim.ALL_AVAIL)

In [7]:
# Compute per-locus homozygote frequencies; results are read later from tuson.dvars().homoFreq.
sim.stat(tuson, homoFreq=sim.ALL_AVAIL)

In [8]:
# Compute per-locus heterozygote frequencies (tuson.dvars().heteroFreq).
# NOTE(review): expanded_allele_data() recomputes this statistic itself, so this cell
# looks redundant for that code path.
sim.stat(tuson, heteroFreq=sim.ALL_AVAIL)

In [9]:
# One row per locus listing the allele states observed at that locus (keys of alleleFreq).
# Assumes every locus has the same number of alleles (two in the displayed output);
# otherwise the rows are ragged and the int8 conversion fails -- TODO confirm.
# NOTE(review): column order follows the key order of each alleleFreq entry.
alleles  = np.array([list(tuson.dvars().alleleFreq[locus].keys()) for locus in range(tuson.totNumLoci())], dtype=np.int8)

In [10]:
alleles

array([[1, 2],
       [2, 3],
       [2, 3],
       ..., 
       [1, 2],
       [1, 3],
       [1, 3]], dtype=int8)

## Alleles at Fixed Sites Are Chosen at Random

In [11]:
# Persist the locus-by-allele table as tab-separated integers, one locus per line.
np.savetxt('alleles_of_tuson_founders.txt', alleles, fmt='%d', delimiter='\t')

In [12]:
# Build the allele table for all loci via saegus; later cells read its
# 'minor_allele' and 'major_allele' columns and call .join() on it, so it is
# presumably a pandas DataFrame indexed by locus -- TODO confirm.
af = analyze.allele_data(tuson, alleles, range(tuson.totNumLoci()))

In [13]:
def expanded_allele_data(pop, allele_data_structure):
    """
    Return ``allele_data_structure`` with a 'heterozygote_frequency' column appended.

    Heterozygote frequencies are recomputed on ``pop`` for all available loci and
    taken in the iteration order of ``pop.dvars().heteroFreq``. They are joined
    onto ``allele_data_structure`` by index, so the table is expected to be indexed
    0..n-1 in the same locus order (presumably a pandas DataFrame; verify against caller).

    Side effect: refreshes ``pop.dvars().heteroFreq``.
    """
    sim.stat(pop, heteroFreq=sim.ALL_AVAIL)
    heterozygote_values = list(pop.dvars().heteroFreq.values())
    heterozygote_frame = pd.DataFrame(
        np.array(heterozygote_values),
        columns=['heterozygote_frequency'],
    )
    return allele_data_structure.join(heterozygote_frame)

In [14]:
# Allele table for the founders extended with heterozygote frequencies.
eaf = expanded_allele_data(tuson, af)

In [15]:
# Write a tab-separated snapshot of the expanded table.
# NOTE(review): this is written before the allele columns are recast to int8 below;
# the later export to 'expanded_tuson_founder_allele_frqs.txt' contains the recast version.
eaf.to_csv("example_expanded_allele_frequency.txt", sep='\t')

In [16]:
# Copy the minor/major allele columns as int8 arrays (matches the dtype of `alleles`).
formed_mia = np.array(af['minor_allele'], dtype=np.int8)
formed_maj = np.array(af['major_allele'], dtype=np.int8)

In [17]:
# Overwrite the allele columns of `eaf` in place with their int8 versions.
# Presumably done so the alleles are exported as integers -- TODO confirm.
eaf['minor_allele'] = formed_mia
eaf['major_allele'] = formed_maj

In [18]:
# Export the expanded founder allele table (with int8 allele columns) as tab-separated text.
eaf.to_csv('expanded_tuson_founder_allele_frqs.txt', sep='\t')

In [None]:
tuson.dvars().homoFreq

In [None]:
tuson.infoFields()

In [None]:
# Add the extra per-individual information fields used by this workflow.
# NOTE(review): later cells assign `ind.primary` and split on field='primary', but
# 'primary' is not added here -- confirm it already exists in tuson.pop.
# NOTE(review): not idempotent on re-run if the fields already exist -- verify.
tuson.addInfoFields(['generation', 'g', 'p'])

In [None]:
# Checkpoint the population (fixed sites converted, info fields added) to disk.
tuson.save('working_tuson.pop')

In [None]:
tuson.popSize()

## The Tuson Genetic Map

In [None]:
def parse_recombination_rates(genetic_map_filename):
    """
    Return a list of crossover probabilities derived from a tab-separated genetic map.

    The 'locus', 'agpv2', 'namZmPRDA' and 'namZmPRDS' columns are discarded. Of the
    remaining columns, the first is compared between neighbouring rows and the
    second is differenced (presumably chromosome and centimorgan position; verify
    against the map file).

    The returned list has one entry per map row. Entry k is the absolute difference
    between rows k and k+1 divided by 100 when both rows share the same first-column
    value, and 0.0 otherwise. The final entry is always 0.0.
    """
    map_frame = pd.read_csv(genetic_map_filename, sep='\t', index_col=None)
    map_frame = map_frame.drop(['locus', 'agpv2', 'namZmPRDA', 'namZmPRDS'], axis=1)
    map_rows = np.array(map_frame)

    rates = []
    # Walk over neighbouring rows: (row 0, row 1), (row 1, row 2), ...
    for previous_row, current_row in zip(map_rows[:-1], map_rows[1:]):
        if previous_row[0] == current_row[0]:
            # Same group: convert the map distance to a probability.
            rates.append(np.divide(np.abs(current_row[1] - previous_row[1]), 100))
        else:
            # Group boundary: no crossover between these two rows.
            rates.append(0.0)
    # Nothing follows the last row.
    rates.append(0.0)
    return rates


In [None]:
# Per-locus crossover probabilities, later passed to sim.Recombinator.
# NOTE(review): the list length must match tuson.totNumLoci() -- confirm for this map file.
recom_rates = parse_recombination_rates('raw_genetic_map.txt')

In [None]:
recom_rates

### Using the `parameters.PopulationStructure` class

In [None]:
# Wrap the population together with the population-structure spreadsheet.
# NOTE(review): the meaning of the trailing 0.01 and 1.0 arguments is not visible here;
# name them as constants once confirmed against saegus.parameters.PopulationStructure.
popst = parameters.PopulationStructure(tuson, 'population_structure_matrix.xlsx', 0.01, 1.0)

In [None]:
# Mapping used below as {individual id: sequence of per-subpopulation proportions}
# (it is iterated with .items() and each value is passed to np.argmax / rv_discrete).
struct_mating_probs = popst.generate_population_structure()

In [None]:
def format_mating_pmfs(population_structure_dict):
    """
    Convert per-individual probability vectors into discrete random variables.

    Parameters
    ----------
    population_structure_dict : dict
        Maps an individual identifier to an iterable of probabilities. Position i
        of the iterable is the probability of drawing the integer i.

    Returns
    -------
    dict
        Maps each identifier to a ``scipy.stats.rv_discrete`` whose support is
        0..len(probabilities)-1. Identifiers with an empty probability vector are
        omitted.

    Notes
    -----
    Only the argument is read. The previous implementation looked the
    probabilities up in the notebook-level ``struct_mating_probs`` variable and
    rebuilt each distribution once per probability.
    """
    mating_pmfs = {}
    for ind, probabilities in population_structure_dict.items():
        probabilities = list(probabilities)
        if not probabilities:
            # Nothing to sample from; leave this individual out of the result.
            continue
        support = list(range(len(probabilities)))
        mating_pmfs[ind] = stats.rv_discrete(values=(support, probabilities))
    return mating_pmfs

In [None]:
# One discrete distribution per individual, built from its structure proportions.
formed_mating_pmfs = format_mating_pmfs(struct_mating_probs)

In [None]:
def assign_primary_subpopulation(pop, struct_mating_probabilities):
    """
    Tag every individual of ``pop`` with the index of its largest proportion.

    ``struct_mating_probabilities`` maps an individual id to a sequence of
    proportions. For each individual, the position of the maximum value (as a
    float) is written to its ``primary`` attribute, looked up by ``ind_id``.

    Raises ``KeyError`` if an individual's ``ind_id`` is missing from the mapping.
    NOTE(review): assumes ``pop`` already carries a 'primary' information field;
    verify against caller.
    """
    dominant_subpop = {
        individual_id: float(np.argmax(proportions))
        for individual_id, proportions in struct_mating_probabilities.items()
    }
    for individual in pop.individuals():
        individual.primary = dominant_subpop[individual.ind_id]

In [None]:
# Write each founder's dominant subpopulation index into its 'primary' field (mutates tuson).
assign_primary_subpopulation(tuson, struct_mating_probs)

In [None]:
# Attach the distributions to the population's variable namespace so they travel with it.
# NOTE(review): presumably read by the saegus parent chooser or copied into replicates -- confirm.
tuson.dvars().mating_pmfs = formed_mating_pmfs

In [None]:
# Parent chooser driven by the per-individual mating distributions.
# NOTE(review): 10000 is repeated as subPopSize in both evolve() calls below;
# presumably the target offspring population size -- consider one named constant.
pop_struct_expansion = breed.ForcedPopulationStructureParentChooser(10000, formed_mating_pmfs)

In [None]:
# Define virtual subpopulations on the 'primary' field, one per value 0.0 .. 5.0
# (the values written by assign_primary_subpopulation).
# NOTE(review): six subpopulations are hard-coded; confirm this matches the number of
# columns in the population-structure matrix.
primary_subpopulation_splitter = sim.InfoSplitter(field='primary',
                                                  values=[0.0, 1.0, 2.0, 3.0,
                                                          4.0, 5.0])
tuson.setVirtualSplitter(primary_subpopulation_splitter)


In [None]:
# (Re)assign individual IDs without resetting simuPOP's ID counter.
# NOTE(review): struct_mating_probs / formed_mating_pmfs were keyed by ind_id before this
# call; confirm that re-tagging leaves the founders' IDs consistent with those keys.
sim.tagID(tuson, reset=False)

In [None]:
# Five replicate copies of the founder population, evolved independently below.
# NOTE(review): `number_of_replicates = 5` is defined in a later cell but not used here.
# NOTE(review): no RNG seed is set in the cells shown, so replicates differ between runs.
multi_son = sim.Simulator(tuson, rep=5)

In [None]:
# Generation 1: structured expansion. Parents come from the custom chooser; each mating
# yields one offspring, tagged with a new ID, its parents and pedigree, and recombined
# with the map-derived rates. Each replicate is grown to 10000 individuals
# (subPopSize is an argument of HomoMating despite its indentation).
multi_son.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(pop_struct_expansion.forced_structure_parent_chooser),
        sim.OffspringGenerator(ops=[sim.IdTagger(), sim.ParentsTagger(), sim.PedigreeTagger(),
                                   sim.Recombinator(recom_rates)], numOffspring=1),
            subPopSize=10000),
    gen=1
)

In [None]:
# Generation 2: one round of random mating with the same taggers and recombination
# rates, keeping each replicate at 10000 individuals.
multi_son.evolve(
    matingScheme=sim.RandomMating(ops=[sim.IdTagger(), sim.ParentsTagger(), sim.PedigreeTagger(),
                                   sim.Recombinator(recom_rates)], numOffspring=1,
            subPopSize=10000),
    gen=1,
)

In [None]:
# Sanity check: print the size of every replicate (10000 each is expected, given the
# subPopSize passed to evolve()).
for pop in multi_son.populations():
    print(pop.popSize())

In [None]:
# Sampling configuration.
# NOTE(review): number_of_replicates is not referenced by any cell shown here; the
# Simulator above hard-codes rep=5 -- keep the two in sync or use this constant there.
number_of_replicates = 5
# Sample sizes passed to artemis.collect_samples below.
sample_sizes = [250, 500, 750]

In [None]:
# Label for this run; used to construct the Study object in the next cell.
run_id = 'artemis'    

In [None]:
# Rebuild the Study handle from run_id.
# NOTE(review): replaces the `artemis` object created near the top of the notebook.
artemis = analyze.Study(run_id)

In [None]:
# Collect samples of each size in sample_sizes from the evolved replicates.
# NOTE(review): the structure of the returned library is not visible here; presumably
# keyed by replicate -- confirm against saegus.analyze.Study.collect_samples.
sample_library = artemis.collect_samples(multi_son, sample_sizes)

In [None]:
sample_library

In [None]:
# Persist the sampled populations via the Study object.
# NOTE(review): output location and naming are decided inside saegus -- confirm.
artemis.save_sample_populations(sample_library)

In [None]:
# Store allele frequencies of the samples, using the founder allele table `alleles`
# as the reference for which states exist at each locus (presumably -- TODO confirm).
artemis.store_allele_frequencies(sample_library, alleles)

In [None]:
tuson.popSize()