## Run: daoko_girl

In [1]:
import pytest
import simuOpt
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
np.set_printoptions(suppress=True, precision=3)

In [2]:
# Load the saved population. A forward slash is used as the separator because
# it resolves on Windows and on POSIX systems, whereas the escaped-backslash
# form only resolves on Windows.
magic1478 = sim.loadPopulation('populations/magic_1478.pop')

In [3]:
magic1478.setSubPopName('daoko_girl', 0)

In [4]:
magic1478.dvars()

{'rep': 0, 'gen': 3}

In [5]:
magic1478.subPopNames()

('daoko_girl',)

### Analysis Parameters

In [None]:
# Record the analysis settings for this run in the 'analysis_parameters' shelf.
# The context manager closes the shelf when the block exits, so the entries
# are flushed to disk and the handle is not left open.
with shelve.open('analysis_parameters') as analysis_parameters:
    analysis_parameters['population_name'] = 'daoko_girl'
    analysis_parameters['scenario'] = 'random_mating'
    analysis_parameters['generations'] = 3
    analysis_parameters['output_prefix'] = 'daoko_girl'

In [6]:
sim.tagID(magic1478, reset=False)

In [7]:
# Read the genetic map. A forward slash is used as the separator because it
# resolves on Windows and on POSIX systems, whereas the escaped-backslash form
# only resolves on Windows.
genetic_map = pd.read_hdf('parameters/genetic_map_1478.hdf')

In [8]:
# Shelf that collects the trait parameters written by later cells; it is
# closed near the end of the notebook together with the debug-data shelf.
trait = shelve.open('daoko_girl_trait_parameters')
# Alleles at each locus as a NumPy array. A forward slash is used as the path
# separator so the relative path resolves on Windows and on POSIX systems.
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [9]:
# Recombination rates as a NumPy array, taken from the 'recom_rate' column of
# the genetic map; passed to breed.MAGIC in the next cell.
recombination_rates = np.array(list(genetic_map['recom_rate']))

In [11]:
breed_magic_1478 = breed.MAGIC(magic1478, recombination_rates)

In [12]:
# Run interim random mating. The recorded output reports 3 generations, which
# matches the first argument.
# NOTE(review): 2000 is presumably the population size used for each
# generation -- confirm against breed.MAGIC.interim_random_mating.
breed_magic_1478.interim_random_mating(3, 2000)

Initiating interim random mating for 3 generations.
Generation: 3
Generation: 4
Generation: 5


In [56]:
# Count segregating sites in the sample and store 'segSites' and
# 'numOfSegSites' in its population variables.
# NOTE(review): daoko_girl_random_sample is not created in any visible cell,
# and the execution counts jump from 12 to 56 -- the cell that draws the
# sample appears to be missing, so this fails on a fresh kernel.
sim.stat(daoko_girl_random_sample, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])

In [58]:
len(daoko_girl_random_sample.dvars().segSites)

866

In [15]:
# Compute allele frequencies on magic1478 at the loci listed in its own
# 'segSites' variable.
# NOTE(review): the only visible numOfSegSites stat is run on
# daoko_girl_random_sample, not on magic1478 -- confirm that
# magic1478.dvars().segSites is populated before this cell runs.
sim.stat(magic1478, alleleFreq=magic1478.dvars().segSites)

In [17]:
# Pick 10 loci without replacement from magic1478's segregating sites and keep
# them in ascending order.
# NOTE(review): no random seed is set in any visible cell, so the QTL listed in
# the recorded output will not be reproduced on a re-run.
qtl = sorted(random.sample(magic1478.dvars().segSites, 10))

In [18]:
qtl

[208, 379, 402, 431, 601, 796, 1055, 1121, 1261, 1323]

In [19]:
def assign_allele_effects(alleles, qtl, distribution_function,
                         *distribution_function_parameters, multiplicity):
    """
    Build a nested mapping ``{locus: {allele: effect}}`` covering every QTL.

    The effect of an allele is the sum of ``multiplicity`` successive calls to
    ``distribution_function(*distribution_function_parameters)``.

    :parameter alleles: Indexable by locus; each entry iterates over the alleles at that locus
    :parameter qtl: Iterable of loci declared as QTL
    :parameter distribution_function: Callable returning a single draw
    :parameter distribution_function_parameters: Positional arguments forwarded to ``distribution_function``
    :parameter int multiplicity: Keyword-only number of draws summed per allele
    :return: Dictionary of per-locus dictionaries mapping allele to effect
    """
    def summed_draws():
        # One allele effect: ``multiplicity`` draws added together.
        return sum(distribution_function(*distribution_function_parameters)
                   for _ in range(multiplicity))

    return {
        locus: {allele: summed_draws() for allele in alleles[locus]}
        for locus in qtl
    }

In [20]:
# Each allele effect is the sum of 3 draws (multiplicity=3) from
# random.expovariate called with the single argument 1.
allele_effects = assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=3)

In [59]:
allele_effects

{208: {1: 2.0933114252608496, 3: 1.6325996509932121},
 379: {0: 1.3478014732702746, 1: 1.3719395220166295},
 402: {0: 3.3654196081519365, 3: 2.879400812370666},
 431: {1: 2.0474155981427407, 3: 5.5236001122369816},
 601: {4: 5.088741590999549, 5: 3.714503856469204},
 796: {0: 8.413465717882207, 2: 2.9758320654921353},
 1055: {0: 5.036642294085972, 1: 5.531009308749388},
 1121: {2: 4.621541179178365, 3: 5.723502483585158},
 1261: {0: 4.360914842300594, 2: 6.408102889722818},
 1323: {0: 1.7106606770829418, 2: 2.550205863633268}}

In [22]:
# Store the trait architecture in the ``trait`` shelf: the QTL, their allele
# effects, the name of the effect distribution and the heritability.
# NOTE(review): the value 0.7 is repeated in the standalone ``heritability``
# variable assigned in the following cell; the two must be kept in sync by hand.
trait['number_of_qtl'] = len(qtl)
trait['qtl'] = qtl
trait['allele_effects'] = allele_effects
trait['allele_effect_distribution'] = random.expovariate.__name__
trait['heritability'] = 0.7

In [23]:
heritability = 0.7

In [24]:
def assign_additive_g(pop, qtl, allele_effects):
    """
    Calculates the genotypic contribution ``g`` of every individual by summing,
    over all QTL, the effects of the two alleles the individual carries at
    that locus (one per homologue).

    :parameter pop: Population whose ``individuals()`` are updated in place
    :parameter qtl: Iterable of loci declared as QTL
    :parameter dict allele_effects: ``allele_effects[locus][allele]`` gives the effect of an allele
    :return: None; the result is written to each individual's ``g`` field
    """
    for ind in pop.individuals():
        # Fetch each homologue once per individual. The original fetched the
        # whole genotype again for every QTL, which is loop-invariant work.
        homologue_0 = ind.genotype(ploidy=0)
        homologue_1 = ind.genotype(ploidy=1)
        ind.g = sum([
            allele_effects[locus][homologue_0[locus]] +
            allele_effects[locus][homologue_1[locus]]
            for locus in qtl])

In [60]:
assign_additive_g(daoko_girl_random_sample, qtl, allele_effects)

In [61]:
def calculate_error_variance(pop, heritability):
    """
    Derive ``epsilon``, the variance of the error distribution, from the
    variance of the ``g`` information field and the target heritability, and
    store it as ``pop.dvars().epsilon``. The error distribution supplies the
    noise found in real experiments.

    :parameter pop: Population providing ``indInfo('g')`` and ``dvars()``
    :parameter float heritability: Target heritability
    :return: None; the result is stored on the population
    """
    g_values = pop.indInfo('g')
    g_variance = np.var(g_values)
    # epsilon = Var(g) * (1/h - 1)
    error_to_genetic_ratio = 1/heritability - 1
    pop.dvars().epsilon = g_variance*error_to_genetic_ratio

In [62]:

def phenotypic_effect_calculator(pop):
    """
    Simulate measurement error by adding random error to genotypic
    contribution.

    Each individual's ``p`` is set to its ``g`` plus a draw from a normal
    distribution with mean 0. ``pop.dvars().epsilon`` is the error *variance*
    (see ``calculate_error_variance``), while ``random.normalvariate`` takes a
    standard deviation, so the square root of ``epsilon`` is passed.

    :parameter pop: Population providing ``individuals()`` and ``dvars().epsilon``
    :return: None; the result is written to each individual's ``p`` field
    """
    # The original passed epsilon directly as sigma, which made the error
    # variance epsilon**2 instead of epsilon.
    error_sd = float(np.sqrt(pop.dvars().epsilon))
    for ind in pop.individuals():
        ind.p = ind.g + random.normalvariate(0, error_sd)

In [63]:
calculate_error_variance(daoko_girl_random_sample, heritability)

In [65]:
daoko_girl_random_sample.dvars().epsilon

7.9090004348989007

In [66]:
phenotypic_effect_calculator(daoko_girl_random_sample)

In [None]:
# Open the 'synthesis_parameters' shelf.
# NOTE(review): this handle is not closed in this cell, and a later cell opens
# the same shelf again to read 'integer_to_snp'. The only other line here is
# commented out, so this cell currently has no effect beyond leaving the shelf
# open.
synthesis_parameters = shelve.open('synthesis_parameters')
#synthesis_parameters['prefounder_names'] = prefounder_names

In [None]:
#synthesis_parameters['founders'] = simulation_parameters['founders']
#synthesis_parameters['operating_population_size'] = 2000
#synthesis_parameters['snp_to_integer'] = simulation_parameters['snp_to_integer']
#synthesis_parameters['integer_to_snp'] = simulation_parameters['integer_to_snp']
#synthesis_parameters['prefounder_file_name'] = 'prefounders_1478.pop'
#synthesis_parameters['mating_scheme'] = 'MAGIC'

In [34]:
def generate_allele_effects_table(qtl, alleles, allele_effects):
    """
    Tabulate allele effects as a pd.DataFrame with one row per QTL.
    Only handles the bi-allelic case: every locus must have exactly two
    alleles.

    :parameter list qtl: List of loci declared as QTL
    :parameter np.array alleles: Array of alleles at each locus
    :parameter dict allele_effects: Mapping of effects for alleles at each QTLocus
    :return: DataFrame with columns locus, alpha_allele, alpha_effect,
             beta_allele, beta_effect

    """
    column_order = ['locus', 'alpha_allele', 'alpha_effect', 'beta_allele', 'beta_effect']
    columns = {name: [] for name in column_order}

    for locus in qtl:
        # Unpacking enforces the two-alleles-per-locus assumption.
        first_allele, second_allele = alleles[locus]
        effects_at_locus = allele_effects[locus]
        row = (
            locus,
            first_allele,
            effects_at_locus[first_allele],
            second_allele,
            effects_at_locus[second_allele],
        )
        for name, value in zip(column_order, row):
            columns[name].append(value)

    return pd.DataFrame(columns, columns=column_order)

In [35]:
aeframe = generate_allele_effects_table(qtl, alleles, allele_effects)

In [67]:
trait['epsilon'] = daoko_girl_random_sample.dvars().epsilon

In [68]:
segregating_loci = daoko_girl_random_sample.dvars().segSites

In [69]:
af = analyze.allele_data(daoko_girl_random_sample, alleles, list(range(1478)))

In [105]:
# Re-import saegus.analyze in the running kernel (development aid).
# NOTE(review): the alias ``imp`` is also the name of a deprecated standard
# library module, and this import sits mid-notebook rather than in the import
# cell at the top.
import importlib as imp
imp.reload(analyze)

<module 'saegus.analyze' from 'c:\\Anaconda3\\lib\\site-packages\\saegus\\analyze.py'>

In [106]:
gwas = analyze.GWAS(daoko_girl_random_sample, segregating_loci, np.array(af['minor_allele']), 'daoko_girl')

In [107]:
gwas

<saegus.analyze.GWAS at 0x8365e10>

In [109]:
ccm = gwas.calculate_count_matrix('daoko_girl_ma_count.txt')

In [110]:
ps_svd = gwas.pop_struct_svd(ccm)

In [111]:
ps_m = gwas.population_structure_formatter(ps_svd, 'daoko_girl_structure.txt')

In [112]:
# Read the integer -> nucleotide mapping from the 'synthesis_parameters' shelf.
# The context manager closes the shelf even if the key lookup raises, which
# the original open / read / close sequence did not guarantee.
with shelve.open('synthesis_parameters') as synthesis_parameters:
    int_to_snp_map = synthesis_parameters['integer_to_snp']

In [113]:
int_to_snp_map

{0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: '-', 5: '+'}

In [114]:
pwd

'c:\\Users\\DoubleDanks\\BISB\\wisser\\code\\rjwlab-scripts\\saegus_project\\devel\\magic\\1478'

In [115]:
# Load the shared simulation parameters from 'universal_parameters.yaml'.
# NOTE(review): yaml.load is called without an explicit Loader. Depending on
# the PyYAML version this is deprecated or rejected, and the permissive loaders
# can construct arbitrary Python objects. If the file holds only plain
# mappings, lists and scalars, prefer yaml.safe_load(up) -- confirm the file
# contents first.
import yaml
with open('universal_parameters.yaml') as up:
    uniparams = yaml.load(up)

In [116]:
list(uniparams.keys())

['integer_to_snp',
 'meta_pop_sample_sizes',
 'allele_names',
 'number_of_replicates',
 'proportion_of_individuals_saved',
 'number_of_qtl',
 'overshoot_as_proportion',
 'pos_column',
 'locus_names',
 'individuals_per_breeding_subpop',
 'generations_of_random_mating',
 'prefounder_file_name',
 'heritability',
 'generations_of_selection',
 'allele_effect_parameters',
 'founders',
 'operating_population_size',
 'snp_to_integer']

In [117]:
# NOTE(review): 866 equals the number of segregating sites displayed earlier in
# the notebook. The slice keeps the first 866 entries of 'pos_column', not the
# entries at the segregating loci themselves -- confirm this is the intended
# alignment.
pos_names = uniparams['pos_column'][:866]

In [118]:
gwas.pos_names = pos_names

In [121]:
hmap = gwas.hapmap_formatter(int_to_snp_map, 'daoko_girl_hapmap.txt')

In [125]:
phenos = gwas.trait_formatter('daoko_girl_phenotype_vector.txt')

In [122]:
ks_m = gwas.calc_kinship_matrix(ccm, af, 'daoko_girl_kinship_matrix.txt')

In [123]:
# Save intermediate results for this run in the 'daoko_girl_debug_data' shelf:
# the allele data, the ``g`` and ``p`` information fields of the sample, the
# segregating loci and the run name. The shelf is closed in the following cell.
intermediate_data = shelve.open('daoko_girl_debug_data')
intermediate_data['allele_frequencies'] = af
intermediate_data['g'] = np.array(daoko_girl_random_sample.indInfo('g'))
intermediate_data['p'] = np.array(daoko_girl_random_sample.indInfo('p'))
intermediate_data['segregating_loci'] = segregating_loci
intermediate_data['run_name'] = 'daoko_girl'

In [124]:
# Close the trait and debug-data shelves so their contents are written out.
# NOTE(review): the close call for analysis_parameters is commented out, so
# this cell does not close that shelf.
trait.close()
#analysis_parameters.close()
intermediate_data.close()

In [138]:
# Two-way lookup between a saegus locus and its position within
# segregating_loci (the index used on the TASSEL side).
tassel_to_saegus_loci = dict(enumerate(segregating_loci))
saegus_to_tassel_loci = {
    saegus_locus: tassel_index
    for tassel_index, saegus_locus in tassel_to_saegus_loci.items()
}

In [131]:
segregating_frqs = [daoko_girl_random_sample.dvars().alleleFreq[seg_loc] for seg_loc in segregating_loci]

In [133]:
segregating_frqs

[defdict({1: 0.86, 3: 0.14}),
 defdict({1: 0.055, 3: 0.945}),
 defdict({0: 0.065, 2: 0.935}),
 defdict({0: 0.72, 2: 0.28}),
 defdict({0: 0.9, 2: 0.1}),
 defdict({0: 0.755, 2: 0.245}),
 defdict({1: 0.725, 3: 0.275}),
 defdict({1: 0.29, 3: 0.71}),
 defdict({0: 0.12, 2: 0.88}),
 defdict({1: 0.625, 3: 0.375}),
 defdict({2: 0.875, 3: 0.125}),
 defdict({0: 0.6, 3: 0.4}),
 defdict({1: 0.245, 2: 0.755}),
 defdict({4: 0.51, 5: 0.49}),
 defdict({1: 0.925, 3: 0.075}),
 defdict({0: 0.415, 2: 0.585}),
 defdict({0: 0.055, 3: 0.945}),
 defdict({1: 0.135, 2: 0.865}),
 defdict({0: 0.265, 2: 0.735}),
 defdict({0: 0.885, 1: 0.115}),
 defdict({1: 0.12, 3: 0.88}),
 defdict({1: 0.715, 3: 0.285}),
 defdict({1: 0.365, 3: 0.635}),
 defdict({4: 0.05, 5: 0.95}),
 defdict({1: 0.9, 3: 0.1}),
 defdict({1: 0.295, 2: 0.705}),
 defdict({0: 0.865, 2: 0.135}),
 defdict({0: 0.1, 2: 0.9}),
 defdict({1: 0.26, 3: 0.74}),
 defdict({1: 0.14, 3: 0.86}),
 defdict({0: 0.135, 2: 0.865}),
 defdict({1: 0.69, 2: 0.31}),
 defdict({0:

In [135]:
aeframe

Unnamed: 0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect
0,208,1,2.093311,3,1.6326
1,379,0,1.347801,1,1.37194
2,402,0,3.36542,3,2.879401
3,431,3,5.5236,1,2.047416
4,601,5,3.714504,4,5.088742
5,796,0,8.413466,2,2.975832
6,1055,1,5.531009,0,5.036642
7,1121,3,5.723502,2,4.621541
8,1261,2,6.408103,0,4.360915
9,1323,2,2.550206,0,1.710661


In [139]:
tassel_to_saegus_loci[13]

23

In [143]:
aeframe

Unnamed: 0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect
0,208,1,2.093311,3,1.6326
1,379,0,1.347801,1,1.37194
2,402,0,3.36542,3,2.879401
3,431,3,5.5236,1,2.047416
4,601,5,3.714504,4,5.088742
5,796,0,8.413466,2,2.975832
6,1055,1,5.531009,0,5.036642
7,1121,3,5.723502,2,4.621541
8,1261,2,6.408103,0,4.360915
9,1323,2,2.550206,0,1.710661


In [149]:
saegus_to_tassel_loci[208]

109

In [150]:
saegus_to_tassel_loci[379]

208

In [156]:
tassel_to_saegus_loci[87]

159

In [158]:
pos_names

[88897,
 170386,
 251876,
 333365,
 414855,
 496344,
 577834,
 659323,
 740812,
 822302,
 903791,
 985281,
 1066770,
 1148260,
 1229749,
 1311239,
 1392728,
 1474217,
 1555707,
 1637196,
 1718686,
 1800175,
 1881665,
 1963154,
 2180524,
 2397894,
 2615265,
 2832635,
 2969600,
 3026161,
 3082721,
 3139282,
 3195842,
 3252403,
 3308963,
 3365524,
 3422084,
 3478645,
 3535205,
 3591766,
 3648326,
 3704887,
 3784140,
 3886086,
 3988033,
 4089979,
 4191925,
 4293872,
 4395818,
 4471636,
 4521325,
 4571015,
 4620704,
 4670394,
 4720084,
 4769773,
 4819463,
 4869152,
 4918842,
 4968531,
 5018221,
 5067911,
 5117600,
 5167290,
 5216979,
 5266669,
 5316358,
 5366048,
 5415738,
 5465427,
 5515117,
 5564806,
 5625467,
 5697098,
 5768729,
 5840360,
 5911991,
 5983623,
 6055254,
 6126885,
 6198516,
 6284826,
 6385813,
 6486801,
 6587788,
 6688776,
 6789763,
 6890751,
 6991738,
 7092726,
 7193713,
 7298970,
 7404228,
 7509485,
 7614742,
 7719999,
 7825257,
 7930514,
 8035771,
 8141028,
 8246286,
 83

In [157]:
# The original expression was cut off mid-call and did not parse. The recorded
# output below is the 'locus' column of aeframe, so that column is displayed.
aeframe['locus']

0     208
1     379
2     402
3     431
4     601
5     796
6    1055
7    1121
8    1261
9    1323
Name: locus, dtype: int64

In [None]:
# Position of saegus locus 203 within segregating_loci. A stray backtick inside
# the subscript made the original line a syntax error.
saegus_to_tassel_loci[203]