## Run: daoko_girl

In [1]:
import pytest
import simuOpt
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
np.set_printoptions(suppress=True, precision=3)

In [2]:
# Forward slashes keep this relative path valid on both Windows and POSIX;
# the previous '\\' separator only resolved on Windows.
magic1478 = sim.loadPopulation('populations/magic_1478.pop')

In [7]:
magic1478.setSubPopName('daoko_girl', 0)

In [9]:
magic1478.dvars()

{'rep': 0, 'gen': 3}

### Analysis Parameters

In [13]:
# Open (or create) the on-disk shelf that records how this run was configured.
analysis_parameters = shelve.open('analysis_parameters')
analysis_parameters['population_name'] = 'daoko_girl'
analysis_parameters['scenario'] = 'random_mating'
# NOTE(review): presumably mirrors the generation count passed to
# interim_random_mating in a later cell — keep the two values in sync.
analysis_parameters['generations'] = 3
analysis_parameters['output_prefix'] = 'daoko_girl'

In [23]:
sim.tagID(magic1478, reset=False)

In [10]:
# Forward slashes keep this relative path valid on both Windows and POSIX;
# the previous '\\' separator only resolved on Windows.
genetic_map = pd.read_hdf('parameters/genetic_map_1478.hdf')

In [25]:
# Shelf that later cells fill with the parameters of the simulated trait.
trait = shelve.open('daoko_girl_trait_parameters')
# Forward slashes keep this relative path valid on both Windows and POSIX;
# the previous '\\' separator only resolved on Windows.
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [32]:
recombination_rates = np.array(list(genetic_map['recom_rate']))

In [33]:
breed_magic_1478 = breed.MAGIC(magic1478, recombination_rates)

In [34]:
breed_magic_1478.interim_random_mating(3, 2000)

Initiating interim random mating for 3 generations.
Generation: 3
Generation: 4
Generation: 5


In [35]:
magic1478.subPopName(0)

'daoko_girl'

In [36]:
sim.stat(magic1478, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])

In [39]:
magic1478.dvars().segSites

[1,
 2,
 4,
 5,
 6,
 8,
 10,
 12,
 13,
 15,
 16,
 20,
 21,
 23,
 24,
 25,
 26,
 28,
 29,
 31,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 45,
 46,
 47,
 49,
 50,
 54,
 55,
 56,
 58,
 60,
 61,
 62,
 68,
 70,
 74,
 77,
 82,
 85,
 86,
 94,
 95,
 99,
 100,
 102,
 103,
 105,
 106,
 107,
 108,
 109,
 114,
 115,
 116,
 119,
 120,
 121,
 122,
 123,
 124,
 126,
 127,
 128,
 130,
 132,
 134,
 137,
 138,
 139,
 140,
 141,
 144,
 145,
 146,
 147,
 149,
 152,
 153,
 155,
 156,
 158,
 159,
 163,
 165,
 168,
 171,
 173,
 174,
 176,
 178,
 184,
 185,
 186,
 187,
 189,
 191,
 192,
 193,
 194,
 195,
 196,
 202,
 203,
 208,
 211,
 212,
 213,
 214,
 215,
 220,
 222,
 223,
 225,
 226,
 227,
 229,
 232,
 234,
 235,
 240,
 241,
 242,
 244,
 245,
 247,
 252,
 256,
 257,
 258,
 259,
 261,
 262,
 264,
 265,
 266,
 268,
 272,
 273,
 274,
 275,
 277,
 281,
 282,
 283,
 284,
 285,
 288,
 290,
 291,
 292,
 293,
 294,
 297,
 299,
 300,
 301,
 302,
 303,
 305,
 306,
 307,
 310,
 312,
 314,
 315,
 316,
 317,
 318,
 319,
 320,

In [40]:
sim.stat(magic1478, alleleFreq=magic1478.dvars().segSites)

In [47]:
# Draw 10 distinct segregating sites to act as QTL and keep them in ascending order.
# NOTE(review): no random.seed(...) call is visible before this cell, so the
# sampled loci presumably differ on every fresh run — confirm that is intended.
qtl = sorted(random.sample(magic1478.dvars().segSites, 10))

In [43]:
def assign_allele_effects(alleles, qtl, distribution_function,
                         *distribution_function_parameters, multiplicity):
    """
    Build a nested mapping ``{locus: {allele: effect}}`` for the given QTL.

    Every effect is the sum of ``multiplicity`` successive calls to
    ``distribution_function(*distribution_function_parameters)``.

    :param alleles: Container indexed by locus; ``alleles[locus]`` is iterated
        to obtain the alleles found at that locus.
    :param qtl: Iterable of loci that receive allele effects.
    :param distribution_function: Callable producing a single draw.
    :param distribution_function_parameters: Positional arguments forwarded
        to ``distribution_function`` on every draw.
    :param multiplicity: Keyword-only number of draws summed per allele.
    :return: Dictionary keyed by locus, each value a dictionary keyed by allele.
    """
    def draw_effect():
        # Accumulate left to right starting from 0, exactly as sum() would.
        total = 0
        for _ in range(multiplicity):
            total += distribution_function(*distribution_function_parameters)
        return total

    return {
        locus: {allele: draw_effect() for allele in alleles[locus]}
        for locus in qtl
    }

In [60]:
allele_effects = assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=3)

In [61]:
allele_effects

{320: {1: 1.6541676446507818, 3: 2.31659541866544},
 336: {2: 3.7526295859582963, 3: 1.9879813354873381},
 475: {0: 1.1552932103968137, 3: 2.304770194571306},
 506: {0: 1.6844311754399501, 1: 5.9971203347814805},
 594: {0: 0.68441297279427, 2: 1.8701746234984733},
 652: {4: 4.065242472525488, 5: 2.2014046642624985},
 681: {0: 1.2280387118365108, 2: 1.6143881288616857},
 723: {0: 1.2760538047478562, 2: 5.5059961632261345},
 1089: {1: 5.017551668852887, 3: 0.7581611325103076},
 1462: {1: 3.584039597858368, 3: 0.648031205124744}}

In [63]:
# Persist the trait definition (QTL, their allele effects and the effect
# distribution's name) in the trait shelf.
trait['number_of_qtl'] = len(qtl)
trait['qtl'] = qtl
trait['allele_effects'] = allele_effects
trait['allele_effect_distribution'] = random.expovariate.__name__
# NOTE(review): the same 0.7 is assigned to `heritability` in a later cell;
# keep the two values in sync.
trait['heritability'] = 0.7

In [70]:
heritability = 0.7

In [64]:
def assign_additive_g(pop, qtl, allele_effects):
    """
    Calculates genotypic contribution ``g`` of every individual by summing,
    over each locus in ``qtl``, the effects of the alleles carried on ploidy
    copies 0 and 1, and stores the total in ``ind.g``.

    :param pop: Population whose ``individuals()`` are updated in place.
    :param qtl: Iterable of locus indices contributing to the trait.
    :param allele_effects: Mapping ``{locus: {allele: effect}}``.
    """
    for ind in pop.individuals():
        # Fetch each ploidy copy once per individual instead of once per QTL;
        # the genotype does not change while the effects are being summed.
        first_copy = ind.genotype(ploidy=0)
        second_copy = ind.genotype(ploidy=1)
        ind.g = sum(
            allele_effects[locus][first_copy[locus]] +
            allele_effects[locus][second_copy[locus]]
            for locus in qtl)

In [65]:
assign_additive_g(magic1478, qtl, allele_effects)

In [66]:
magic1478.indInfo('g')

(56.99713424687718,
 45.649373502905604,
 61.20926827550158,
 48.72983887720939,
 57.02718413886285,
 52.60090391328775,
 59.52354449157236,
 55.14894482215833,
 49.55672320551368,
 52.89263065382137,
 58.391081463867096,
 47.90024727008317,
 60.34954194862165,
 55.3414820048999,
 51.91012818664743,
 55.8476572627027,
 53.023537996842634,
 57.02718413886285,
 63.12036259148275,
 51.10136217861779,
 55.28818338190096,
 49.16344480195275,
 46.79964486046475,
 57.627953896717784,
 59.24732147216535,
 51.111539646421626,
 58.724132257893324,
 55.401774448134525,
 50.93340397212158,
 48.746229469590986,
 54.74558144866392,
 50.64093319319058,
 49.27854610352895,
 44.69161000801947,
 54.46474172155074,
 49.33058698509804,
 58.09935472333349,
 47.9846121377843,
 53.733366940993015,
 54.46474172155074,
 58.427366130396805,
 54.50102638808045,
 58.614341876015,
 51.06507751208808,
 59.24306278086035,
 55.44067156269193,
 57.38348366390237,
 55.81137259617299,
 52.21455449626258,
 58.53493673131

In [67]:
def calculate_error_variance(pop, heritability):
    """
    Calculates the parameter ``epsilon`` to be used as the variance
    of the error distribution. The error distribution generates noise
    found in real experiments.

    With ``h2 = Var(g) / (Var(g) + epsilon)`` the error variance is
    ``epsilon = Var(g) * (1 / h2 - 1)``. The result is stored in
    ``pop.dvars().epsilon``; nothing is returned.

    :param pop: Population providing the ``g`` values via ``indInfo('g')``.
    :param heritability: Target heritability, in the interval (0, 1].
    :raises ValueError: If ``heritability`` lies outside (0, 1], which would
        otherwise divide by zero or yield a negative variance.
    """
    if not 0 < heritability <= 1:
        raise ValueError(
            "heritability must be in the interval (0, 1]; got {!r}".format(heritability))
    variance_of_g = np.var(pop.indInfo('g'))
    pop.dvars().epsilon = variance_of_g * (1 / heritability - 1)

In [68]:

def phenotypic_effect_calculator(pop):
    """
    Simulate measurement error by adding random error to genotypic
    contribution.

    ``pop.dvars().epsilon`` holds the error *variance*, whereas
    ``random.normalvariate`` expects a standard deviation, so the square
    root of ``epsilon`` is used as the spread of the noise. Each
    individual's phenotype is written to ``ind.p``.

    :param pop: Population whose individuals carry ``g`` and receive ``p``.
    """
    # Looked up once: the error spread is the same for every individual.
    error_sd = pop.dvars().epsilon ** 0.5
    for ind in pop.individuals():
        ind.p = ind.g + random.normalvariate(0, error_sd)

In [71]:
calculate_error_variance(magic1478, heritability)

In [72]:
magic1478.dvars().epsilon

7.339266620131486

In [74]:
phenotypic_effect_calculator(magic1478)

In [None]:
# Open (or create) the shelf holding the population-synthesis parameters.
# NOTE(review): `prefounder_names` is not defined in any cell shown here and
# this cell has no execution count; it would presumably raise NameError on a
# fresh Run All — confirm where the name should come from.
synthesis_parameters = shelve.open('synthesis_parameters')
synthesis_parameters['prefounder_names'] = prefounder_names

In [None]:
# Copy founder and SNP-encoding settings into the synthesis shelf alongside
# the fixed settings for this run.
# NOTE(review): `simulation_parameters` is not defined in any cell shown here
# and this cell has no execution count — confirm its source before running.
# NOTE(review): no visible cell closes `synthesis_parameters`; verify it is
# closed elsewhere so the writes are flushed.
synthesis_parameters['founders'] = simulation_parameters['founders']
synthesis_parameters['operating_population_size'] = 2000
synthesis_parameters['snp_to_integer'] = simulation_parameters['snp_to_integer']
synthesis_parameters['integer_to_snp'] = simulation_parameters['integer_to_snp']
synthesis_parameters['prefounder_file_name'] = 'prefounders_1478.pop'
synthesis_parameters['mating_scheme'] = 'MAGIC'

In [90]:
# NOTE(review): dict.setdefault called with only a key inserts that key with
# the value None (see the `0.0: None` entry in the output further down); it
# does not set a fallback for missing loci. This mutates `allele_effects`
# after it was stored in the trait shelf — confirm whether this cell is a
# leftover experiment.
allele_effects.setdefault(0.0)

In [94]:
for alpha, beta in alleles:
    print(alpha, beta)
    break

1 2


In [91]:
allele_effects

{0.0: None,
 320: {1: 1.6541676446507818, 3: 2.31659541866544},
 336: {2: 3.7526295859582963, 3: 1.9879813354873381},
 475: {0: 1.1552932103968137, 3: 2.304770194571306},
 506: {0: 1.6844311754399501, 1: 5.9971203347814805},
 594: {0: 0.68441297279427, 2: 1.8701746234984733},
 652: {4: 4.065242472525488, 5: 2.2014046642624985},
 681: {0: 1.2280387118365108, 2: 1.6143881288616857},
 723: {0: 1.2760538047478562, 2: 5.5059961632261345},
 1089: {1: 5.017551668852887, 3: 0.7581611325103076},
 1462: {1: 3.584039597858368, 3: 0.648031205124744}}

In [104]:
trait['allele_effects']

{320: {1: 1.6541676446507818, 3: 2.31659541866544},
 336: {2: 3.7526295859582963, 3: 1.9879813354873381},
 475: {0: 1.1552932103968137, 3: 2.304770194571306},
 506: {0: 1.6844311754399501, 1: 5.9971203347814805},
 594: {0: 0.68441297279427, 2: 1.8701746234984733},
 652: {4: 4.065242472525488, 5: 2.2014046642624985},
 681: {0: 1.2280387118365108, 2: 1.6143881288616857},
 723: {0: 1.2760538047478562, 2: 5.5059961632261345},
 1089: {1: 5.017551668852887, 3: 0.7581611325103076},
 1462: {1: 3.584039597858368, 3: 0.648031205124744}}

In [75]:
trait['epsilon'] = magic1478.dvars().epsilon

In [77]:
af = analyze.allele_data(magic1478, alleles, list(range(1478)))

In [81]:
len(magic1478.dvars().segSites)

866

In [None]:
# NOTE(review): this cell contained only the bare keyword `import`, which is a
# SyntaxError and stops a Restart & Run All; left empty until the intended
# module is known.

In [95]:
# One row per segregating locus: (locus index, minor allele, minor-allele frequency).
# `.loc` replaces the `.ix` indexer, which was removed from pandas.
# NOTE(review): assumes `af` is indexed by integer locus labels, as its
# displayed output suggests, so label-based lookup matches the old behaviour.
segregating_data = np.array([(segloc,
                              af.loc[segloc, 'minor_allele'],
                              af.loc[segloc, 'minor_frequency'])
                             for segloc in magic1478.dvars().segSites])

In [97]:
af

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency
0,2,0.00000,1,1.00000
1,3,0.12925,1,0.87075
2,1,0.07925,3,0.92075
3,2,0.00000,0,1.00000
4,0,0.05175,2,0.94825
5,2,0.23900,0,0.76100
6,2,0.10925,0,0.89075
7,1,0.00000,3,1.00000
8,2,0.21600,0,0.78400
9,3,0.00000,1,1.00000


In [99]:
segregating_frame = pd.DataFrame(segregating_data, columns=['segregating_locus', 'minor_allele', 'frequency'])

In [100]:
segregating_frame

Unnamed: 0,segregating_locus,minor_allele,frequency
0,1,3,0.12925
1,2,1,0.07925
2,4,0,0.05175
3,5,2,0.23900
4,6,2,0.10925
5,8,2,0.21600
6,10,3,0.26500
7,12,1,0.26525
8,13,0,0.11200
9,15,3,0.34450


In [121]:
segregating_loci = np.array(magic1478.dvars().segSites)

In [118]:
relative_pos = [magic1478.chromLocusPair(seg_loc)[1] for seg_loc in segregating_loci]

In [120]:
relative_pos

[1,
 2,
 4,
 5,
 6,
 8,
 10,
 12,
 13,
 15,
 16,
 20,
 21,
 23,
 24,
 25,
 26,
 28,
 29,
 31,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 45,
 46,
 47,
 49,
 50,
 54,
 55,
 56,
 58,
 60,
 61,
 62,
 68,
 70,
 74,
 77,
 82,
 85,
 86,
 94,
 95,
 99,
 100,
 102,
 103,
 105,
 106,
 107,
 108,
 109,
 114,
 115,
 116,
 119,
 120,
 121,
 122,
 123,
 124,
 126,
 127,
 128,
 130,
 132,
 134,
 137,
 138,
 139,
 140,
 141,
 144,
 145,
 146,
 147,
 149,
 152,
 153,
 155,
 156,
 158,
 159,
 163,
 165,
 168,
 171,
 173,
 174,
 176,
 178,
 184,
 185,
 186,
 187,
 189,
 191,
 192,
 193,
 194,
 195,
 196,
 202,
 203,
 208,
 1,
 2,
 3,
 4,
 5,
 10,
 12,
 13,
 15,
 16,
 17,
 19,
 22,
 24,
 25,
 30,
 31,
 32,
 34,
 35,
 37,
 42,
 46,
 47,
 48,
 49,
 51,
 52,
 54,
 55,
 56,
 58,
 62,
 63,
 64,
 65,
 67,
 71,
 72,
 73,
 74,
 75,
 78,
 80,
 81,
 82,
 83,
 84,
 87,
 89,
 90,
 91,
 92,
 93,
 95,
 96,
 97,
 100,
 102,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 115,
 118,
 121,
 122,
 123,
 124,
 125,
 126,
 129,
 

In [122]:
# Persist intermediate results of this run in a separate debug shelf so they
# can be inspected later without re-running the simulation.
intermediate_data = shelve.open('daoko_girl_debug_data')
intermediate_data['allele_frequencies'] = af
intermediate_data['segregating_allele_frequencies'] = segregating_frame
# Copy the per-individual 'g' and 'p' info fields into numpy arrays before storing.
intermediate_data['g'] = np.array(magic1478.indInfo('g'))
intermediate_data['p'] = np.array(magic1478.indInfo('p'))
intermediate_data['segregating_loci'] = segregating_loci
intermediate_data['relative_pos'] = relative_pos
intermediate_data['run_name'] = 'daoko_girl'

In [123]:
# Close the three shelves used by the executed cells so their contents are
# written to disk.
# NOTE(review): `synthesis_parameters` is opened in an unexecuted cell and is
# not closed here — confirm whether it needs closing as well.
trait.close()
analysis_parameters.close()
intermediate_data.close()