## Run: daoko_girl

In [2]:
import pytest
import simuOpt
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
np.set_printoptions(suppress=True, precision=3)

In [3]:
# Forward slashes resolve on both Windows and POSIX; a literal backslash is only a separator on Windows.
magic1478 = sim.loadPopulation('populations/magic_1478.pop')

In [4]:
magic1478.setSubPopName('daoko_girl', 0)

In [5]:
magic1478.dvars()

{'rep': 0, 'gen': 3}

In [6]:
magic1478.subPopNames()

('daoko_girl',)

### Analysis Parameters

In [11]:
# Open the 'analysis_parameters' shelf and record the settings of this run in it.
analysis_parameters = shelve.open('analysis_parameters')
analysis_parameters['population_name'] = 'daoko_girl'
analysis_parameters['scenario'] = 'random_mating'
# NOTE(review): presumably mirrors the 3 generations passed to interim_random_mating further down — confirm.
analysis_parameters['generations'] = 3
analysis_parameters['output_prefix'] = 'daoko_girl'

In [12]:
sim.tagID(magic1478, reset=False)

In [13]:
# Forward slashes resolve on both Windows and POSIX; a literal backslash is only a separator on Windows.
genetic_map = pd.read_hdf('parameters/genetic_map_1478.hdf')

In [14]:
# Shelf that receives the trait parameters written later in this notebook.
trait = shelve.open('daoko_girl_trait_parameters')
# Forward slashes resolve on both Windows and POSIX; a literal backslash is only a separator on Windows.
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [15]:
recombination_rates = np.array(list(genetic_map['recom_rate']))

In [16]:
breed_magic_1478 = breed.MAGIC(magic1478, recombination_rates)

In [17]:
breed_magic_1478.interim_random_mating(3, 2000)

Initiating interim random mating for 3 generations.
Generation: 3
Generation: 4
Generation: 5


In [19]:
sim.stat(magic1478, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])

In [22]:
sim.stat(magic1478, alleleFreq=magic1478.dvars().segSites)

In [23]:
# Draw 10 of the segregating loci at random to act as QTL and keep them in ascending order.
# NOTE(review): no random seed is set anywhere in the visible notebook, so this draw
# (and the recorded outputs that depend on it) will differ from run to run.
qtl = sorted(random.sample(magic1478.dvars().segSites, 10))

In [24]:
qtl

[44, 103, 168, 340, 488, 639, 737, 819, 981, 1065]

In [25]:
def assign_allele_effects(alleles, qtl, distribution_function,
                         *distribution_function_parameters, multiplicity):
    """
    Draws an effect for every allele found at each QTL.

    Each effect is the sum of ``multiplicity`` independent calls to
    ``distribution_function(*distribution_function_parameters)``.

    :parameter alleles: Indexable by locus; ``alleles[locus]`` yields the alleles at that locus
    :parameter qtl: Iterable of loci declared as QTL
    :parameter distribution_function: Callable returning one draw per call
    :parameter distribution_function_parameters: Positional arguments forwarded to ``distribution_function``
    :parameter int multiplicity: Number of draws summed into one effect (keyword-only)
    :return: Nested dict ``{locus: {allele: effect}}``
    """
    def draw_effect():
        # One effect is the total of ``multiplicity`` consecutive draws.
        return sum(distribution_function(*distribution_function_parameters)
                   for _ in range(multiplicity))

    return {
        locus: {allele: draw_effect() for allele in alleles[locus]}
        for locus in qtl
    }

In [26]:
allele_effects = assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=3)

In [27]:
allele_effects

{44: {0: 5.629446187924926, 2: 1.8962727055819322},
 103: {0: 1.3097813991257303, 2: 6.14070564290979},
 168: {2: 6.718096248082958, 3: 4.697238579652859},
 340: {1: 1.521689147484636, 2: 2.2131077852927032},
 488: {1: 2.512286137462885, 3: 2.486777318327935},
 639: {0: 1.1268072986309254, 3: 1.3391282487711016},
 737: {0: 1.4879865577936147, 1: 1.607534785598338},
 819: {1: 2.2153417608326986, 3: 0.20077940947200731},
 981: {0: 3.9513501430851568, 3: 1.78843909724396},
 1065: {0: 0.998194377898828, 2: 1.5139052352904945}}

In [28]:
# Persist the trait architecture of this run in the trait shelf.
trait['number_of_qtl'] = len(qtl)
trait['qtl'] = qtl
trait['allele_effects'] = allele_effects
# Only the name of the sampling function is stored, not the function object.
trait['allele_effect_distribution'] = random.expovariate.__name__
# NOTE(review): the same 0.7 is assigned again to the separate `heritability` variable
# in the following cell — the two values have to be kept in sync by hand.
trait['heritability'] = 0.7

In [29]:
heritability = 0.7

In [30]:
def assign_additive_g(pop, qtl, allele_effects):
    """
    Calculates the additive genotypic contribution ``g`` of every individual.

    For each individual, the effects of the alleles found at every QTL in
    its ploidy-0 and ploidy-1 genotypes are summed, and the total is stored
    in the individual's ``g`` field.

    :parameter pop: Population whose individuals receive ``g``
    :parameter list qtl: Loci declared as QTL
    :parameter dict allele_effects: Mapping ``{locus: {allele: effect}}``
    """
    for ind in pop.individuals():
        # Fetch each genotype copy once per individual instead of once per QTL.
        copy_0 = ind.genotype(ploidy=0)
        copy_1 = ind.genotype(ploidy=1)
        ind.g = sum(
            allele_effects[locus][copy_0[locus]] +
            allele_effects[locus][copy_1[locus]]
            for locus in qtl)

In [31]:
assign_additive_g(magic1478, qtl, allele_effects)

In [32]:
magic1478.indInfo('g')

(60.3606582168071,
 51.34596525875175,
 58.197747170965904,
 48.95721508748723,
 57.02858546699207,
 54.49831225823971,
 51.161301405483655,
 57.855893827189696,
 51.203911881340645,
 57.01312282537254,
 57.610625371041465,
 54.49831225823971,
 55.22634406577155,
 57.02858546699207,
 59.776416769880605,
 55.38138527854254,
 52.99946076427069,
 57.82408969098534,
 53.5406804407973,
 60.3606582168071,
 51.352260575821155,
 55.767563742298165,
 51.998356902141715,
 47.9363179113824,
 52.99946076427069,
 57.54429632438374,
 55.72741656312256,
 53.06343301993035,
 51.70663490337243,
 50.64559054809199,
 59.20339295714963,
 54.888179570757245,
 53.515171621662354,
 45.08251856765639,
 54.37246871336558,
 54.89447488782665,
 53.36682292718184,
 53.974371650366194,
 51.79656049068005,
 57.02858546699207,
 54.40427284956993,
 51.52866356839482,
 60.3606582168071,
 54.83831533521498,
 59.844947359415436,
 50.55566496078437,
 53.72749257180253,
 57.05409428612701,
 53.806893620431644,
 66.5708857

In [33]:
def calculate_error_variance(pop, heritability):
    """
    Calculates the parameter ``epsilon`` to be used as the variance
    of the error distribution. The error distribution generates noise
    found in real experiments.

    ``epsilon`` is ``Var(g) * (1/heritability - 1)`` and is stored in the
    population's variable namespace as ``pop.dvars().epsilon``.

    :parameter pop: Population whose ``g`` information field is already assigned
    :parameter float heritability: Value in the interval (0, 1]
    :raises ValueError: If ``heritability`` lies outside (0, 1]
    """
    # Outside (0, 1] the formula either divides by zero or yields a negative "variance".
    if not 0 < heritability <= 1:
        raise ValueError(
            'heritability must be in the interval (0, 1], got {!r}'.format(heritability))
    variance_of_g = np.var(pop.indInfo('g'))
    epsilon = variance_of_g*(1/heritability - 1)
    pop.dvars().epsilon = epsilon

In [34]:

def phenotypic_effect_calculator(pop):
    """
    Simulate measurement error by adding random error to genotypic
    contribution.

    Each individual's ``p`` is set to its ``g`` plus a draw from a normal
    distribution with mean 0 and variance ``pop.dvars().epsilon``.

    :parameter pop: Population with ``g`` assigned and ``epsilon`` stored in ``dvars()``
    """
    # ``epsilon`` is a variance, whereas ``random.normalvariate`` takes the
    # standard deviation as its second argument, hence the square root.
    error_std_dev = pop.dvars().epsilon ** 0.5
    for ind in pop.individuals():
        ind.p = ind.g + random.normalvariate(0, error_std_dev)

In [35]:
calculate_error_variance(magic1478, heritability)

In [36]:
magic1478.dvars().epsilon

5.7159708628358308

In [37]:
phenotypic_effect_calculator(magic1478)

In [52]:
# Open the 'synthesis_parameters' shelf.
synthesis_parameters = shelve.open('synthesis_parameters')
# NOTE(review): prefounder_names is not assigned anywhere in the visible notebook, so this
# line raises NameError on a fresh kernel unless it is defined elsewhere — confirm.
synthesis_parameters['prefounder_names'] = prefounder_names

In [53]:
#synthesis_parameters['founders'] = simulation_parameters['founders']
synthesis_parameters['operating_population_size'] = 2000
#synthesis_parameters['snp_to_integer'] = simulation_parameters['snp_to_integer']
#synthesis_parameters['integer_to_snp'] = simulation_parameters['integer_to_snp']
synthesis_parameters['prefounder_file_name'] = 'prefounders_1478.pop'
synthesis_parameters['mating_scheme'] = 'MAGIC'

NameError: name 'simulation_parameters' is not defined

In [63]:
allele_effects

{44: {0: 5.629446187924926, 2: 1.8962727055819322},
 103: {0: 1.3097813991257303, 2: 6.14070564290979},
 168: {2: 6.718096248082958, 3: 4.697238579652859},
 340: {1: 1.521689147484636, 2: 2.2131077852927032},
 488: {1: 2.512286137462885, 3: 2.486777318327935},
 639: {0: 1.1268072986309254, 3: 1.3391282487711016},
 737: {0: 1.4879865577936147, 1: 1.607534785598338},
 819: {1: 2.2153417608326986, 3: 0.20077940947200731},
 981: {0: 3.9513501430851568, 3: 1.78843909724396},
 1065: {0: 0.998194377898828, 2: 1.5139052352904945}}

In [64]:
trait['allele_effects']

{44: {0: 5.629446187924926, 2: 1.8962727055819322},
 103: {0: 1.3097813991257303, 2: 6.14070564290979},
 168: {2: 6.718096248082958, 3: 4.697238579652859},
 340: {1: 1.521689147484636, 2: 2.2131077852927032},
 488: {1: 2.512286137462885, 3: 2.486777318327935},
 639: {0: 1.1268072986309254, 3: 1.3391282487711016},
 737: {0: 1.4879865577936147, 1: 1.607534785598338},
 819: {1: 2.2153417608326986, 3: 0.20077940947200731},
 981: {0: 3.9513501430851568, 3: 1.78843909724396},
 1065: {0: 0.998194377898828, 2: 1.5139052352904945}}

In [67]:
alleles

array([[1, 2],
       [1, 3],
       [3, 1],
       ..., 
       [1, 0],
       [3, 0],
       [3, 1]], dtype=int64)

In [81]:
def generate_allele_effects_table(qtl, alleles, allele_effects):
    """
    Tabulates the allele effects at every QTL as a pd.DataFrame with one
    row per locus. Assumes exactly two alleles per locus (bi-allelic).

    :parameter list qtl: List of loci declared as QTL
    :parameter np.array alleles: Array of alleles at each locus
    :parameter dict allele_effects: Mapping of effects for alleles at each QTLocus
    :return: Frame with columns locus, alpha_allele, alpha_effect, beta_allele, beta_effect

    """
    order_of_columns = ['locus', 'alpha_allele', 'alpha_effect', 'beta_allele', 'beta_effect']
    column_values = {column: [] for column in order_of_columns}

    for locus in qtl:
        # Unpacking enforces the two-alleles-per-locus assumption.
        alpha_allele, beta_allele = alleles[locus]
        effects_at_locus = allele_effects[locus]
        row = (locus,
               alpha_allele, effects_at_locus[alpha_allele],
               beta_allele, effects_at_locus[beta_allele])
        for column, value in zip(order_of_columns, row):
            column_values[column].append(value)

    return pd.DataFrame(column_values, columns=order_of_columns)

In [82]:
aeframe = generate_allele_effects_table(qtl, alleles, allele_effects)

In [83]:
aeframe

Unnamed: 0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect
0,44,0,5.629446,2,1.896273
1,103,0,1.309781,2,6.140706
2,168,2,6.718096,3,4.697239
3,340,2,2.213108,1,1.521689
4,488,3,2.486777,1,2.512286
5,639,0,1.126807,3,1.339128
6,737,1,1.607535,0,1.487987
7,819,1,2.215342,3,0.200779
8,981,0,3.95135,3,1.788439
9,1065,2,1.513905,0,0.998194


In [79]:
colz = ['locus', 'alpha_allele', 'alpha_effect', 'beta_allele', 'beta_effect']

In [80]:
# ae_table is only ever assigned as a local inside generate_allele_effects_table, so it is
# undefined at notebook scope on a fresh kernel; show the already-built aeframe instead.
aeframe[colz]

Unnamed: 0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect
0,44,0,5.629446,2,1.896273
1,103,0,1.309781,2,6.140706
2,168,2,6.718096,3,4.697239
3,340,2,2.213108,1,1.521689
4,488,3,2.486777,1,2.512286
5,639,0,1.126807,3,1.339128
6,737,1,1.607535,0,1.487987
7,819,1,2.215342,3,0.200779
8,981,0,3.95135,3,1.788439
9,1065,2,1.513905,0,0.998194


In [None]:
# Debug spot-check: prints the two alleles and their effects for the first QTL only
# (the `break` ends the loop after a single iteration).
for locus in qtl:
    alpha_allele, beta_allele = alleles[locus]
    alpha_effect = allele_effects[locus][alpha_allele]
    beta_effect = allele_effects[locus][beta_allele]
    print(locus, alpha_allele, alpha_effect, beta_allele, beta_effect)
    break

In [65]:
# Display the QTL list; the former unterminated `[np.array(qtl),` was a SyntaxError.
qtl

[44, 103, 168, 340, 488, 639, 737, 819, 981, 1065]

In [75]:
trait['epsilon'] = magic1478.dvars().epsilon

In [77]:
af = analyze.allele_data(magic1478, alleles, list(range(1478)))

In [81]:
len(magic1478.dvars().segSites)

866

In [None]:
# (Intentionally empty: the bare `import` that stood here names no module and is a SyntaxError.)

In [95]:
# One (locus, minor allele, minor frequency) row per segregating site.
# `DataFrame.ix` has been removed from pandas; `af` is looked up by locus label,
# so `.loc` is the label-based equivalent.
segregating_data = np.array([(segloc,
                              af.loc[segloc, 'minor_allele'],
                              af.loc[segloc, 'minor_frequency']) for segloc in magic1478.dvars().segSites])

In [97]:
af

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency
0,2,0.00000,1,1.00000
1,3,0.12925,1,0.87075
2,1,0.07925,3,0.92075
3,2,0.00000,0,1.00000
4,0,0.05175,2,0.94825
5,2,0.23900,0,0.76100
6,2,0.10925,0,0.89075
7,1,0.00000,3,1.00000
8,2,0.21600,0,0.78400
9,3,0.00000,1,1.00000


In [99]:
segregating_frame = pd.DataFrame(segregating_data, columns=['segregating_locus', 'minor_allele', 'frequency'])

In [100]:
segregating_frame

Unnamed: 0,segregating_locus,minor_allele,frequency
0,1,3,0.12925
1,2,1,0.07925
2,4,0,0.05175
3,5,2,0.23900
4,6,2,0.10925
5,8,2,0.21600
6,10,3,0.26500
7,12,1,0.26525
8,13,0,0.11200
9,15,3,0.34450


In [121]:
segregating_loci = np.array(magic1478.dvars().segSites)

In [118]:
relative_pos = [magic1478.chromLocusPair(seg_loc)[1] for seg_loc in segregating_loci]

In [120]:
relative_pos

[1,
 2,
 4,
 5,
 6,
 8,
 10,
 12,
 13,
 15,
 16,
 20,
 21,
 23,
 24,
 25,
 26,
 28,
 29,
 31,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 45,
 46,
 47,
 49,
 50,
 54,
 55,
 56,
 58,
 60,
 61,
 62,
 68,
 70,
 74,
 77,
 82,
 85,
 86,
 94,
 95,
 99,
 100,
 102,
 103,
 105,
 106,
 107,
 108,
 109,
 114,
 115,
 116,
 119,
 120,
 121,
 122,
 123,
 124,
 126,
 127,
 128,
 130,
 132,
 134,
 137,
 138,
 139,
 140,
 141,
 144,
 145,
 146,
 147,
 149,
 152,
 153,
 155,
 156,
 158,
 159,
 163,
 165,
 168,
 171,
 173,
 174,
 176,
 178,
 184,
 185,
 186,
 187,
 189,
 191,
 192,
 193,
 194,
 195,
 196,
 202,
 203,
 208,
 1,
 2,
 3,
 4,
 5,
 10,
 12,
 13,
 15,
 16,
 17,
 19,
 22,
 24,
 25,
 30,
 31,
 32,
 34,
 35,
 37,
 42,
 46,
 47,
 48,
 49,
 51,
 52,
 54,
 55,
 56,
 58,
 62,
 63,
 64,
 65,
 67,
 71,
 72,
 73,
 74,
 75,
 78,
 80,
 81,
 82,
 83,
 84,
 87,
 89,
 90,
 91,
 92,
 93,
 95,
 96,
 97,
 100,
 102,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 115,
 118,
 121,
 122,
 123,
 124,
 125,
 126,
 129,
 

In [122]:
# Store the intermediate results of this run in the 'daoko_girl_debug_data' shelf.
intermediate_data = shelve.open('daoko_girl_debug_data')
intermediate_data['allele_frequencies'] = af
intermediate_data['segregating_allele_frequencies'] = segregating_frame
# The information-field tuples are converted to numpy arrays before being stored.
intermediate_data['g'] = np.array(magic1478.indInfo('g'))
intermediate_data['p'] = np.array(magic1478.indInfo('p'))
intermediate_data['segregating_loci'] = segregating_loci
# Second element of chromLocusPair for every segregating locus.
intermediate_data['relative_pos'] = relative_pos
intermediate_data['run_name'] = 'daoko_girl'

In [123]:
# Close every shelf opened in this notebook so that pending writes reach disk.
trait.close()
analysis_parameters.close()
intermediate_data.close()
# synthesis_parameters is opened earlier in the notebook as well but was never closed.
synthesis_parameters.close()