## Run: daoko_girl

In [1]:
import pytest
import simuOpt
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
np.set_printoptions(suppress=True, precision=3)

In [2]:
# Forward slashes are accepted on Windows as well as POSIX, so the notebook
# no longer depends on the Windows-only backslash separator.
magic1478 = sim.loadPopulation('populations/magic_1478.pop')

In [3]:
magic1478.setSubPopName('daoko_girl', 0)

In [4]:
magic1478.dvars()

{'gen': 3, 'rep': 0}

In [5]:
magic1478.subPopNames()

('daoko_girl',)

### Analysis Parameters

In [None]:
# Record the analysis settings for this run. The context manager closes the
# shelf once the entries are written; the only other close call for this
# shelf in the notebook is commented out, so the handle would otherwise stay
# open.
with shelve.open('analysis_parameters') as analysis_parameters:
    analysis_parameters['population_name'] = 'daoko_girl'
    analysis_parameters['scenario'] = 'random_mating'
    analysis_parameters['generations'] = 3
    analysis_parameters['output_prefix'] = 'daoko_girl'

In [6]:
sim.tagID(magic1478, reset=False)

In [7]:
# Forward slashes keep this path valid on both Windows and POSIX systems.
genetic_map = pd.read_hdf('parameters/genetic_map_1478.hdf')

In [8]:
# Shelf that collects the trait parameters of this run; it is closed in the
# final cell of the notebook.
trait = shelve.open('daoko_girl_trait_parameters')
# Forward slashes keep this path valid on both Windows and POSIX systems.
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [9]:
recombination_rates = np.array(list(genetic_map['recom_rate']))

In [10]:
breed_magic_1478 = breed.MAGIC(magic1478, recombination_rates)

In [11]:
breed_magic_1478.interim_random_mating(3, 2000)

Initiating interim random mating for 3 generations.
Generation: 3
Generation: 4
Generation: 5


In [12]:
sim.stat(magic1478, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])

In [13]:
sim.stat(magic1478, alleleFreq=magic1478.dvars().segSites)

In [14]:
qtl = sorted(random.sample(magic1478.dvars().segSites, 10))

In [15]:
qtl

[137, 688, 787, 824, 894, 983, 1022, 1026, 1232, 1276]

In [16]:
def assign_allele_effects(alleles, qtl, distribution_function,
                         *distribution_function_parameters, multiplicity):
    """
    Draws a random effect for every allele found at every QTL.

    Each effect is the sum of ``multiplicity`` successive calls to
    ``distribution_function(*distribution_function_parameters)``.

    :parameter alleles: Indexable by locus; ``alleles[locus]`` yields the alleles at that locus
    :parameter list qtl: List of loci declared as QTL
    :parameter distribution_function: Callable producing a single random draw
    :parameter distribution_function_parameters: Positional arguments forwarded to ``distribution_function``
    :parameter int multiplicity: Keyword-only; number of draws summed for each allele
    :return: Nested mapping ``{locus: {allele: effect}}``
    """
    def draw_effect():
        # One effect = the sum of ``multiplicity`` draws.
        return sum(distribution_function(*distribution_function_parameters)
                   for _ in range(multiplicity))

    return {
        locus: {allele: draw_effect() for allele in alleles[locus]}
        for locus in qtl
    }

In [17]:
allele_effects = assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=3)

In [18]:
allele_effects

{137: {1: 3.078821273322924, 3: 0.5829712695936001},
 688: {2: 2.969000689332179, 3: 3.6857109128806576},
 787: {1: 3.305930137340206, 3: 3.3562554392930437},
 824: {2: 1.1965961388117203, 3: 3.338712110348264},
 894: {0: 3.443208307676648, 2: 3.320355105475178},
 983: {1: 1.1699849842081493, 3: 3.2272875444749127},
 1022: {1: 3.886448775272619, 3: 1.1265398514212266},
 1026: {1: 0.9450591255805005, 2: 5.266496403224768},
 1232: {0: 6.9604205283702365, 2: 4.000834343924458},
 1276: {2: 0.6667751598860474, 3: 2.630377450799637}}

In [19]:
trait['number_of_qtl'] = len(qtl)
trait['qtl'] = qtl
trait['allele_effects'] = allele_effects
trait['allele_effect_distribution'] = random.expovariate.__name__
trait['heritability'] = 0.7

In [20]:
heritability = 0.7

In [21]:
def assign_additive_g(pop, qtl, allele_effects):
    """
    Calculates the additive genotypic contribution ``g`` of each individual.

    For every individual in ``pop`` the effects of the alleles carried on
    both homologues (ploidy 0 and ploidy 1) are summed over all QTL and the
    total is stored on the individual as ``g``.

    :parameter pop: Object whose ``individuals()`` yields the individuals to update
    :parameter list qtl: List of loci declared as QTL
    :parameter dict allele_effects: Mapping ``{locus: {allele: effect}}``
    """
    for ind in pop.individuals():
        # Fetch each homologue once per individual rather than once per QTL.
        first_homologue = ind.genotype(ploidy=0)
        second_homologue = ind.genotype(ploidy=1)
        ind.g = sum(
            allele_effects[locus][first_homologue[locus]] +
            allele_effects[locus][second_homologue[locus]]
            for locus in qtl)

In [22]:
assign_additive_g(magic1478, qtl, allele_effects)

In [24]:
def calculate_error_variance(pop, heritability):
    """
    Derives the error variance ``epsilon`` for a given heritability.

    The variance of the ``g`` information field over ``pop`` is multiplied
    by ``1/heritability - 1``. The result is stored as the population
    variable ``epsilon``; nothing is returned.

    :parameter pop: Population providing ``indInfo('g')`` and ``dvars()``
    :parameter float heritability: Heritability used to scale the variance of ``g``
    """
    g_variance = np.var(pop.indInfo('g'))
    pop.dvars().epsilon = g_variance * (1/heritability - 1)

In [25]:

def phenotypic_effect_calculator(pop):
    """
    Simulate measurement error by adding random error to genotypic
    contribution.

    Each individual's phenotype ``p`` is set to its ``g`` plus a draw from a
    normal distribution with mean 0 and variance ``pop.dvars().epsilon``.

    :parameter pop: Population providing ``dvars().epsilon`` and ``individuals()``
    :raises ValueError: If ``epsilon`` is negative
    """
    error_variance = pop.dvars().epsilon
    if error_variance < 0:
        raise ValueError(
            'epsilon must be a non-negative variance, got {}'.format(error_variance))
    # ``epsilon`` is a variance, whereas random.normalvariate expects the
    # standard deviation as its second argument.
    error_sd = error_variance ** 0.5
    for ind in pop.individuals():
        ind.p = ind.g + random.normalvariate(0, error_sd)

In [26]:
calculate_error_variance(magic1478, heritability)

In [27]:
magic1478.dvars().epsilon

7.7486364405234669

In [28]:
phenotypic_effect_calculator(magic1478)

In [29]:
# NOTE(review): this shelf handle is never closed in the visible notebook
# before the same shelf is opened again further down; nothing is written to
# it here because the assignment below is commented out.
synthesis_parameters = shelve.open('synthesis_parameters')
#synthesis_parameters['prefounder_names'] = prefounder_names

In [None]:
#synthesis_parameters['founders'] = simulation_parameters['founders']
#synthesis_parameters['operating_population_size'] = 2000
#synthesis_parameters['snp_to_integer'] = simulation_parameters['snp_to_integer']
#synthesis_parameters['integer_to_snp'] = simulation_parameters['integer_to_snp']
#synthesis_parameters['prefounder_file_name'] = 'prefounders_1478.pop'
#synthesis_parameters['mating_scheme'] = 'MAGIC'

In [30]:
allele_effects

{137: {1: 3.078821273322924, 3: 0.5829712695936001},
 688: {2: 2.969000689332179, 3: 3.6857109128806576},
 787: {1: 3.305930137340206, 3: 3.3562554392930437},
 824: {2: 1.1965961388117203, 3: 3.338712110348264},
 894: {0: 3.443208307676648, 2: 3.320355105475178},
 983: {1: 1.1699849842081493, 3: 3.2272875444749127},
 1022: {1: 3.886448775272619, 3: 1.1265398514212266},
 1026: {1: 0.9450591255805005, 2: 5.266496403224768},
 1232: {0: 6.9604205283702365, 2: 4.000834343924458},
 1276: {2: 0.6667751598860474, 3: 2.630377450799637}}

In [31]:
trait['allele_effects']

{137: {1: 3.078821273322924, 3: 0.5829712695936001},
 688: {2: 2.969000689332179, 3: 3.6857109128806576},
 787: {1: 3.305930137340206, 3: 3.3562554392930437},
 824: {2: 1.1965961388117203, 3: 3.338712110348264},
 894: {0: 3.443208307676648, 2: 3.320355105475178},
 983: {1: 1.1699849842081493, 3: 3.2272875444749127},
 1022: {1: 3.886448775272619, 3: 1.1265398514212266},
 1026: {1: 0.9450591255805005, 2: 5.266496403224768},
 1232: {0: 6.9604205283702365, 2: 4.000834343924458},
 1276: {2: 0.6667751598860474, 3: 2.630377450799637}}

In [32]:
alleles

array([[1, 2],
       [1, 3],
       [3, 1],
       ..., 
       [1, 0],
       [3, 0],
       [3, 1]], dtype=int64)

In [33]:
def generate_allele_effects_table(qtl, alleles, allele_effects):
    """
    Tabulates both alleles at every QTL together with their effects.

    Only the bi-allelic case is supported: ``alleles[locus]`` must unpack
    into exactly two alleles.

    :parameter list qtl: List of loci declared as QTL
    :parameter np.array alleles: Array of alleles at each locus
    :parameter dict allele_effects: Mapping of effects for alleles at each QTLocus
    :return: pd.DataFrame with one row per QTL and the columns ``locus``,
        ``alpha_allele``, ``alpha_effect``, ``beta_allele``, ``beta_effect``
    """
    column_order = ['locus', 'alpha_allele', 'alpha_effect', 'beta_allele', 'beta_effect']
    columns = {name: [] for name in column_order}

    for locus in qtl:
        alpha, beta = alleles[locus]
        effects_at_locus = allele_effects[locus]
        row = (locus, alpha, effects_at_locus[alpha], beta, effects_at_locus[beta])
        # Distribute the row's values over the per-column lists.
        for name, value in zip(column_order, row):
            columns[name].append(value)

    return pd.DataFrame(columns, columns=column_order)

In [34]:
aeframe = generate_allele_effects_table(qtl, alleles, allele_effects)

In [35]:
aeframe

Unnamed: 0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect
0,137,3,0.582971,1,3.078821
1,688,2,2.969001,3,3.685711
2,787,1,3.30593,3,3.356255
3,824,2,1.196596,3,3.338712
4,894,2,3.320355,0,3.443208
5,983,1,1.169985,3,3.227288
6,1022,3,1.12654,1,3.886449
7,1026,1,0.945059,2,5.266496
8,1232,2,4.000834,0,6.960421
9,1276,2,0.666775,3,2.630377


In [36]:
trait['epsilon'] = magic1478.dvars().epsilon

In [37]:
segregating_loci = magic1478.dvars().segSites

In [63]:
import importlib as imp
imp.reload(analyze)

<module 'saegus.analyze' from 'c:\\Anaconda3\\lib\\site-packages\\saegus\\analyze.py'>

In [39]:
af = analyze.allele_data(magic1478, alleles, list(range(1478)))

In [None]:
pca = analyze.PCA(magic1478, segregating_loci, np.array(af['minor_allele']))

In [None]:
ccm = pca.calculate_count_matrix('run_daoko_girl_minor_cm.txt')

In [None]:
af['minor_allele']

In [None]:
eigendata = pca.svd(ccm)

In [None]:
# NOTE(review): `segregating_data` is not assigned in any cell of the visible
# notebook, so this cell would raise NameError on a fresh kernel -- confirm
# where it was meant to come from.
segregating_frame = pd.DataFrame(segregating_data, columns=['segregating_locus', 'minor_allele', 'frequency'])

In [None]:
segregating_loci = np.array(magic1478.dvars().segSites)

In [132]:
import importlib as imp
imp.reload(analyze)

<module 'saegus.analyze' from 'c:\\Anaconda3\\lib\\site-packages\\saegus\\analyze.py'>

In [133]:
gwas = analyze.GWAS(magic1478, segregating_loci, list(af['minor_allele']), 'daoko_girl')

In [134]:
gwas

<saegus.analyze.GWAS at 0x1b631978>

In [135]:
ccm = gwas.calculate_count_matrix('daoko_girl_ma_count.txt')

In [136]:
ps_svd = gwas.pop_struct_svd(ccm)

In [137]:
ps_m = gwas.population_structure_formatter(ps_svd, 'daoko_girl_structure.txt')

In [138]:
# Read the integer -> SNP mapping. The context manager closes the shelf even
# if the key lookup fails.
with shelve.open('synthesis_parameters') as synthesis_parameters:
    int_to_snp_map = synthesis_parameters['integer_to_snp']

In [139]:
int_to_snp_map

{0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: '-', 5: '+'}

In [140]:
hmap = gwas.hapmap_formatter(int_to_snp_map, 'daoko_girl_hapmap.txt')

In [141]:
ks_m = gwas.calc_kinship_matrix(ccm, af, 'daoko_girl_kinship_matrix.txt')

In [142]:
trait_vector = pd.DataFrame(np.array([magic1478.indInfo('ind_id'),magic1478.indInfo('p')]).T)

In [143]:
trait_vector.to_csv('daoko_girl_trait_vector.txt', sep=' ', index=False, header=False)

In [144]:
max(segregating_loci)

1477

In [145]:
intermediate_data = shelve.open('daoko_girl_debug_data')
intermediate_data['allele_frequencies'] = af
intermediate_data['g'] = np.array(magic1478.indInfo('g'))
intermediate_data['p'] = np.array(magic1478.indInfo('p'))
intermediate_data['segregating_loci'] = segregating_loci
intermediate_data['run_name'] = 'daoko_girl'

In [146]:
trait.close()
#analysis_parameters.close()
intermediate_data.close()