## Run: daoko_girl

In [1]:
import pytest  # NOTE(review): not referenced by any visible cell
import simuOpt
# simuPOP is configured through simuOpt *before* simuPOP itself is imported
# on the next line; keep this ordering when editing the cell.
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
# Print NumPy floats without scientific notation, three decimals.
np.set_printoptions(suppress=True, precision=3)

In [None]:
# Development helper: re-import saegus.analyze so that edits made to the
# module on disk are picked up without restarting the kernel.
import importlib as imp
imp.reload(analyze)

### Top Level Definitions

In [2]:
# Show which keys are already stored in this run's trait-parameter shelf.
# The shelf is opened in a ``with`` block so its file handle is closed as soon
# as the keys have been read: a later cell rebinds ``trait_parameter_set`` to a
# plain string and reopens the same file, which previously left this handle
# dangling.
with shelve.open('daoko_girl_trait_parameters') as trait_parameter_shelf:
    trait_parameter_keys = list(trait_parameter_shelf)
trait_parameter_keys

['number_of_qtl',
 'qtl',
 'distribution_parameters',
 'allele_effects',
 'heritability',
 'allele_effect_distribution',
 'multiplicity',
 'epsilon']

In [3]:
# Identifier of this run; both shelf file names are derived from it.
run_id = 'daoko_girl'
trait_parameter_set, analysis_parameter_set = (
    '{}_{}_parameters'.format(run_id, kind) for kind in ('trait', 'analysis')
)

In [4]:
# Open the analysis-parameter shelf for this run and record the run settings.
# The shelf stays open; a later cell adds more entries and closes it.
analysis_parameters = shelve.open(analysis_parameter_set)
analysis_parameters.update({
    'population_name': run_id,
    'scenario': 'random_mating',
    'generations': 3,
    'run_identifier': run_id,
    'operating_population_size': 2000,
})

In [5]:
# Record the trait settings in this run's trait-parameter shelf.
# No cell in the notebook closes this shelf, so the writes are done inside a
# ``with`` block, which closes (and thereby flushes) the file on exit.
# ``trait`` is not read again by any later cell.
with shelve.open(trait_parameter_set) as trait:
    # Only the *name* of the sampling function is stored, not the function.
    trait['allele_effect_distribution'] = random.expovariate.__name__
    trait['distribution_parameters'] = 1
    trait['multiplicity'] = 3
    trait['heritability'] = 0.7

### File Names

In [6]:
# Input files, relative to the notebook's working directory.
# Raw strings keep the Windows-style separators readable.
base_population_file_name = r"populations\magic_1478.pop"
genetic_map_file_name = r"parameters\genetic_map_1478.hdf"
allele_file_name = r"parameters\alleles_at_1478_loci.hdf"

### Genotype Data

In [7]:
# Read the genetic map and the per-locus allele table from HDF files.
genetic_map = pd.read_hdf(genetic_map_file_name)
alleles = np.array(pd.read_hdf(allele_file_name))
# Recombination rates as a NumPy array, taken from the 'recom_rate' column.
recombination_rates = np.array([rate for rate in genetic_map['recom_rate']])
# Number of loci that will be drawn as QTL further down.
number_qt_loci = 10

### Quantitative Trait

In [8]:
# Load the saved base population from the file named above.
base_population = sim.loadPopulation(base_population_file_name)

In [9]:
# Give subpopulation 0 the run identifier as its name.
base_population.setSubPopName(run_id, 0)

### Create Analysis Population

In [10]:
# Tag individuals with IDs; a later cell reads them back through the 'ind_id'
# information field.
# NOTE(review): reset=False presumably continues the existing ID counter
# rather than restarting it; confirm against the simuPOP docs.
sim.tagID(base_population, reset=False)

In [11]:
# Breeding helper built from the base population and the recombination rates.
random_mater = breed.MAGIC(base_population, recombination_rates)

In [12]:
# Run interim random mating with the number of generations and operating
# population size stored in the analysis-parameter shelf.
random_mater.interim_random_mating(
    analysis_parameters['generations'],
    analysis_parameters['operating_population_size'],
)

Initiating interim random mating for 3 generations.
Generation: 3
Generation: 4
Generation: 5


In [13]:
# Compute allele frequencies and segregating sites over all available loci.
# The results are read back later through base_population.dvars()
# (segSites, alleleFreq).
sim.stat(base_population, alleleFreq=sim.ALL_AVAIL)
sim.stat(base_population, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])

In [14]:
# Draw `number_qt_loci` distinct segregating loci at random to act as QTL and
# keep them in ascending order.
# NOTE(review): no random seed is set in any visible cell, so the draw differs
# between runs.
qtl = random.sample(base_population.dvars().segSites, number_qt_loci)
qtl.sort()

In [15]:
# Trait helper from saegus.parameters; used below to assign allele effects.
additive_trait = parameters.Trait()

In [16]:
# Assign allele effects at the QTL, drawing from random.expovariate.
# The literals 1 and multiplicity=3 repeat the values written to the
# trait-parameter shelf ('distribution_parameters', 'multiplicity').
# The displayed result maps locus -> {allele: effect}.
aes = additive_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=3)

In [17]:
# Display the allele effects assigned above.
aes

{42: {1: 1.393701239159606, 2: 3.184295015795534},
 127: {1: 5.367228389288934, 3: 1.3078061708495794},
 449: {1: 8.035747688940756, 3: 0.6976499336874429},
 611: {0: 2.519242596821685, 2: 0.6305183368377753},
 621: {1: 4.386457733737859, 3: 4.030080017077876},
 705: {0: 2.556492893713342, 2: 2.4893106817937616},
 714: {1: 2.09885237770004, 2: 4.887870521417468},
 930: {1: 2.0592992333167435, 2: 3.567773234808219},
 1018: {1: 3.0368174994984773, 3: 5.546120917748954},
 1255: {1: 1.3620001555727523, 3: 0.9369738103146801}}

In [27]:
# Heritability handed to the sample analyzer; same value (0.7) as the
# 'heritability' entry written to the trait-parameter shelf.
heritability = 0.7
# Misspelled name kept as an alias because the analysis cell still refers to it.
heratability = heritability

In [None]:
# NOTE(review): presumably computes each individual's additive genotypic value
# from `aes` at the `qtl` loci; confirm in saegus.operators.
# The output recorded beneath this cell is a module repr and does not appear
# to belong to this call.
operators.assign_additive_g(base_population, qtl, aes)

<module 'saegus.parameters' from 'c:\\Anaconda3\\lib\\site-packages\\saegus\\parameters.py'>

In [35]:
# Display the sorted QTL loci.
qtl

[42, 127, 449, 611, 621, 705, 714, 930, 1018, 1255]

In [85]:
# Development helper: reload saegus.analyze and saegus.parameters so edits on
# disk take effect without restarting the kernel.
import importlib as imp
imp.reload(analyze)
imp.reload(parameters)

<module 'saegus.parameters' from 'c:\\Anaconda3\\lib\\site-packages\\saegus\\parameters.py'>

# Main Analysis Engine

In [86]:
# Run the sample analyzer; it returns the segregating loci and a table of
# allele effects.
# NOTE(review): 200 is presumably the sample size, but a later cell stores
# sample_size = 100 in analysis_parameters; confirm which is intended.
segregating_loci, allele_effects_table = analyze.population_sample_analyzer(
    base_population,
    200,
    qtl,
    alleles,
    aes,
    heratability,
)

In [87]:
# Display the allele-effects table returned by the analyzer.
allele_effects_table

Unnamed: 0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect
0,42,2,3.184295,1,1.393701
1,127,3,1.307806,1,5.367228
2,449,3,0.69765,1,8.035748
3,611,2,0.630518,0,2.519243
4,621,3,4.03008,1,4.386458
5,705,2,2.489311,0,2.556493
6,714,1,2.098852,2,4.887871
7,930,2,3.567773,1,2.059299
8,1018,3,5.546121,1,3.036817
9,1255,1,1.362,3,0.936974


In [82]:
# Remap the table's loci using the saegus -> TASSEL index map.
# NOTE(review): `saegus_to_tassel_loci` is only built in a cell further down,
# so this cell fails under Restart & Run All in the current cell order.
expanded_ae_table = analyze.remap_ae_table_loci(allele_effects_table, saegus_to_tassel_loci)

In [88]:
# Display the saegus -> TASSEL locus index map.
# The original `saegus_to_tassel_loci[]` (empty subscript) is a syntax error;
# the recorded output is the whole dict, so the bare name is what was run.
saegus_to_tassel_loci

{1: 0,
 2: 1,
 4: 2,
 5: 3,
 6: 4,
 8: 5,
 10: 6,
 12: 7,
 13: 8,
 15: 9,
 16: 10,
 20: 11,
 21: 12,
 23: 13,
 24: 14,
 25: 15,
 26: 16,
 28: 17,
 29: 18,
 31: 19,
 32: 20,
 34: 21,
 36: 22,
 38: 23,
 40: 24,
 42: 25,
 44: 26,
 45: 27,
 46: 28,
 47: 29,
 49: 30,
 50: 31,
 54: 32,
 55: 33,
 56: 34,
 58: 35,
 60: 36,
 61: 37,
 62: 38,
 68: 39,
 70: 40,
 74: 41,
 77: 42,
 82: 43,
 85: 44,
 86: 45,
 94: 46,
 95: 47,
 99: 48,
 100: 49,
 102: 50,
 103: 51,
 105: 52,
 106: 53,
 107: 54,
 108: 55,
 109: 56,
 114: 57,
 115: 58,
 116: 59,
 119: 60,
 120: 61,
 121: 62,
 122: 63,
 123: 64,
 124: 65,
 126: 66,
 127: 67,
 128: 68,
 130: 69,
 132: 70,
 134: 71,
 137: 72,
 138: 73,
 139: 74,
 140: 75,
 141: 76,
 144: 77,
 145: 78,
 146: 79,
 147: 80,
 149: 81,
 152: 82,
 153: 83,
 155: 84,
 156: 85,
 158: 86,
 159: 87,
 163: 88,
 165: 89,
 168: 90,
 171: 91,
 173: 92,
 174: 93,
 176: 94,
 178: 95,
 184: 96,
 185: 97,
 186: 98,
 187: 99,
 189: 100,
 191: 101,
 192: 102,
 193: 103,
 194: 104,
 195: 105,

In [48]:
# Display the 'locus' column of the allele-effects table.
allele_effects_table['locus']

0     25
1     67
2    251
3    347
4    353
5    410
6    416
7    536
8    590
9    729
Name: locus, dtype: int64

In [None]:
# Re-read a previously written allele-effects table from the TASSEL output
# directory.
# NOTE(review): absolute, machine-specific path; the file is the "_100" table
# while a later cell writes "_200"; confirm that is intended.
reread_ae_table = pd.read_hdf("C:\\tassel\\output\\allele_effects_table_100.hdf")

In [None]:
# Translate every locus in the re-read table to its TASSEL index.
# Iterating the column (instead of the former hard-coded range(10) with
# label-based lookups) handles any number of rows in row order, which is what
# the positional column assignment in the next cell expects.
remapped_loci = [saegus_to_tassel_loci[locus] for locus in reread_ae_table['locus']]

In [None]:
# Display the TASSEL indices computed above.
remapped_loci

In [None]:
# Overwrite the 'locus' column in place with the TASSEL indices.
# Re-running the remap cell after this one would look up already-remapped values.
reread_ae_table['locus'] = remapped_loci

In [None]:
# Write the remapped table to an HDF file in the working directory under the
# key "daoko".
reread_ae_table.to_hdf("allele_effects_table_200.hdf", "daoko")

In [None]:
#synthesis_parameters['founders'] = simulation_parameters['founders']
#synthesis_parameters['operating_population_size'] = 2000
#synthesis_parameters['snp_to_integer'] = simulation_parameters['snp_to_integer']
#synthesis_parameters['integer_to_snp'] = simulation_parameters['integer_to_snp']
#synthesis_parameters['prefounder_file_name'] = 'prefounders_1478.pop'
#synthesis_parameters['mating_scheme'] = 'MAGIC'

In [37]:
# Two-way mapping between saegus locus numbers and their position in the list
# of segregating loci (the index used on the TASSEL side).
tassel_to_saegus_loci = dict(enumerate(segregating_loci))
saegus_to_tassel_loci = {
    saegus_locus: tassel_index
    for tassel_index, saegus_locus in tassel_to_saegus_loci.items()
}

In [None]:
# Allele frequencies at each segregating locus, in the order of
# `segregating_loci`. The population's frequency table is looked up once
# rather than calling dvars() again for every locus.
population_allele_freq = base_population.dvars().alleleFreq
segregating_frqs = [population_allele_freq[seg_loc] for seg_loc in segregating_loci]

In [None]:
# Write `aeframe` to an HDF file under the key 'aeframe'.
# NOTE(review): neither `aeframe` nor `indir` is defined in any visible cell;
# as it stands this cell raises NameError on a fresh kernel.
aeframe.to_hdf(indir+'daoko_girl_allele_effects_table.hdf', 'aeframe')

In [None]:
# Store the sample size, draw a random sample of that size from the base
# population, and record the sampled individuals' 'ind_id' values.
# NOTE(review): only `import simuPOP as sim` is visible; `sim.sampling` works
# only if the simuPOP.sampling submodule has been imported somewhere; confirm.
analysis_parameters['sample_size'] = 100
rm_sample = sim.sampling.drawRandomSample(base_population, sizes=analysis_parameters['sample_size'])
analysis_parameters['sampled_ind_ids'] = list(rm_sample.indInfo('ind_id'))

In [None]:
# Compute segregating sites and allele frequencies for the sample; both are
# read back from rm_sample.dvars() when the shelf is updated.
sim.stat(rm_sample, numOfSegSites=sim.ALL_AVAIL, vars=['segSites'])
sim.stat(rm_sample, alleleFreq=sim.ALL_AVAIL)

In [None]:
# Inspect the sample's statistics object.
rm_sample.dvars()

In [None]:
# Persist the sample statistics and both locus index maps in the
# analysis-parameter shelf. alleleFreq is copied into a plain dict first.
analysis_parameters.update({
    'sample_segregating_loci': rm_sample.dvars().segSites,
    'sample_allele_frequencies': dict(rm_sample.dvars().alleleFreq),
    'saegus_to_tassel_loci': saegus_to_tassel_loci,
    'tassel_to_saegus_loci': tassel_to_saegus_loci,
})

In [None]:
# Close the analysis-parameter shelf; it cannot be read or written after this
# until it is reopened.
analysis_parameters.close()

In [None]:
# Development helper: reload saegus.analyze to pick up edits made on disk.
import importlib as imp
imp.reload(analyze)

In [None]:
# Generate the TASSEL GWAS configuration from the daoko_girl file prefixes and
# the pipeline XML path.
# The last path previously contained "\D", an invalid escape sequence that
# Python only tolerates with a warning; it is now written "\\D", which yields
# the same string.
# NOTE(review): `gwas` is not imported in any visible cell, and the paths are
# absolute and machine-specific.
gwas.generate_tassel_gwas_configs(
    "C:\\tassel\\bin\\daoko_girl_",
    "C:\\tassel\\input\\daoko_girl_",
    "C:\\tassel\\output\\daoko_girl_",
    "C:\\Users\\DoubleDanks\\BISB\\wisser\\code\\rjwlab-scripts\\saegus_project\\devel\\magic\\1478\\daoko_girl_gwas_pipeline.xml",
)