## Run: daoko_girl

In [1]:
import os
import random
import shelve

import numpy as np
import pandas as pd
import pytest

# Order matters below: simuOpt.setOptions is kept ahead of the first simuPOP
# import (and ahead of saegus, which presumably imports simuPOP itself).
import simuOpt
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
from saegus import breed, operators, simulate, analyze, parse, parameters

np.set_printoptions(suppress=True, precision=3)

### Top Level Definitions

In [2]:
# Peek at the keys already stored for this run's trait parameters.
# The shelf is opened in a ``with`` block so it is closed straight away: a
# later cell rebinds ``trait_parameter_set`` to a plain string, which used to
# leave this handle open with no name left to close it through, while the
# same shelf file is opened again afterwards as ``trait``.
with shelve.open('daoko_girl_trait_parameters') as trait_parameter_set:
    stored_trait_keys = list(trait_parameter_set)
stored_trait_keys

['epsilon',
 'number_of_qtl',
 'qtl',
 'multiplicity',
 'distribution_parameters',
 'allele_effect_distribution',
 'allele_effects',
 'heritability']

In [3]:
# Every shelf belonging to this run is named "<run_id><suffix>".
run_id = 'daoko_girl'
trait_parameter_set, analysis_parameter_set = (
    run_id + suffix
    for suffix in ('_trait_parameters', '_analysis_parameters')
)

In [4]:
# Open (or create) the analysis-parameter shelf for this run and record the
# settings used by the cells below. The shelf stays open; it is closed near
# the end of the notebook.
analysis_parameters = shelve.open(analysis_parameter_set)
analysis_parameters.update({
    'population_name': run_id,
    'scenario': 'random_mating',
    'generations': 3,
    'run_identifier': run_id,
    'operating_population_size': 2000,
})

In [5]:
# Open (or create) the trait-parameter shelf and record how the quantitative
# trait is parameterised. The distribution is stored by function name only.
trait = shelve.open(trait_parameter_set)
trait.update({
    'allele_effect_distribution': random.expovariate.__name__,
    'distribution_parameters': 1,
    'multiplicity': 3,
    'heritability': 0.7,
})

### File Names

In [6]:
# Input files, addressed relative to the notebook's working directory.
# os.path.join yields exactly the original "dir\\file" strings on Windows and
# a usable "dir/file" path elsewhere, instead of hard-coding the separator.
base_population_file_name = os.path.join("populations", "magic_1478.pop")
genetic_map_file_name = os.path.join("parameters", "genetic_map_1478.hdf")
allele_file_name = os.path.join("parameters", "alleles_at_1478_loci.hdf")

### Genotype Data

In [7]:
# Load the genetic map and the allele table from their HDF files.
genetic_map = pd.read_hdf(genetic_map_file_name)
alleles = np.array(pd.read_hdf(allele_file_name))
# Recombination rates are taken from the map's 'recom_rate' column.
recombination_rates = np.array(list(genetic_map['recom_rate']))
# Number of segregating sites sampled as QTL further down.
number_qt_loci = 10

### Quantitative Trait

In [8]:
# Load the simuPOP population stored at base_population_file_name.
base_population = sim.loadPopulation(base_population_file_name)

In [9]:
# Name subpopulation 0 after the run identifier.
base_population.setSubPopName(run_id, 0)

### Create Analysis Population

In [10]:
# Tag individuals with IDs.
# NOTE(review): reset=False presumably continues from simuPOP's current ID
# counter instead of restarting it -- confirm against the simuPOP docs.
sim.tagID(base_population, reset=False)

In [11]:
# saegus MAGIC breeding helper, built from the loaded population and the
# recombination rates read from the genetic map.
random_mater = breed.MAGIC(base_population, recombination_rates)

In [12]:
# Run interim random mating; the number of generations and the operating
# population size both come from the analysis-parameter shelf.
random_mater.interim_random_mating(
    analysis_parameters['generations'],
    analysis_parameters['operating_population_size'],
)

Initiating interim random mating for 3 generations.
Generation: 3
Generation: 4
Generation: 5


In [13]:
# Compute allele frequencies and segregating-site statistics for all
# available loci; the results are read back via base_population.dvars().
sim.stat(base_population, alleleFreq=sim.ALL_AVAIL)
sim.stat(base_population, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])

In [14]:
# Draw number_qt_loci segregating sites at random and keep them sorted.
# NOTE(review): no random seed is set in the visible cells, so the chosen
# loci (and everything derived from them) differ between runs.
qtl = sorted(random.sample(base_population.dvars().segSites, number_qt_loci))

In [15]:
# Trait helper from saegus.parameters; used next to assign allele effects.
additive_trait = parameters.Trait()

In [16]:
# Assign allele effects at the sampled QTL using random.expovariate.
# NOTE(review): the literals 1 and multiplicity=3 repeat the values stored
# under trait['distribution_parameters'] and trait['multiplicity'] earlier in
# the notebook -- keep them in sync if either changes.
aes = additive_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=3)

In [17]:
# Display the assigned effects (locus -> {allele: effect}).
aes

{155: {0: 5.246355642444491, 2: 4.4593782658468335},
 420: {2: 2.259192368494645, 3: 3.092229777105861},
 646: {1: 4.785768179045667, 2: 3.9965290715630735},
 667: {2: 3.514687471834254, 3: 4.566962428451008},
 925: {0: 2.531262958045664, 2: 3.105306129011896},
 1093: {1: 6.0877450839025204, 2: 3.8672779857240007},
 1132: {2: 3.139314451285914, 3: 2.354233854620061},
 1178: {1: 2.775562895221297, 2: 3.1281122552083684},
 1191: {2: 6.586549033287626, 3: 0.8933349309649996},
 1445: {1: 6.085636913676679, 3: 3.277924371568076}}

In [19]:
# NOTE(review): duplicates the value stored under trait['heritability']
# earlier in the notebook -- keep the two in sync.
heritability = 0.7

In [78]:
# Apply the allele effects at the QTL to the population.
# NOTE(review): presumably this stores an additive genotypic value per
# individual -- confirm in saegus.operators.
operators.assign_additive_g(base_population, qtl, aes)

In [141]:
# NOTE(review): population_sample_analyzer is not defined or imported in any
# visible cell, so this fails on a fresh kernel unless it is defined in a part
# of the notebook not shown here. The keyword values repeat the run settings
# used above (multiplicity 3, heritability 0.7, run id "daoko_girl").
allele_effects_table = population_sample_analyzer(base_population, 300, 10, alleles, random.expovariate, 1, multiplicity=3,
                          heritability=0.7, run_id="daoko_girl")

In [144]:
# Re-read a previously written allele effects table.
# NOTE(review): absolute, machine-specific Windows path.
reread_ae_table = pd.read_hdf("C:\\tassel\\output\\allele_effects_table_200.hdf")

In [149]:
# Spot-check: look up saegus locus 215 in the saegus -> TASSEL map (the map
# itself is built in a cell that appears further down in this file).
saegus_to_tassel_loci[215]

114

In [153]:
# Translate every locus in the re-read table to its TASSEL index.
# Iterating the column itself (rather than a hard-coded range(10)) keeps the
# result the same length as the table, so it can be assigned back as a column
# whatever the number of QTL rows or the index labels are.
remapped_loci = [saegus_to_tassel_loci[locus] for locus in reread_ae_table['locus']]

In [154]:
# Display the TASSEL indices computed above.
remapped_loci

[114, 183, 363, 405, 420, 423, 503, 634, 690, 780]

In [155]:
# Overwrite the 'locus' column in place with the TASSEL indices.
reread_ae_table['locus'] = remapped_loci

In [157]:
# Write the remapped table under key "daoko".
# NOTE(review): this is a relative path, whereas the table was read from
# C:\tassel\output -- confirm the intended destination.
reread_ae_table.to_hdf("allele_effects_table_200.hdf", "daoko")

In [None]:
#synthesis_parameters['founders'] = simulation_parameters['founders']
#synthesis_parameters['operating_population_size'] = 2000
#synthesis_parameters['snp_to_integer'] = simulation_parameters['snp_to_integer']
#synthesis_parameters['integer_to_snp'] = simulation_parameters['integer_to_snp']
#synthesis_parameters['prefounder_file_name'] = 'prefounders_1478.pop'
#synthesis_parameters['mating_scheme'] = 'MAGIC'

In [23]:
# Tabulate the allele effects for the QTL via saegus.analyze.
aeframe = analyze.generate_allele_effects_table(qtl, alleles, aes)

In [25]:
# Record the population variable `epsilon` in the trait shelf.
# NOTE(review): no visible cell sets dvars().epsilon directly; presumably a
# saegus operator or a cell not shown here does -- confirm.
trait['epsilon'] = base_population.dvars().epsilon

In [61]:
# Build the saegus GWAS helper for this run.
# NOTE(review): `segregating_loci` and `af` are not defined in any visible
# cell; they must come from a part of the notebook not shown here.
gwas = analyze.GWAS(base_population, segregating_loci, np.array(af['minor_allele']), run_id)

In [67]:
# Format the population structure and write it under `indir`.
# NOTE(review): `ps_svd` and `indir` are not defined in any visible cell.
ps_m = gwas.population_structure_formatter(ps_svd, indir+'daoko_girl_population_structure.txt')

In [68]:
# Fetch the integer -> SNP map from the synthesis-parameter shelf.
# The ``with`` block closes the shelf even when the key lookup raises, which
# the explicit open()/close() pair did not.
with shelve.open('synthesis_parameters') as synthesis_parameters:
    int_to_snp_map = synthesis_parameters['integer_to_snp']

In [69]:
# Write the hapmap file for this run under `indir`, using the
# integer -> SNP map (`indir` is not defined in any visible cell).
hmap = gwas.hapmap_formatter(int_to_snp_map, indir+'daoko_girl_hapmap.txt')

In [70]:
# Write the phenotype vector file for this run under `indir`.
phenos = gwas.trait_formatter(indir+'daoko_girl_phenotype_vector.txt')

In [71]:
# Compute the kinship matrix and write it under `indir`.
# NOTE(review): `ccm` and `af` are not defined in any visible cell.
ks_m = gwas.calc_kinship_matrix(ccm, af, indir+'daoko_girl_kinship_matrix.txt')

In [None]:
# Close the trait shelf. analysis_parameters is deliberately left open here:
# later cells still write to it before closing it.
trait.close()
#analysis_parameters.close()
# NOTE(review): `intermediate_data` is never opened in any visible cell.
intermediate_data.close()

### `saegus_to_tassel_loci`:
    Takes a locus from segregating_loci and returns corresponding locus for TASSEL

### `tassel_to_saegus_loci`:
    Takes a locus from rs column of TASSEL output and returns corresponding segregating locus in saegus

In [73]:
# Two-way lookup between a saegus locus and its position (enumeration index)
# within segregating_loci.
tassel_to_saegus_loci = dict(enumerate(segregating_loci))
saegus_to_tassel_loci = {
    saegus_locus: tassel_index
    for tassel_index, saegus_locus in tassel_to_saegus_loci.items()
}

In [74]:
# Allele frequencies restricted to the segregating loci, in the same order.
population_allele_freq = base_population.dvars().alleleFreq
segregating_frqs = [population_allele_freq[locus] for locus in segregating_loci]

In [76]:
# Persist the allele effects table under key 'aeframe' in `indir`.
aeframe.to_hdf(indir+'daoko_girl_allele_effects_table.hdf', 'aeframe')

In [None]:
# Draw a random sample of individuals and record both the sample size and the
# sampled individuals' ids in the analysis-parameter shelf.
sample_size = 100
analysis_parameters['sample_size'] = sample_size
rm_sample = sim.sampling.drawRandomSample(base_population, sizes=sample_size)
analysis_parameters['sampled_ind_ids'] = list(rm_sample.indInfo('ind_id'))

In [None]:
# Recompute segregating sites and allele frequencies on the sample only.
sim.stat(rm_sample, numOfSegSites=sim.ALL_AVAIL, vars=['segSites'])
sim.stat(rm_sample, alleleFreq=sim.ALL_AVAIL)

In [None]:
# Inspect the sample's population variables.
rm_sample.dvars()

In [None]:
# Store the sample statistics and both locus lookup tables in the
# analysis-parameter shelf. alleleFreq is copied into a plain dict first.
sample_vars = rm_sample.dvars()
analysis_parameters.update({
    'sample_segregating_loci': sample_vars.segSites,
    'sample_allele_frequencies': dict(sample_vars.alleleFreq),
    'saegus_to_tassel_loci': saegus_to_tassel_loci,
    'tassel_to_saegus_loci': tassel_to_saegus_loci,
})

In [None]:
# Close the analysis-parameter shelf opened near the top of the notebook.
analysis_parameters.close()

In [130]:
# Development aid: re-import saegus.analyze after editing it on disk.
# NOTE(review): the alias `imp` is also the name of an old standard-library
# module; it is kept because other cells may refer to it.
import importlib as imp
imp.reload(analyze)

<module 'saegus.analyze' from 'c:\\Anaconda3\\lib\\site-packages\\saegus\\analyze.py'>

In [90]:
# Generate the TASSEL GWAS pipeline configuration for this run.
# The first three arguments are path prefixes under C:\tassel (bin, input,
# output); the last is the XML file passed as the fourth argument.
# The original literal contained "\D", an invalid escape sequence that newer
# Python versions warn about; it is now "\\D", so the string is unchanged.
# NOTE(review): all four paths are absolute and machine-specific.
gwas.generate_tassel_gwas_configs(
    "C:\\tassel\\bin\\daoko_girl_",
    "C:\\tassel\\input\\daoko_girl_",
    "C:\\tassel\\output\\daoko_girl_",
    "C:\\Users\\DoubleDanks\\BISB\\wisser\\code\\rjwlab-scripts\\saegus_project\\devel\\magic\\1478\\daoko_girl_gwas_pipeline.xml",
)