## Run: daoko_girl

In [1]:
import pytest
import simuOpt
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
np.set_printoptions(suppress=True, precision=3)

### Top Level Definitions

In [2]:
trait_parameter_set = shelve.open('daoko_girl_trait_parameters')
list(trait_parameter_set)

['epsilon',
 'number_of_qtl',
 'qtl',
 'multiplicity',
 'distribution_parameters',
 'allele_effect_distribution',
 'allele_effects',
 'heritability']

In [3]:
run_id = 'daoko_girl'
trait_parameter_set = run_id + '_trait_parameters'
analysis_parameter_set = run_id + '_analysis_parameters'

In [4]:
analysis_parameters = shelve.open(analysis_parameter_set)
analysis_parameters['population_name'] = run_id
analysis_parameters['scenario'] = 'random_mating'
analysis_parameters['generations'] = 3
analysis_parameters['run_identifier'] = run_id
analysis_parameters['operating_population_size'] = 2000

In [5]:
trait = shelve.open(trait_parameter_set)
trait['allele_effect_distribution'] = random.expovariate.__name__
trait['distribution_parameters'] = 1
trait['multiplicity'] = 3
trait['heritability'] = 0.7

### File Names

In [6]:
base_population_file_name = "populations\\magic_1478.pop"
genetic_map_file_name = "parameters\\genetic_map_1478.hdf"
allele_file_name = "parameters\\alleles_at_1478_loci.hdf"

### Genotype Data

In [7]:
genetic_map = pd.read_hdf(genetic_map_file_name)
alleles = np.array(pd.read_hdf(allele_file_name))
recombination_rates = np.array(list(genetic_map['recom_rate']))
number_qt_loci = 10

### Quantitative Trait

In [8]:
base_population = sim.loadPopulation(base_population_file_name)

In [9]:
base_population.setSubPopName(run_id, 0)

### Create Analysis Population

In [10]:
sim.tagID(base_population, reset=False)

In [11]:
random_mater = breed.MAGIC(base_population, recombination_rates)

In [12]:
random_mater.interim_random_mating(analysis_parameters['generations']
                                   ,analysis_parameters['operating_population_size'])

Initiating interim random mating for 3 generations.
Generation: 3
Generation: 4
Generation: 5


In [13]:
sim.stat(base_population, alleleFreq=sim.ALL_AVAIL)
sim.stat(base_population, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])

In [14]:
qtl = sorted(random.sample(base_population.dvars().segSites, number_qt_loci))

In [15]:
additive_trait = parameters.Trait()

In [16]:
aes = additive_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=3)

In [17]:
aes

{155: {0: 5.246355642444491, 2: 4.4593782658468335},
 420: {2: 2.259192368494645, 3: 3.092229777105861},
 646: {1: 4.785768179045667, 2: 3.9965290715630735},
 667: {2: 3.514687471834254, 3: 4.566962428451008},
 925: {0: 2.531262958045664, 2: 3.105306129011896},
 1093: {1: 6.0877450839025204, 2: 3.8672779857240007},
 1132: {2: 3.139314451285914, 3: 2.354233854620061},
 1178: {1: 2.775562895221297, 2: 3.1281122552083684},
 1191: {2: 6.586549033287626, 3: 0.8933349309649996},
 1445: {1: 6.085636913676679, 3: 3.277924371568076}}

In [19]:
heritability = 0.7

In [78]:
operators.assign_additive_g(base_population, qtl, aes)

In [139]:
def population_sample_analyzer(full_population, sample_size, number_of_qtl, alleles,
                             dist_function, *dist_func_parameters,
                             multiplicity=3, heritability=0.7, run_id='daoko_girl', **kwargs):
    
    sample_population = sim.sampling.drawRandomSample(full_population, sizes=sample_size)
    sim.stat(sample_population, alleleFreq=sim.ALL_AVAIL)
    sim.stat(sample_population, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])
    segregating_loci = sample_population.dvars().segSites
    quantitative_trait_loci = sorted(random.sample(sample_population.dvars().segSites, number_of_qtl))
    add_trait = parameters.Trait()
    aes = add_trait.assign_allele_effects(alleles, quantitative_trait_loci, dist_function,
                               *dist_func_parameters, multiplicity=multiplicity)
    aes_table = analyze.generate_allele_effects_table(quantitative_trait_loci, alleles, aes)
    operators.assign_additive_g(full_population, quantitative_trait_loci, aes)
    operators.calculate_error_variance(sample_population, heritability)
    operators.phenotypic_effect_calculator(sample_population)
    af = analyze.allele_data(sample_population, alleles, range(sample_population.totNumLoci()))
    
    gwas = analyze.GWAS(sample_population, segregating_loci, np.array(af['minor_allele']), run_id)
    
    indir = "C:\\tassel\\input\\"
    ccm = gwas.calculate_count_matrix(indir+'daoko_girl_MAC.txt')
    ps_svd = gwas.pop_struct_svd(ccm)
    ps_m = gwas.population_structure_formatter(ps_svd, indir+'daoko_girl_structure_matrix.txt')
    hmap = gwas.hapmap_formatter(int_to_snp_map, indir+'daoko_girl_simulated_hapmap.txt')
    phenos = gwas.trait_formatter(indir+'daoko_girl_phenotype_vector.txt')
    ks_m = gwas.calc_kinship_matrix(ccm, af, indir+'daoko_girl_kinship_matrix.txt')
    
    gwas.generate_tassel_gwas_configs("C:\\tassel\\bin\\daoko_girl_", 
                                     "C:\\tassel\\input\\daoko_girl_",
                                    "C:\\tassel\\output\\daoko_girl_",
                                    "C:\\Users\DoubleDanks\\BISB\\wisser\\code\\rjwlab-scripts\\" \
                                      "saegus_project\\devel\\magic\\1478\\daoko_girl_gwas_pipeline.xml")
    return aes_table


In [141]:
allele_effects_table = population_sample_analyzer(base_population, 300, 10, alleles, random.expovariate, 1, multiplicity=3,
                          heritability=0.7, run_id="daoko_girl")

In [144]:
reread_ae_table = pd.read_hdf("C:\\tassel\\output\\allele_effects_table_200.hdf")

In [149]:
saegus_to_tassel_loci[215]

114

In [153]:
remapped_loci = [saegus_to_tassel_loci[reread_ae_table['locus'][i]] for i in range(10)]

In [154]:
remapped_loci

[114, 183, 363, 405, 420, 423, 503, 634, 690, 780]

In [155]:
reread_ae_table['locus'] = remapped_loci

In [157]:
reread_ae_table.to_hdf("allele_effects_table_200.hdf", "daoko")

In [None]:
#synthesis_parameters['founders'] = simulation_parameters['founders']
#synthesis_parameters['operating_population_size'] = 2000
#synthesis_parameters['snp_to_integer'] = simulation_parameters['snp_to_integer']
#synthesis_parameters['integer_to_snp'] = simulation_parameters['integer_to_snp']
#synthesis_parameters['prefounder_file_name'] = 'prefounders_1478.pop'
#synthesis_parameters['mating_scheme'] = 'MAGIC'

In [23]:
aeframe = analyze.generate_allele_effects_table(qtl, alleles, aes)

In [25]:
trait['epsilon'] = base_population.dvars().epsilon

In [61]:
gwas = analyze.GWAS(base_population, segregating_loci, np.array(af['minor_allele']), run_id)

In [67]:
ps_m = gwas.population_structure_formatter(ps_svd, indir+'daoko_girl_population_structure.txt')

In [68]:
synthesis_parameters = shelve.open('synthesis_parameters')
int_to_snp_map = synthesis_parameters['integer_to_snp']
synthesis_parameters.close()

In [69]:
hmap = gwas.hapmap_formatter(int_to_snp_map, indir+'daoko_girl_hapmap.txt')

In [70]:
phenos = gwas.trait_formatter(indir+'daoko_girl_phenotype_vector.txt')

In [71]:
ks_m = gwas.calc_kinship_matrix(ccm, af, indir+'daoko_girl_kinship_matrix.txt')

In [None]:
trait.close()
#analysis_parameters.close()
intermediate_data.close()

### `saegus_to_tassel_loci`:
    Takes a locus from segregating_loci and returns corresponding locus for TASSEL

### `saegus_to_tassel_loci`:
    Takes a locus from rs column of TASSEL output and returns corresponding segregating locus in saegus

In [73]:
saegus_to_tassel_loci = {}
tassel_to_saegus_loci = {}
for idx, locus in enumerate(segregating_loci):
    saegus_to_tassel_loci[locus] = idx
    tassel_to_saegus_loci[idx] = locus

In [74]:
segregating_frqs = [base_population.dvars().alleleFreq[seg_loc] for seg_loc in segregating_loci]

In [76]:
aeframe.to_hdf(indir+'daoko_girl_allele_effects_table.hdf', 'aeframe')

In [None]:
analysis_parameters['sample_size'] = 100
rm_sample = sim.sampling.drawRandomSample(base_population, sizes=analysis_parameters['sample_size'])
analysis_parameters['sampled_ind_ids'] = list(rm_sample.indInfo('ind_id'))

In [None]:
sim.stat(rm_sample, numOfSegSites=sim.ALL_AVAIL, vars=['segSites'])
sim.stat(rm_sample, alleleFreq=sim.ALL_AVAIL)

In [None]:
rm_sample.dvars()

In [None]:
analysis_parameters['sample_segregating_loci'] = rm_sample.dvars().segSites
analysis_parameters['sample_allele_frequencies'] = dict(rm_sample.dvars().alleleFreq)
analysis_parameters['saegus_to_tassel_loci'] = saegus_to_tassel_loci
analysis_parameters['tassel_to_saegus_loci'] = tassel_to_saegus_loci

In [None]:
analysis_parameters.close()

In [130]:
import importlib as imp
imp.reload(analyze)

<module 'saegus.analyze' from 'c:\\Anaconda3\\lib\\site-packages\\saegus\\analyze.py'>

In [90]:
gwas.generate_tassel_gwas_configs("C:\\tassel\\bin\\daoko_girl_", 
                                     "C:\\tassel\\input\\daoko_girl_",
                                    "C:\\tassel\\output\\daoko_girl_",
                                    "C:\\Users\DoubleDanks\\BISB\\wisser\\code\\rjwlab-scripts\\saegus_project\\devel\\magic\\1478\\daoko_girl_gwas_pipeline.xml")

In [326]:
def reconfigure_gwas_results(gwas_results_file, q_values_file, delim="\t"):
    gwas_results = pd.read_csv(gwas_results_file, sep=delim)
    gwas_results.drop('Trait', axis=1, inplace=True)
    gwas_results.drop('Pos', axis=1, inplace=True)
    gwas_results.drop(0, axis=0, inplace=True)
    gwas_results = gwas_results.ix[:, 'Marker':'p']
    gwas_results.index = gwas_results.index - 1
    gwas_results.drop('Marker', axis=1, inplace=True)
    qvalues = pd.read_csv(q_values_file, sep=delim)
    qvalues.columns = ['q']
    qvalues.index = qvalues.index - 1
    results = gwas_results.join(qvalues)
    return results

In [327]:
res = reconfigure_gwas_results("C:\\tassel\\output\\daoko_girl_gwas_out_2.txt", 
                               "C:\\tassel\\output\\qvalues_daoko_girl_200.txt")

Unnamed: 0,Chr,df,F,p,q
0,1,2,1.274190,0.28197,0.975103
1,1,2,0.403770,0.66836,0.998361
2,1,2,3.591700,0.02939,0.907438
3,1,2,1.001740,0.36912,0.981454
4,1,2,0.486790,0.61534,0.998361
5,1,2,2.549480,0.08072,0.907438
6,1,2,0.132980,0.87556,0.998361
7,1,2,0.930920,0.39593,0.981454
8,1,2,0.844620,0.43129,0.981454
9,1,2,0.924080,0.39863,0.981454


In [320]:
gwas_results

Unnamed: 0,Chr,df,F,p
0,1,2,0.05091,0.95039
1,1,1,0.71959,0.39839
2,1,2,2.19675,0.11678
3,1,2,0.95987,0.38663
4,1,1,1.64924,0.20215
5,1,2,1.16271,0.31705
6,1,2,0.24170,0.78578
7,1,2,1.41658,0.24762
8,1,2,1.20565,0.30404
9,1,2,1.84678,0.16336


In [201]:
def remap_ae_table_loci(allele_effect_table, saegus_to_tassel_loci):
    remapped_loci = [saegus_to_tassel_loci[allele_effects_table['locus'][i]] for i in range(10)]
    allele_effects_table['locus'] = remapped_loci
    allele_effects_table['difference'] = np.abs(allele_effects_table['alpha_effect'] - 
                                          allele_effects_table['beta_effect'])
    return allele_effects_table

In [None]:
def join_p_and_q(gwas_results_file, q_values_file, sep="\t"):
    pd.read_csv(gwas_results_file, sep=sep,
    gwas_results.index = gwas_results.Marker
    gwas_results.drop('Marker', axis=1, inplace=True)
    
    q_values = pd.

In [321]:
qvalues = pd.read_csv("C:\\tassel\\output\\qvalues_daoko_girl_200.txt", sep='\t')
qvalues.columns = ['q']
qvalues.index = qvalues.index - 1

In [322]:
qvalues

Unnamed: 0,q
0,0.975103
1,0.998361
2,0.907438
3,0.981454
4,0.998361
5,0.907438
6,0.998361
7,0.981454
8,0.981454
9,0.981454


In [323]:
results = gwas_results.join(qvalues)

In [324]:
results

Unnamed: 0,Chr,df,F,p,q
0,1,2,0.05091,0.95039,0.975103
1,1,1,0.71959,0.39839,0.998361
2,1,2,2.19675,0.11678,0.907438
3,1,2,0.95987,0.38663,0.981454
4,1,1,1.64924,0.20215,0.998361
5,1,2,1.16271,0.31705,0.907438
6,1,2,0.24170,0.78578,0.998361
7,1,2,1.41658,0.24762,0.981454
8,1,2,1.20565,0.30404,0.981454
9,1,2,1.84678,0.16336,0.981454


In [287]:
results.index = results['Marker']

In [222]:
results.columns = ['locus', 'Chr', 'df', 'F', 'p', 'q']

In [288]:
results

Unnamed: 0_level_0,Marker,Chr,df,F,p,q
Marker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0,1,2,0.05091,0.95039,0.998361
1,1,1,1,0.71959,0.39839,0.907438
2,2,1,2,2.19675,0.11678,0.981454
3,3,1,2,0.95987,0.38663,0.998361
4,4,1,1,1.64924,0.20215,0.907438
5,5,1,2,1.16271,0.31705,0.998361
6,6,1,2,0.24170,0.78578,0.981454
7,7,1,2,1.41658,0.24762,0.981454
8,8,1,2,1.20565,0.30404,0.981454
9,9,1,2,1.84678,0.16336,0.998361


In [292]:
tassel_to_saegus_loci[183]

335

In [294]:
aes

{155: {0: 5.246355642444491, 2: 4.4593782658468335},
 420: {2: 2.259192368494645, 3: 3.092229777105861},
 646: {1: 4.785768179045667, 2: 3.9965290715630735},
 667: {2: 3.514687471834254, 3: 4.566962428451008},
 925: {0: 2.531262958045664, 2: 3.105306129011896},
 1093: {1: 6.0877450839025204, 2: 3.8672779857240007},
 1132: {2: 3.139314451285914, 3: 2.354233854620061},
 1178: {1: 2.775562895221297, 2: 3.1281122552083684},
 1191: {2: 6.586549033287626, 3: 0.8933349309649996},
 1445: {1: 6.085636913676679, 3: 3.277924371568076}}

In [293]:
allele_effects_table

Unnamed: 0_level_0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference
locus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
114,114,1,2.345108,3,3.363427,1.018319
183,183,2,2.577748,0,3.580992,1.003244
363,363,0,1.855103,3,4.429047,2.573944
405,405,4,2.482319,5,3.311027,0.828708
420,420,1,3.100708,3,6.109125,3.008417
423,423,0,1.150356,2,1.791831,0.641475
503,503,2,5.587148,0,3.980416,1.606732
634,634,3,3.912901,1,2.222465,1.690436
690,690,1,3.350831,2,1.331439,2.019392
780,780,2,2.873186,1,2.270707,0.602479


In [None]:
results.join(allele_effects_table, on=)

In [231]:
allele_effects_table

Unnamed: 0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference
0,114,1,2.345108,3,3.363427,1.018319
1,183,2,2.577748,0,3.580992,1.003244
2,363,0,1.855103,3,4.429047,2.573944
3,405,4,2.482319,5,3.311027,0.828708
4,420,1,3.100708,3,6.109125,3.008417
5,423,0,1.150356,2,1.791831,0.641475
6,503,2,5.587148,0,3.980416,1.606732
7,634,3,3.912901,1,2.222465,1.690436
8,690,1,3.350831,2,1.331439,2.019392
9,780,2,2.873186,1,2.270707,0.602479


In [237]:
pd.concat(allele_effects_table, results)

TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"

In [251]:
smaller = pd.DataFrame([results.ix[locus, :] for locus in allele_effects_table['locus']])

In [271]:
allele_effects_table

Unnamed: 0_level_0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference
locus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
114,114,1,2.345108,3,3.363427,1.018319
183,183,2,2.577748,0,3.580992,1.003244
363,363,0,1.855103,3,4.429047,2.573944
405,405,4,2.482319,5,3.311027,0.828708
420,420,1,3.100708,3,6.109125,3.008417
423,423,0,1.150356,2,1.791831,0.641475
503,503,2,5.587148,0,3.980416,1.606732
634,634,3,3.912901,1,2.222465,1.690436
690,690,1,3.350831,2,1.331439,2.019392
780,780,2,2.873186,1,2.270707,0.602479


In [269]:
results

Unnamed: 0,locus,Chr,df,F,p,q
1,0,1,2,0.05091,0.95039,0.975103
2,1,1,1,0.71959,0.39839,0.998361
3,2,1,2,2.19675,0.11678,0.907438
4,3,1,2,0.95987,0.38663,0.981454
5,4,1,1,1.64924,0.20215,0.998361
6,5,1,2,1.16271,0.31705,0.907438
7,6,1,2,0.24170,0.78578,0.998361
8,7,1,2,1.41658,0.24762,0.981454
9,8,1,2,1.20565,0.30404,0.981454
10,9,1,2,1.84678,0.16336,0.981454


In [275]:
pd.concat([allele_effects_table, smaller], axis=1, )

Unnamed: 0_level_0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference,locus,Chr,df,F,p,q
locus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
114,114,1,2.345108,3,3.363427,1.018319,113,2,2,0.15953,0.85277,0.998361
183,183,2,2.577748,0,3.580992,1.003244,182,2,2,0.36219,0.69711,0.998361
363,363,0,1.855103,3,4.429047,2.573944,362,4,2,0.04505,0.95597,0.998361
405,405,4,2.482319,5,3.311027,0.828708,404,5,2,0.21922,0.80355,0.975847
420,420,1,3.100708,3,6.109125,3.008417,419,5,1,0.39127,0.53312,0.998361
423,423,0,1.150356,2,1.791831,0.641475,422,5,2,0.11928,0.88769,0.981454
503,503,2,5.587148,0,3.980416,1.606732,502,6,2,0.79714,0.45361,0.998361
634,634,3,3.912901,1,2.222465,1.690436,633,7,2,0.88893,0.41449,0.981454
690,690,1,3.350831,2,1.331439,2.019392,689,8,2,0.1797,0.83581,0.975103
780,780,2,2.873186,1,2.270707,0.602479,779,9,1,0.48704,0.48694,0.998361


In [279]:
allele_effects_table

Unnamed: 0_level_0,locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference
locus,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
114,114,1,2.345108,3,3.363427,1.018319
183,183,2,2.577748,0,3.580992,1.003244
363,363,0,1.855103,3,4.429047,2.573944
405,405,4,2.482319,5,3.311027,0.828708
420,420,1,3.100708,3,6.109125,3.008417
423,423,0,1.150356,2,1.791831,0.641475
503,503,2,5.587148,0,3.980416,1.606732
634,634,3,3.912901,1,2.222465,1.690436
690,690,1,3.350831,2,1.331439,2.019392
780,780,2,2.873186,1,2.270707,0.602479
