# Epsilon

In [1]:
import simuOpt
simuOpt.setOptions(alleleType='short', optimized=True, numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
import h5py
import collections as col
np.set_printoptions(suppress=True, precision=3)

In [2]:
mg = analyze.MultiGeneration('epsilon')

In [3]:
run_id = 'epsilon'
generations_of_random_mating = 10
number_of_qtl = 10
number_of_replicates = 6
founders = [[2, 26], [3, 25], [4, 24], [5, 23]]
os_per_pair = 500
recombination_rates = [0.01]*1478

In [4]:
prefounders = sim.loadPopulation('bia_prefounders.pop')

In [5]:
sim.tagID(prefounders, reset=True)

In [6]:
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

### Allele Effects & Frequencies Table

In [9]:
rdm_populations = sim.Simulator(prefounders, 2, stealPops=False)
sel_populations = sim.Simulator(prefounders, 2, stealPops=False)
dri_populations = sim.Simulator(prefounders, 2, stealPops=False)

rdm_magic = breed.MAGIC(rdm_populations, founders, recombination_rates)
sel_magic = breed.MAGIC(sel_populations, founders, recombination_rates)
dri_magic = breed.MAGIC(dri_populations, founders, recombination_rates)

sim.tagID(prefounders, reset=27)

rdm_magic.generate_f_one(founders, os_per_pair)
sel_magic.generate_f_one(founders, os_per_pair)
dri_magic.generate_f_one(founders, os_per_pair)

In [10]:
sim.stat(rdm_populations.population(0), alleleFreq=sim.ALL_AVAIL)
af = analyze.allele_data(rdm_populations.population(0), alleles, list(range(1478)))

In [11]:
minor_alleles = np.asarray(af.minor_allele, dtype=np.int8)

In [12]:
rdm_magic.recombinatorial_convergence(rdm_populations, 4, 500)
sel_magic.recombinatorial_convergence(sel_populations, 4, 500)
dri_magic.recombinatorial_convergence(dri_populations, 4, 500)

Start of recombinatorial convergence.
Prior to convergence: 4
Prior to convergence: 2
Start of recombinatorial convergence.
Prior to convergence: 4
Prior to convergence: 2
Start of recombinatorial convergence.
Prior to convergence: 4
Prior to convergence: 2


In [13]:
study = analyze.Study(run_id)

In [14]:
# Draw 50 distinct locus indices from 0..1477 and keep them in ascending order.
# NOTE(review): the sample size is hard-coded to 50 although `number_of_qtl` is set
# to 10 in an earlier cell -- confirm which value is intended.
# NOTE(review): no `random.seed(...)` call is visible in this notebook, so the chosen
# loci will differ from run to run.
qtl = random.sample(range(1478), 50)
qtl.sort()

In [15]:
additive_trait = parameters.Trait()
allele_effects = additive_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1,
                                                     multiplicity=1)

In [16]:
ae_array = additive_trait.convert_allele_effects_into_array(prefounders.totNumLoci(), 6, allele_effects)

In [None]:
ae_array[qtl, :]

In [17]:


##### Components
#Allele frequencies required for the super-table are created from the population allele frequencies and the function `analyze.minor_allele_frequencies_table`. The quantitative allele data is created from the population allele frequencies, the allele array, the allele effects array and the function `analyze.generate_allele_effects_table`.
# Even generations 2, 4, 6 and 8.
sampling_generations = list(range(2, 10, 2))
# Maps each integer 0..10 to a sample size of 100
# (presumably keyed by generation number -- confirm against the simulate classes).
sample_sizes = dict.fromkeys(range(11), 100)

In [18]:
rdm_meta_populations = {rep: [] for rep in range(2)}
sel_meta_populations = {rep: [] for rep in range(2)}
dri_meta_populations = {rep: [] for rep in range(2)}

In [19]:
rdm_mating = simulate.RandomMating(10, 2000, 0.05, 0.5, 5, 0.7, sample_sizes)

In [None]:
drift = simulate.Drift(10, 2000, 0.05, 0.5, 5, 0.7, sample_sizes)

In [None]:
selection = simulate.Truncation(10, 2000, 0.05, 0.5, 5, 0.7, sample_sizes)

In [20]:
rdm_mating.replicate_random_mating(rdm_populations, rdm_meta_populations, qtl, ae_array, recombination_rates)

Initial: Sampled 100 individuals from generation 0 Replicate: 0.
Initial: Sampled 100 individuals from generation 0 Replicate: 1.
Generation: 0
Generation: 0
Generation: 1
Generation: 1
Generation: 2
Generation: 2
Generation: 3
Generation: 3
Generation: 4
Generation: 4
Generation: 5
Generation: 5
Generation: 6
Generation: 6
Generation: 7
Generation: 7
Generation: 8
Generation: 8
Generation: 9
Generation: 9
Final: Sampled 100 individuals from generation 10
Final: Sampled 100 individuals from generation 10


del rdm_populations

In [None]:
drift.replicate_recurrent_drift(dri_populations, dri_meta_populations, qtl, ae_array, recombination_rates)

In [None]:
selection.replicate_selection(sel_populations, sel_meta_populations, qtl, ae_array, recombination_rates)

del sel_populations

In [None]:
wombo_combo = [rdm_meta_populations, dri_meta_populations, sel_meta_populations]

In [21]:
sets_of_segregating_loci = study.seg_loci_among_samples(rdm_meta_populations)

In [22]:
len(sets_of_segregating_loci.keys())

1

In [23]:
concordant_segregating_loci = list(sets_of_segregating_loci.keys())[0]

In [24]:
concordant_segregating_loci

(1,
 2,
 3,
 4,
 5,
 6,
 8,
 10,
 12,
 13,
 14,
 15,
 16,
 20,
 21,
 23,
 24,
 25,
 26,
 27,
 29,
 30,
 31,
 32,
 34,
 36,
 37,
 38,
 39,
 40,
 42,
 45,
 46,
 50,
 52,
 53,
 54,
 56,
 59,
 61,
 62,
 63,
 66,
 67,
 68,
 70,
 71,
 74,
 76,
 77,
 79,
 80,
 82,
 86,
 88,
 90,
 91,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 103,
 104,
 106,
 107,
 108,
 109,
 114,
 115,
 116,
 119,
 120,
 121,
 122,
 123,
 124,
 126,
 127,
 128,
 129,
 130,
 132,
 134,
 136,
 138,
 139,
 140,
 141,
 144,
 145,
 146,
 147,
 149,
 152,
 153,
 155,
 158,
 159,
 163,
 165,
 168,
 170,
 171,
 172,
 173,
 174,
 175,
 177,
 181,
 182,
 185,
 186,
 190,
 191,
 192,
 193,
 196,
 198,
 202,
 208,
 209,
 211,
 212,
 214,
 215,
 218,
 219,
 220,
 221,
 223,
 224,
 225,
 227,
 229,
 232,
 235,
 240,
 241,
 246,
 247,
 249,
 250,
 252,
 253,
 254,
 255,
 256,
 257,
 258,
 259,
 261,
 262,
 263,
 264,
 265,
 266,
 268,
 270,
 272,
 274,
 275,
 277,
 278,
 279,
 280,
 282,
 283,
 284,
 285,
 288,
 291,
 292,
 293,
 294,
 297,
 

In [25]:
repz = rdm_meta_populations[0]

In [26]:
analyze.combine_population_samples(repz)

In [28]:
meta_pop = repz[0]

In [29]:
sim.stat(meta_pop, alleleFreq=sim.ALL_AVAIL)

In [30]:
indir = "/home/vakanas/tassel-5-standalone/input/"
outdir = "/home/vakanas/tassel-5-standalone/output/"
rep_id_name = "0"

In [31]:
gwas = analyze.GWAS(meta_pop, list(range(meta_pop.totNumLoci())), run_id)

In [32]:
ccm = gwas.calculate_count_matrix(minor_alleles, list(range(meta_pop.totNumLoci())))

In [33]:
ps_svd = gwas.pop_struct_svd(ccm)

In [34]:
name = run_id+'_'+rep_id_name

In [35]:
gwas.population_structure_formatter(ps_svd, indir+name+'_structure_matrix.txt')

Unnamed: 0,0,1
I16058,-0.039086,-0.025462
I16064,-0.040828,0.052701
I16074,-0.038964,0.040117
I16091,-0.038899,-0.000425
I16096,-0.041705,-0.037641
I16120,-0.038286,0.018488
I16129,-0.038003,0.049646
I16145,-0.037994,0.035441
I16155,-0.039742,-0.001542
I16163,-0.040009,-0.009983


In [36]:
int_to_snp_map = {0:'A', 1:'C', 2:'G', 3:'T', 4:'-', 5:'+'}

In [37]:
locus_names = list(concordant_segregating_loci)

In [38]:
alleles_column = ['NA']*len(concordant_segregating_loci)

In [39]:
chromosomes = [meta_pop.chromLocusPair(locus)[0]+1 for locus in concordant_segregating_loci]

In [40]:
gwas.hapmap_formatter(concordant_segregating_loci, alleles_column, 
                      locus_names, chromosomes, 
                      locus_names, 
                      indir+name+'_simulated_hapmap.txt')

Unnamed: 0,rs,alleles,chrom,pos,strand,assembly,center,protLSID,assayLSID,panelLSID,...,I73333,I73343,I73388,I73420,I73426,I73514,I73533,I73545,I73725,I73883
0,1,,1,1,,,,,,,...,CT,CC,CT,CC,CC,CT,CC,CT,CC,CT
1,2,,1,2,,,,,,,...,TT,TT,TT,TT,CT,TT,TT,TT,CT,TT
2,3,,1,3,,,,,,,...,AA,AA,AA,AG,AG,AG,GG,AA,AG,AA
3,4,,1,4,,,,,,,...,GG,GG,GG,GG,GG,GG,GG,GG,GG,GG
4,5,,1,5,,,,,,,...,AG,AG,AA,AA,AA,AG,AA,AG,AA,AA
5,6,,1,6,,,,,,,...,AA,AG,AG,AA,AA,AG,AA,AG,AA,AG
6,8,,1,8,,,,,,,...,AA,AA,AA,AA,AA,AA,AA,AA,AG,AA
7,10,,1,10,,,,,,,...,TT,CC,TT,CT,TT,CC,TT,CC,CT,CC
8,12,,1,12,,,,,,,...,CC,TT,CC,CT,CT,TT,TT,TT,TT,CT
9,13,,1,13,,,,,,,...,GG,GG,GG,GG,GG,GG,GG,GG,GG,GG


In [50]:
minor_allele_frequency_table = analyze.minor_allele_frequencies_table(
        meta_pop.dvars().alleleFreq, minor_alleles)

In [51]:
minor_allele_frequencies = np.array(minor_allele_frequency_table.minor_frequency)

In [52]:
minor_allele_frequencies

array([ 0.   ,  0.131,  0.144, ...,  0.279,  0.099,  0.   ])

In [43]:
gwas.calc_kinship_matrix(ccm, minor_allele_frequencies, indir+name+'_kinship_matrix.txt')

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,590,591,592,593,594,595,596,597,598,599
I16058,0.931112,0.096208,-0.050910,0.000172,0.061864,0.150748,-0.032740,-0.093017,0.030132,0.040629,...,-0.090871,0.031831,0.002971,-0.021475,-0.032716,0.006986,0.029382,0.024164,0.077561,-0.025103
I16064,0.096208,0.879354,-0.015114,-0.000312,-0.054713,0.074078,-0.105782,0.120546,0.018765,-0.032413,...,-0.007913,-0.001304,-0.084583,-0.040098,-0.025943,-0.011637,0.057922,0.067215,-0.002737,0.032460
I16074,-0.050910,-0.015114,0.911630,0.113780,-0.024063,0.035798,0.033706,-0.030200,-0.052167,-0.038043,...,0.019109,0.025718,-0.017654,-0.002193,0.019218,-0.024522,0.001502,-0.050879,-0.019250,-0.009449
I16091,0.000172,-0.000312,0.113780,0.854164,0.081437,-0.072750,-0.024051,0.013625,-0.019226,-0.070404,...,0.004888,0.040520,0.037055,0.008981,-0.060306,-0.071395,-0.034487,-0.010682,0.013692,0.045260
I16096,0.061864,-0.054713,-0.024063,0.081437,0.850573,-0.116266,-0.038544,0.071690,-0.095394,0.045708,...,0.001278,-0.028392,-0.010089,-0.023652,0.012271,0.015693,-0.092516,-0.007036,0.097152,-0.081698
I16120,0.150748,0.074078,0.035798,-0.072750,-0.116266,0.901363,0.050340,-0.009938,0.116839,0.036638,...,-0.051327,-0.051974,-0.008275,0.021697,-0.043962,0.032019,-0.039911,0.067336,0.008268,-0.043605
I16129,-0.032740,-0.105782,0.033706,-0.024051,-0.038544,0.050340,0.857272,0.013365,-0.092044,0.016406,...,-0.020768,0.000353,0.073074,0.026861,-0.107729,0.011787,0.037811,0.025337,0.002548,0.005093
I16145,-0.093017,0.120546,-0.030200,0.013625,0.071690,-0.009938,0.013365,0.899973,0.065353,0.017803,...,-0.026627,-0.020018,-0.048878,0.108072,-0.015634,-0.001328,0.053720,-0.020429,-0.057730,-0.015277
I16155,0.030132,0.018765,-0.052167,-0.019226,-0.095394,0.116839,-0.092044,0.065353,0.906829,-0.087606,...,-0.008687,0.045085,-0.027310,0.028058,-0.048485,0.016612,0.064404,-0.053280,0.011001,0.024430
I16163,0.040629,-0.032413,-0.038043,-0.070404,0.045708,0.036638,0.016406,0.017803,-0.087606,0.811730,...,-0.012702,-0.031488,-0.020441,-0.004980,0.096245,-0.023682,-0.044821,-0.082690,-0.029293,0.009532


In [44]:
gwas.trait_formatter(indir+name+'_trait_vector.txt')

Unnamed: 0,0,1
0,I16058,142.58579745276688
1,I16064,138.01269206327035
2,I16074,133.31715401884895
3,I16091,154.42181697398638
4,I16096,154.84025911754665
5,I16120,182.100155946399
6,I16129,148.44448752350038
7,I16145,137.41374276692636
8,I16155,157.71296455677395
9,I16163,148.17069211496644


In [45]:
import xml.etree.ElementTree as ET
import lxml.etree as etree

In [46]:
config_file_template = '/home/vakanas/BISB/rjwlab-scripts/saegus_project/devel/magic/1478/gwas_pipeline.xml'

In [47]:
# Parse the TASSEL pipeline template with the standard-library ElementTree.
tree = ET.parse(config_file_template)
root = tree.getroot()
# Serialise the root and re-parse it with lxml. Despite its name, `lxml_tree`
# holds the root *element* returned by `fromstring`.
lxml_tree = etree.fromstring(ET.tostring(root))
# `lxml_root` is the lxml ElementTree wrapping that element; the following cells
# call `find(...)` and `write(...)` on it.
lxml_root = lxml_tree.getroottree()

In [48]:
lxml_root.find('fork1/h').text = indir+name+'_simulated_hapmap.txt'
lxml_root.find('fork2/t').text = indir+name+'_trait_vector.txt'
lxml_root.find('fork3/q').text = indir+name+'_structure_matrix.txt'
lxml_root.find('fork4/k').text = indir+name+'_kinship_matrix.txt'

lxml_root.find('combine6/export').text = outdir+name+'_out_'

In [49]:
lxml_root.write("/home/vakanas/tassel-5-standalone/"+"R"+rep_id_name+'_'+
                run_id+'_'+"_sim_gwas_pipeline.xml",
                encoding="UTF-8",
                method="xml", 
                xml_declaration=True, 
                standalone='',
                pretty_print=True) 

# Run TASSEL at This Point

# Use R Qvalue package to get Qvalues

# Combine TASSEL, Qvalues and Other Information

In [None]:
qvalues = pd.read_csv("/home/vakanas/tassel-5-standalone/output/epsilon_0_qvalues.txt", sep='\t')

In [None]:
qvalues.index = list(concordant_segregating_loci)

In [None]:
qvalues

In [None]:
raw_gwas_results = pd.read_csv("/home/vakanas/tassel-5-standalone/output/epsilon_0_out_2.txt", sep='\t')

In [None]:
raw_gwas_results.drop(0, axis=0, inplace=True)

In [None]:
raw_gwas_results.drop('Trait', axis=1, inplace=True)

In [None]:
raw_gwas_results.index = np.array(list(map(int, raw_gwas_results.Marker)))

In [None]:
raw_gwas_results

In [None]:
raw_gwas_results = raw_gwas_results.join(qvalues)

In [None]:
minor_allele_frequencies

In [None]:
mafrqs = analyze.minor_allele_frequencies_table(meta_pop.dvars().alleleFreq, minor_alleles)

In [None]:
# Attach the minor-allele columns for the concordant segregating loci.
# `.ix` has been removed from pandas; `.loc` performs the same label-based lookup.
# The loci are converted to a list because `.loc` interprets a tuple row key as a
# single MultiIndex key rather than as a collection of labels.
raw_gwas_results = raw_gwas_results.join(mafrqs.loc[list(concordant_segregating_loci), :])

In [None]:
raw_gwas_results

In [17]:
def tassel_results_tables(gwas_file_name, q_values_file_name,
                          minor_allele_frequency_table,
                          quantitative_allele_table):
    """Merge GWAS output, q-values and allele tables into one marker-indexed table.

    Parameters
    ----------
    gwas_file_name : str
        Tab-separated GWAS results file. It must contain ``Trait`` and ``Marker``
        columns; the first data row is discarded and ``Marker`` values must be
        convertible to ``int``.
    q_values_file_name : str
        Tab-separated q-value file with exactly one row per remaining GWAS row,
        in the same order (rows are matched by position, not by a key column).
    minor_allele_frequency_table : pandas.DataFrame
        Table indexed by integer locus; rows are looked up by marker id.
    quantitative_allele_table : pandas.DataFrame
        Table indexed by integer locus; rows are looked up by marker id.

    Returns
    -------
    pandas.DataFrame
        GWAS rows indexed by integer marker id with the q-value, minor-allele
        and quantitative-allele columns appended. Markers absent from either
        allele table get NaN in that table's columns.

    Raises
    ------
    AssertionError
        If the index dtype of either allele table differs from the dtype of the
        integer marker index.
    """
    # Chained drops instead of `inplace=True`: no frame is mutated in place.
    # NOTE(review): row 0 is presumably a non-marker row in the TASSEL output --
    # confirm against the file format.
    raw_gwas_results = (
        pd.read_csv(gwas_file_name, sep='\t')
        .drop(0, axis=0)
        .drop('Trait', axis=1)
    )
    marker_index = np.array(list(map(int, raw_gwas_results.Marker)))
    raw_gwas_results.index = marker_index

    # q-values carry no marker column of their own; they are aligned positionally
    # by giving them the same marker index as the GWAS rows.
    q_values = pd.read_csv(q_values_file_name, sep='\t')
    q_values.index = marker_index
    raw_gwas_results = raw_gwas_results.join(q_values)

    assert minor_allele_frequency_table.index.dtype == raw_gwas_results.index.dtype, "Indexes of these tables are different"

    # `.ix` was removed from pandas; `reindex` is the label-based row selection
    # that keeps missing markers as NaN rows instead of raising.
    raw_gwas_results = raw_gwas_results.join(
        minor_allele_frequency_table.reindex(raw_gwas_results.index))

    assert quantitative_allele_table.index.dtype == raw_gwas_results.index.dtype, "Indexes of these tables are different"

    raw_gwas_results = raw_gwas_results.join(
        quantitative_allele_table.reindex(raw_gwas_results.index))
    return raw_gwas_results

In [3]:
pwd

'/home/vakanas/BISB/rjwlab-scripts/saegus_project/devel/magic/1478'

In [6]:
cd /home/vakanas/tassel-5-standalone/output/

/home/vakanas/tassel-5-standalone/output


In [7]:
ls

epsilon_0_maf_table.txt  epsilon_0_out_2.txt  epsilon_0_quant_allele_table.txt
epsilon_0_out_1.txt      epsilon_0_out_3.txt  epsilon_0_qvalues.txt


In [11]:
mafrqs = pd.read_csv('epsilon_0_maf_table.txt', sep='\t', index_col=0)

In [12]:
mafrqs

Unnamed: 0,minor_allele,minor_frequency
0,2,0.000000
1,3,0.134167
2,1,0.120000
3,2,0.125000
4,0,0.072500
5,2,0.265000
6,2,0.261667
7,1,0.000000
8,2,0.133333
9,3,0.000000


In [13]:
qtad = pd.read_csv('epsilon_0_quant_allele_table.txt', sep='\t', index_col=0)

In [14]:
qtad

Unnamed: 0,alpha,alpha_effect,alpha_frequency,beta,beta_effect,beta_frequency
0,1,0.131916,1.000000,2,0.729528,0.000000
1,1,0.915951,0.865833,3,0.256704,0.134167
2,3,0.000000,0.880000,1,0.000000,0.120000
3,0,0.000000,0.875000,2,0.000000,0.125000
4,2,0.000000,0.927500,0,0.000000,0.072500
5,0,0.000000,0.735000,2,0.000000,0.265000
6,0,0.000000,0.738333,2,0.000000,0.261667
7,3,0.000000,1.000000,1,0.000000,0.000000
8,0,0.000000,0.866667,2,0.000000,0.133333
9,1,0.000000,1.000000,3,0.000000,0.000000


In [19]:
super_table = tassel_results_tables('epsilon_0_out_2.txt', 'epsilon_0_qvalues.txt', mafrqs, qtad)

In [22]:
# Rows whose q-value is below 0.05.
# `.ix` has been removed from pandas; `.loc` accepts the same boolean mask.
super_table.loc[super_table.q < 0.05]

Unnamed: 0,Marker,Chr,Pos,df,F,p,add_effect,add_F,add_p,dom_effect,...,-2LnLikelihood,q,minor_allele,minor_frequency,alpha,alpha_effect,alpha_frequency,beta,beta_effect,beta_frequency
983,983,7,983,2,37.8941,3.2332e-16,4.37307,8.45588,0.00377,-1.921,...,3969.62784,2.981721e-13,3,0.118333,1,8.054639,0.881667,3,1.755104,0.118333


In [23]:
# Rows with a positive `alpha_effect`.
# `.ix` has been removed from pandas; `.loc` accepts the same boolean mask.
super_table.loc[super_table.alpha_effect > 0]

Unnamed: 0,Marker,Chr,Pos,df,F,p,add_effect,add_F,add_p,dom_effect,...,-2LnLikelihood,q,minor_allele,minor_frequency,alpha,alpha_effect,alpha_frequency,beta,beta_effect,beta_frequency
1,1,1,1,2,3.68505,0.02567,3.16621,6.88669,0.00891,2.3548,...,3969.62784,0.7978518,3,0.134167,1,0.915951,0.865833,3,0.256704,0.134167
37,37,1,37,2,0.69912,0.49743,0.10428,0.00935,0.92301,-0.91146,...,3969.62784,0.965672,1,0.1425,3,1.672678,0.8575,1,1.207993,0.1425
88,88,1,88,2,1.02331,0.36003,1.78925,1.84566,0.1748,1.23502,...,3969.62784,0.965672,3,0.116667,1,0.164328,0.883333,3,0.684191,0.116667
138,138,1,138,2,0.2568,0.77361,-0.72241,0.42325,0.51557,0.8217,...,3969.62784,0.965672,1,0.135,3,0.310844,0.865,1,0.597185,0.135
299,299,2,299,2,0.17967,0.83559,0.48669,0.21781,0.64089,0.10188,...,3969.62784,0.965672,3,0.140833,1,0.444588,0.859167,3,0.339358,0.140833
319,319,2,319,2,2.5779,0.07678,-1.0601,0.64925,0.4207,0.55881,...,3969.62784,0.8861087,3,0.12,1,0.456536,0.88,3,1.536323,0.12
328,328,2,328,2,4.0355,0.01816,-0.84462,0.43362,0.51047,-2.7962,...,3969.62784,0.7510557,2,0.12,0,3.20926,0.88,2,1.179975,0.12
348,348,2,348,2,0.11984,0.88708,-0.16351,0.1096,0.74071,0.19051,...,3969.62784,0.965672,3,0.493333,1,0.283417,0.506667,3,0.310107,0.493333
366,366,2,366,2,3.45232,0.03231,2.81061,4.14748,0.04214,-3.8442,...,3969.62784,0.8053221,0,0.105,2,0.699294,0.895,0,0.554707,0.105
474,474,3,474,2,2.23724,0.10765,-4.0213,4.27874,0.03902,-3.4131,...,3969.62784,0.965672,2,0.106667,1,0.517391,0.893333,2,1.206618,0.106667


In [None]:
mg.multiple_sample_analyzer(meta_populations, qtl, allele_effects, 
                            minor_alleles, concordant_segregating_loci)

In [None]:
analyze.store_allele_effect_frequency_tables(meta_population, alleles, 
                                             qtl,
                                             exponential_allele_effects,
                                            run_id, 'exponential')

In [None]:
# Two-way lookup between a locus and its position within
# `concordant_segregating_loci` (the position is what the variable names call
# the TASSEL index).
saegus_to_tassel_loci = {locus: idx for idx, locus in enumerate(concordant_segregating_loci)}
tassel_to_saegus_loci = dict(enumerate(concordant_segregating_loci))

# The context manager closes the shelf even if one of the writes raises, so the
# backing file is never left open.
with shelve.open(run_id+'_loci_conversions') as loci_conversions:
    loci_conversions['saegus_to_tassel'] = saegus_to_tassel_loci
    loci_conversions['tassel_to_saegus'] = tassel_to_saegus_loci

In [None]:
# Persist the segregating loci; the context manager closes the shelf even if the
# write raises.
# NOTE(review): the key is 'bacchus' although `run_id` is 'epsilon' in this
# notebook -- confirm the key is intentional.
with shelve.open('segregating_loci_storage') as seg_loc_storage:
    seg_loc_storage['bacchus'] = concordant_segregating_loci

In [None]:
# Integer allele code -> SNP character.
int_to_snp = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: '-', 5: '+'}
# Reverse mapping, derived from the forward one so the two cannot drift apart.
snp_to_int = {snp: code for code, snp in int_to_snp.items()}

# The context manager closes the shelf even if one of the writes raises.
with shelve.open('synthesis_parameters') as conv:
    conv['integer_to_snp'] = int_to_snp
    conv['snp_to_integer'] = snp_to_int

In [None]:
exponential_allele_effects_table = analyze.generate_allele_effects_table(qtl, alleles, 
                                                exponential_allele_effects, saegus_to_tassel_loci)

In [None]:
analyze.remap_allele_frequency_table_loci(analyze.reload_allele_frequencies_table(run_id, 0, 250, 
                                                                                  'exponential'), 
                                          concordant_segregating_loci)

In [None]:
analyze.write_multiple_sample_analyzer(sample_library, sample_sizes, qtl, alleles, 
                                       exponential_allele_effects, 0.7,  concordant_segregating_loci, 
                                       run_id=run_id, sub_run_id='_exponential', 
                                       allele_frequency_hdf=run_id+'_allele_frequency_storage.h5')

In [None]:
multiple_sample_analyzer(meta_populations, qtl, allele_effects, minor_alleles, concordant_segregating_loci)

In [None]:
import h5py

In [None]:
# Open explicitly read-only: this cell only reads, and older h5py releases opened
# files in read/write-create mode when no mode was given.
# NOTE(review): `afname` is not defined in any cell of this notebook -- it must be
# set to the dataset name before this cell runs.
with h5py.File('bia_allele_frequencies.hdf5', 'r') as biaf:
    reloaded_af = np.array(biaf[afname])

In [None]:
fisegloc = list(concordant_segregating_loci)

In [None]:
minor_allele_frequencies = reloaded_af[fisegloc]

In [None]:
def write_super_tables(power_and_fpr_raw_data, sample_sizes, number_of_replicates, run_id, sub_run_id=''):
    """Write one tab-separated super table per (sample size, replicate) pair.

    ``power_and_fpr_raw_data[size][rep]`` must provide ``to_csv``. Each table is
    written to ``<run_id>_<sub_run_id>_<rep>_<size>_super_table.txt`` relative to
    the current working directory. With the default empty ``sub_run_id`` the name
    therefore contains a double underscore after ``run_id``.

    ``sample_sizes`` is only iterated, so a dict contributes its keys.
    """
    for size in sample_sizes:
        for rep in range(number_of_replicates):
            name_parts = (run_id, sub_run_id, str(rep), str(size))
            file_name = '_'.join(name_parts) + '_super_table.txt'
            power_and_fpr_raw_data[size][rep].to_csv(file_name, sep='\t')

In [None]:
expo_power_fpr_raw_data = analyze.collect_power_analysis_data(run_id, sample_sizes, number_of_replicates, concordant_segregating_loci, 'exponential')

In [None]:
expo_power_fpr_raw_data[250]

In [None]:
write_super_tables(expo_power_fpr_raw_data,
                  sample_sizes,
                  number_of_replicates,
                  'bacchus',
                  sub_run_id='exponential')

In [None]:
expo_results, expo_true_positives, expo_false_positives = study.calculate_power_fpr(expo_power_fpr_raw_data, sample_sizes, 
                                                                             number_of_replicates, number_of_qtl)

In [None]:
expo_results

In [None]:
mean_and_stdev = pd.DataFrame([expo_results.mean(), expo_results.std()], index=['mean', 'stdev']).T
mean_and_stdev.to_csv('bacchus_exponential_mean_and_stdev_power_fpr.csv', sep='\t')

In [None]:
geo_results

In [None]:
geometric_allele_effects_table

In [None]:
exponential_allele_effects_table

In [None]:
expo_results.to_csv("bacchus_exponential_power_fpr_results.txt", sep='\t')

In [None]:
mean_and_stdev = pd.DataFrame([geo_results.mean(), geo_results.std()], index=['mean', 'stdev']).T
mean_and_stdev.to_csv('full_icecrown_geometric_mean_and_stdev_power_fpr.txt', sep='\t')

In [None]:
expo_results, expo_true_positives, expo_false_positives = full_icecrown.calculate_power_fpr(expo_power_fpr_raw_data,
                                                                                      sample_sizes,
                                                                                      number_of_replicates,
                                                                                      number_of_qtl)

In [None]:
expo_results

In [None]:
expo_results.to_csv('full_icecrown_exponential_power_fpr_results.txt', sep='\t')

In [None]:
mean_and_stdev = pd.DataFrame([expo_results.mean(), expo_results.std()], index=['mean', 'stdev']).T
mean_and_stdev.to_csv('full_icecrown_exponential_mean_and_stdev_power_fpr.txt', sep='\t')

In [None]:
write_super_tables(expo_power_fpr_raw_data, sample_sizes, number_of_replicates, run_id, 'exponential')

In [None]:
geo_aggregate_estimated_actual = pd.DataFrame([np.array(geo_agg_estimated), np.array(geo_agg_actual)], index=['estimated', 'actual']).T

In [None]:
geo_aggregate_estimated_actual['estimated'] = geo_aggregate_estimated_actual['estimated'].apply(np.fabs)

In [None]:
geo_aggregate_estimated_actual

In [None]:
geo_corr = geo_aggregate_estimated_actual['estimated'].corr(geo_aggregate_estimated_actual['actual'])

In [None]:
geo_agg_estimated

In [None]:
aggregate_estimated_actual

In [None]:
geo_corr

In [None]:
pwd

In [None]:
geo_aggregate_estimated_actual.to_csv('full_icecrown_geometric_estimated_vs_actual_allele_effects.txt', sep='\t')

In [None]:
agg_estimated = []
agg_actual = []

In [None]:
# For every replicate/sample-size super table, collect the estimated
# ('add_effect') and actual ('difference') values of the rows whose
# 'difference' is non-zero.
# NOTE(review): `reps` and `sutable_collection` are not defined in any cell of
# this notebook -- they must exist before this cell runs.
for rep in reps:
    for size in sample_sizes:
        sutable = sutable_collection[rep][size]
        # `.ix` has been removed from pandas; `.loc` does the same label/boolean selection.
        droppable = list(sutable.loc[sutable.loc[:, 'difference'] == 0.0].index)
        qtloci = sutable.drop(droppable, axis=0)
        agg_estimated.extend(list(qtloci['add_effect']))
        agg_actual.extend(list(qtloci['difference']))

In [None]:
aggregate_estimated_actual = pd.DataFrame([np.array(agg_estimated), np.array(agg_actual)], index=['estimated', 'actual']).T

In [None]:
aggregate_estimated_actual['estimated'] = np.fabs(aggregate_estimated_actual['estimated'])

In [None]:
aggregate_estimated_actual

In [None]:
correlation_actual_vs_effects = aggregate_estimated_actual['estimated'].corr(aggregate_estimated_actual['actual'])

In [None]:
aggregate_estimated_actual.to_csv('full_icecrown_exponential_estimated_vs_actual_allele_effects.txt', sep='\t')

In [None]:
aggregate_estimated_actual['estimated'] = np.fabs(aggregate_estimated_actual['estimated'])

In [None]:
cd C:\tassel\output\full_icecrown\exponential

In [None]:
expo_estimated_actual = pd.read_csv('full_icecrown_exponential_estimated_vs_actual_allele_effects.txt', sep='\t', index_col=0)

In [None]:
expo_estimated_actual

In [None]:
aggregate_estimated_actual

In [None]:
# Plotting helpers used by the figure cells below (a stray leading comma
# previously made this cell a syntax error).
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook

In [None]:
output_notebook()

In [None]:
aggregate_estimated_actual

In [None]:
geo_x = aggregate_estimated_actual['estimated']
geo_y = aggregate_estimated_actual['actual']

In [None]:
p = figure(title="Estimated vs Actual Allele Effects - Geometric Series", 
           title_text_font_size="16",
          x_range=(-0.2, 4))

In [None]:
# Plot estimated (x) against actual (y) effects. The previous call used `y`, which
# is not defined until a later cell, and additionally passed x=/y= label strings on
# top of the positional coordinates; axis labels are set on the axes below instead.
p.scatter(geo_x, geo_y)

p.xaxis.axis_label = "Estimated"
p.yaxis.axis_label = "Actual"

In [None]:
show(p)

In [None]:
expo

In [None]:
p = figure(title="Estimated vs Actual Allele Effects - Geometric Series", title_text_font_size="16")

In [None]:
expo_plot = figure(title="Estimated vs Actual Effects - Exponential(lambda=1)", 
                   title_text_font_size="16", 
                  x_range=(0, 4))

x = np.array(expo_estimated_actual['estimated'])
y = np.array(expo_estimated_actual['actual'])

expo_plot.xaxis.axis_label = "Estimated"
expo_plot.yaxis.axis_label = "Actual"

In [None]:
expo_plot.scatter(x, y)

In [None]:
show(expo_plot)

In [None]:
from bokeh.io import hplot

In [None]:
geo_plot = figure(title="Estimated vs Actual Allele Effects - Geometric Series", 
           title_text_font_size="16",
          x_range=(0, 4), y_range=(0, 4))

In [None]:
geo_x = aggregate_estimated_actual['actual']
geo_y = aggregate_estimated_actual['estimated']

In [None]:
# Axis titles are set on the axes; the scatter call takes only the coordinates
# (the previous x=/y= label strings duplicated the positional arguments).
geo_plot.xaxis.axis_label = "Actual"
geo_plot.yaxis.axis_label = "Estimated"
geo_plot.scatter(geo_x, geo_y)

In [None]:
expo_plot = figure(title="Estimated vs Actual Effects - Exponential(lambda=1)", 
                   title_text_font_size="16", 
                  x_range=(0, 4), y_range=(0, 4))

expo_x = np.array(expo_estimated_actual['actual'])
expo_y = np.array(expo_estimated_actual['estimated'])

expo_plot.xaxis.axis_label = "Actual"
expo_plot.yaxis.axis_label = "Estimated"
expo_plot.scatter(expo_x, expo_y)

In [None]:
multi_plot = hplot(geo_plot, expo_plot)
show(multi_plot)

In [None]:
# NOTE(review): this runs after `show(multi_plot)` in the previous cell, so it can
# only affect later `show`/`save` calls. Bokeh's `output_file` configures HTML
# output, so the ".png" name is presumably misleading -- confirm the intended format.
output_file("multi_plot.png")

In [None]:
ls