In [1]:
import simuOpt
simuOpt.setOptions(alleleType='short', optimized=True, numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
import collections as col
np.set_printoptions(suppress=True, precision=3)

In [2]:
run_id='demonstration'

In [3]:
number_of_replicates = 5

In [4]:
sample_sizes=[500, 600, 700, 800, 900, 1000]

In [5]:
number_of_qtl = 10

In [6]:
# Build the replicated MAGIC population used by every later cell.
# Load the saved prefounder population and wrap it in a Simulator with
# `number_of_replicates` replicates.
# NOTE(review): stealPops=False presumably leaves `prefounders` usable
# afterwards -- confirm against the simuPOP Simulator documentation.
prefounders = sim.loadPopulation('prefounders1478.pop')
multi_prefounders = sim.Simulator(prefounders, number_of_replicates, stealPops=False)
# Four founder pairs (presumably individual IDs in the prefounder
# population -- TODO confirm) and the number of offspring made per pair.
founders = [[2, 26], [3, 25], [4, 24], [5, 23]]
os_per_pair = 500
# [0.01]*1478 supplies one value per locus; presumably per-locus
# recombination rates -- verify against breed.MAGIC.
magic = breed.MAGIC(multi_prefounders, founders, [0.01]*1478)
sim.tagID(prefounders, reset=27)
magic.generate_f_one(founders, os_per_pair)
# First round of crosses: parent pairs are chosen up front by
# MultiRandomCross and replayed through a PyParentsChooser.
mrc = breed.MultiRandomCross(multi_prefounders, 4, 500)
mother_choices, father_choices = mrc.determine_random_cross()
multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(mother_choices, father_choices)
# One generation, one offspring per mating event, 2000 offspring in total,
# recombination rate 0.01.
multi_prefounders.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(multi_snd_ord_chooser.snd_ord_id_pairs),
        sim.OffspringGenerator(ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01)
        ],
            numOffspring=1),
        subPopSize=[2000],
    ),
    gen=1,
)
# Second round of crosses: same mating scheme as above, driven by a new set
# of pre-selected parent pairs.
final_mrc = breed.MultiRandomCross(multi_prefounders, 2, 1000)
final_mothers, final_fathers = final_mrc.determine_random_cross()
final_multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(final_mothers, final_fathers)
multi_prefounders.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(final_multi_snd_ord_chooser.snd_ord_id_pairs),
        sim.OffspringGenerator(ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01)
        ],
            numOffspring=1),
        subPopSize=[2000],
    ),
    gen=1,
)
# Finish with random mating; the cell output reports 3 generations, so the
# second argument (2000) is presumably the population size -- TODO confirm.
mater = breed.MAGIC(multi_prefounders, founders, [0.01]*1478)
mater.random_mating(3, 2000)

Initiating random mating for 3 generations.


In [7]:
demonstration = analyze.Study(run_id)

In [8]:
sample_library = demonstration.collect_samples(multi_prefounders, sample_sizes)

In [9]:
sample_library

{0: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 1: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 2: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 3: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 4: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>]}

In [10]:
pwd

'C:\\Users\\DoubleDanks\\BISB\\wisser\\code\\rjwlab-scripts\\saegus_project\\devel\\magic\\1478'

In [11]:
# Load the allele table as a plain NumPy array. A forward slash is used in
# the path because it is accepted on both Windows and POSIX, whereas the
# previous hard-coded backslash only resolved on Windows.
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [12]:
for rep_id, sample_list in sample_library.items():
    for sample in sample_list:
        sim.stat(sample, alleleFreq=sim.ALL_AVAIL)

In [13]:
demonstration.store_allele_frequencies(sample_library, alleles)



In [14]:
allele_frequencies = pd.HDFStore('demonstration_allele_frequency_storage.h5')

In [15]:
sample_sizes

[500, 600, 700, 800, 900, 1000]

In [None]:
allele_frequencies.close()

In [18]:
sets_of_segregating_loci = demonstration.seg_loci_among_samples(sample_library)

In [19]:
sets_of_segregating_loci.values()

dict_values([30])

In [20]:
concordant_segregating_loci = list(sets_of_segregating_loci.keys())[0]

In [21]:
concordant_segregating_loci[::100]

(1, 159, 329, 483, 632, 785, 950, 1095, 1254, 1413)

In [22]:
qtl = tuple(sorted(random.sample(concordant_segregating_loci, number_of_qtl)))

In [23]:
qtl

(56, 255, 465, 472, 616, 1030, 1141, 1247, 1327, 1468)

In [24]:
add_trait = parameters.Trait()
allele_effects = add_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=1)

In [25]:
allele_effects

{56: {1: 2.2812691945674968, 3: 0.48357569403108913},
 255: {1: 0.6747500901207086, 3: 1.5632913345055426},
 465: {1: 0.6888397005445828, 3: 1.5293488062276326},
 472: {0: 0.08405376413465437, 1: 1.211195259597092},
 616: {1: 0.19086429218337728, 3: 0.2893219884051875},
 1030: {1: 0.2603456951411934, 3: 0.49506270330959246},
 1141: {1: 1.1232078799522691, 2: 1.063124727790315},
 1247: {1: 3.8635674058046083, 3: 0.05602116696649215},
 1327: {0: 3.5076628660065423, 2: 0.7912709232384485},
 1468: {0: 0.7695915531897579, 2: 1.209493868151905}}

In [26]:
# Persist the allele effects under the run's key. The context manager closes
# the shelf even if the assignment raises, matching the `with shelve.open`
# style used elsewhere in this notebook.
with shelve.open('allele_effects_storage') as allele_effects_store:
    allele_effects_store['demonstration'] = allele_effects

In [27]:
with shelve.open('demonstration_loci_conversions') as lconvert:
    nuloci_conversions = lconvert['saegus_to_tassel']

In [29]:
loci_conversions = shelve.open('demonstration_loci_conversions')

In [30]:
allele_effects_storage = shelve.open('allele_effects_storage')

In [31]:
allele_effects_storage['demonstration']

{56: {1: 2.2812691945674968, 3: 0.48357569403108913},
 255: {1: 0.6747500901207086, 3: 1.5632913345055426},
 465: {1: 0.6888397005445828, 3: 1.5293488062276326},
 472: {0: 0.08405376413465437, 1: 1.211195259597092},
 616: {1: 0.19086429218337728, 3: 0.2893219884051875},
 1030: {1: 0.2603456951411934, 3: 0.49506270330959246},
 1141: {1: 1.1232078799522691, 2: 1.063124727790315},
 1247: {1: 3.8635674058046083, 3: 0.05602116696649215},
 1327: {0: 3.5076628660065423, 2: 0.7912709232384485},
 1468: {0: 0.7695915531897579, 2: 1.209493868151905}}

In [32]:
allele_effects = allele_effects_storage['demonstration']

In [33]:
allele_effects_storage.close()

In [34]:
loci_conversions.close()

In [35]:
allele_effects

{56: {1: 2.2812691945674968, 3: 0.48357569403108913},
 255: {1: 0.6747500901207086, 3: 1.5632913345055426},
 465: {1: 0.6888397005445828, 3: 1.5293488062276326},
 472: {0: 0.08405376413465437, 1: 1.211195259597092},
 616: {1: 0.19086429218337728, 3: 0.2893219884051875},
 1030: {1: 0.2603456951411934, 3: 0.49506270330959246},
 1141: {1: 1.1232078799522691, 2: 1.063124727790315},
 1247: {1: 3.8635674058046083, 3: 0.05602116696649215},
 1327: {0: 3.5076628660065423, 2: 0.7912709232384485},
 1468: {0: 0.7695915531897579, 2: 1.209493868151905}}

In [None]:
demon_allele_effects

In [None]:
def record_loci_conversions(loci_conversion_file_name, concordant_segregating_loci):
    """
    Record the mapping between saegus locus labels and TASSEL locus indices.

    TASSEL relabels loci by their index in the data it receives, regardless
    of their absolute position in the saegus data. This function stores two
    dictionaries in a shelve file so results can be translated back and forth
    between TASSEL-labelled loci and saegus-labelled loci.

    :param str loci_conversion_file_name: Name of the shelve file to write.
    :param concordant_segregating_loci: Ordered iterable of saegus loci; the
        position of a locus in this iterable is used as its TASSEL index.
    :return: None. The shelve file receives the keys ``'saegus_to_tassel'``
        (saegus locus -> TASSEL index) and ``'tassel_to_saegus'``
        (TASSEL index -> saegus locus).
    """
    saegus_to_tassel = {}
    tassel_to_saegus = {}
    # enumerate() yields (position, locus). The earlier version unpacked the
    # pair in the opposite order, which stored each mapping under the other
    # mapping's key.
    for tassel_index, saegus_locus in enumerate(concordant_segregating_loci):
        saegus_to_tassel[saegus_locus] = tassel_index
        tassel_to_saegus[tassel_index] = saegus_locus

    with shelve.open(loci_conversion_file_name) as loci_conversion_file:
        loci_conversion_file['saegus_to_tassel'] = saegus_to_tassel
        loci_conversion_file['tassel_to_saegus'] = tassel_to_saegus
        

In [None]:
record_loci_conversions('demo_example_loc_verts', concordant_segregating_loci)

In [None]:
reop = shelve.open('demo_example_loc_verts')

In [None]:
# enumerate() yields (TASSEL index, saegus locus); unpack in that order so the
# dictionary really maps saegus locus -> TASSEL index as its name says.
sae_to_tas = {locus: idx for idx, locus in enumerate(concordant_segregating_loci)}

In [None]:
sae_to_tas

In [None]:
inverted_sae_to_tas = dict(zip(sae_to_tas.values(), sae_to_tas.keys()))

In [None]:
inverted_sae_to_tas

In [None]:
sae_to_tas

In [36]:
# The shelf is opened here and intentionally left open; the writes and the
# close() call below are disabled.
loci_conversions = shelve.open('demonstration_loci_conversions')
# The position of a locus in `concordant_segregating_loci` is its TASSEL index.
tassel_to_saegus_loci = dict(enumerate(concordant_segregating_loci))
saegus_to_tassel_loci = {
    saegus_locus: tassel_index
    for tassel_index, saegus_locus in tassel_to_saegus_loci.items()
}
#loci_conversions['saegus_to_tassel'] = saegus_to_tassel_loci
#loci_conversions['tassel_to_saegus'] = tassel_to_saegus_loci
#loci_conversions.close()

In [37]:
saegus_to_tassel_loci

{1: 0,
 2: 1,
 3: 2,
 4: 3,
 5: 4,
 6: 5,
 8: 6,
 10: 7,
 12: 8,
 13: 9,
 14: 10,
 15: 11,
 16: 12,
 20: 13,
 21: 14,
 23: 15,
 24: 16,
 25: 17,
 26: 18,
 27: 19,
 29: 20,
 30: 21,
 31: 22,
 32: 23,
 34: 24,
 36: 25,
 37: 26,
 38: 27,
 39: 28,
 40: 29,
 42: 30,
 45: 31,
 46: 32,
 50: 33,
 52: 34,
 53: 35,
 54: 36,
 56: 37,
 59: 38,
 61: 39,
 62: 40,
 63: 41,
 66: 42,
 67: 43,
 68: 44,
 70: 45,
 71: 46,
 74: 47,
 76: 48,
 77: 49,
 79: 50,
 80: 51,
 82: 52,
 86: 53,
 88: 54,
 90: 55,
 91: 56,
 95: 57,
 96: 58,
 97: 59,
 98: 60,
 99: 61,
 100: 62,
 101: 63,
 103: 64,
 104: 65,
 106: 66,
 107: 67,
 108: 68,
 109: 69,
 114: 70,
 115: 71,
 116: 72,
 119: 73,
 120: 74,
 121: 75,
 122: 76,
 123: 77,
 124: 78,
 126: 79,
 127: 80,
 128: 81,
 129: 82,
 130: 83,
 132: 84,
 134: 85,
 136: 86,
 138: 87,
 139: 88,
 140: 89,
 141: 90,
 144: 91,
 145: 92,
 146: 93,
 147: 94,
 149: 95,
 152: 96,
 153: 97,
 155: 98,
 158: 99,
 159: 100,
 163: 101,
 165: 102,
 168: 103,
 170: 104,
 171: 105,
 172: 106,
 1

In [38]:
qtl

(56, 255, 465, 472, 616, 1030, 1141, 1247, 1327, 1468)

In [39]:
allele_effects_table = analyze.generate_allele_effects_table(qtl, alleles, 
                                                allele_effects, saegus_to_tassel_loci)

In [40]:
allele_effects_table

Unnamed: 0,locus,tassel_locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference
0,56,37,1,2.281269,3,0.483576,1.797694
1,255,148,3,1.563291,1,0.67475,0.888541
2,465,288,1,0.68884,3,1.529349,0.840509
3,472,292,1,1.211195,0,0.084054,1.127141
4,616,390,1,0.190864,3,0.289322,0.098458
5,1030,657,3,0.495063,1,0.260346,0.234717
6,1141,726,1,1.123208,2,1.063125,0.060083
7,1247,795,3,0.056021,1,3.863567,3.807546
8,1327,850,0,3.507663,2,0.791271,2.716392
9,1468,936,2,1.209494,0,0.769592,0.439902


In [None]:
saegus_to_tassel_loci = loci_conversions['saegus_to_tassel']

In [None]:
saegus_to_tassel_loci

In [43]:
len(saegus_to_tassel_loci)

943

In [44]:
allele_effects_table

Unnamed: 0,locus,tassel_locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference
0,56,37,1,2.281269,3,0.483576,1.797694
1,255,148,3,1.563291,1,0.67475,0.888541
2,465,288,1,0.68884,3,1.529349,0.840509
3,472,292,1,1.211195,0,0.084054,1.127141
4,616,390,1,0.190864,3,0.289322,0.098458
5,1030,657,3,0.495063,1,0.260346,0.234717
6,1141,726,1,1.123208,2,1.063125,0.060083
7,1247,795,3,0.056021,1,3.863567,3.807546
8,1327,850,0,3.507663,2,0.791271,2.716392
9,1468,936,2,1.209494,0,0.769592,0.439902


In [45]:
remapped_loci = [saegus_to_tassel_loci[locus] for locus in allele_effects_table['locus']]

In [47]:
allele_effects_table

Unnamed: 0,locus,tassel_locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference
0,56,37,1,2.281269,3,0.483576,1.797694
1,255,148,3,1.563291,1,0.67475,0.888541
2,465,288,1,0.68884,3,1.529349,0.840509
3,472,292,1,1.211195,0,0.084054,1.127141
4,616,390,1,0.190864,3,0.289322,0.098458
5,1030,657,3,0.495063,1,0.260346,0.234717
6,1141,726,1,1.123208,2,1.063125,0.060083
7,1247,795,3,0.056021,1,3.863567,3.807546
8,1327,850,0,3.507663,2,0.791271,2.716392
9,1468,936,2,1.209494,0,0.769592,0.439902


In [49]:
import h5py

In [50]:
# Open read-only explicitly: this handle is only used for inspection, and the
# default mode of h5py.File differs between h5py versions.
afs = h5py.File('demonstration_allele_frequency_storage.h5', 'r')

In [62]:
np.array(afs['demonstration']['0']['500']['axis1'])

array([   0,    1,    2, ..., 1475, 1476, 1477], dtype=int64)

In [70]:
pd.DataFrame(np.array(afs['demonstration/0/500/block0_values']))

Unnamed: 0,0,1,2,3
0,2.0,0.000,1.0,1.000
1,3.0,0.129,1.0,0.871
2,1.0,0.130,3.0,0.870
3,2.0,0.120,0.0,0.880
4,0.0,0.049,2.0,0.951
5,2.0,0.209,0.0,0.791
6,2.0,0.235,0.0,0.765
7,1.0,0.000,3.0,1.000
8,2.0,0.093,0.0,0.907
9,3.0,0.000,1.0,1.000


In [71]:
afs.close()

In [72]:
afd = pd.HDFStore('demonstration_allele_frequency_storage.h5')

In [74]:
afd['/demonstration/0/500']

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency
0,2.0,0.000,1.0,1.000
1,3.0,0.129,1.0,0.871
2,1.0,0.130,3.0,0.870
3,2.0,0.120,0.0,0.880
4,0.0,0.049,2.0,0.951
5,2.0,0.209,0.0,0.791
6,2.0,0.235,0.0,0.765
7,1.0,0.000,3.0,1.000
8,2.0,0.093,0.0,0.907
9,3.0,0.000,1.0,1.000


In [41]:
expanded = analyze.remap_ae_table_loci(allele_effects_table, saegus_to_tassel_loci)

ValueError: Shape of passed values is (943, 6), indices imply (1, 6)

In [None]:
expanded

In [None]:
allele_frequencies = pd.HDFStore('demonstration_storage.h5', mode='r')

In [75]:
afrqs = allele_frequencies['/demonstration/0/500']

In [95]:
import h5py

In [96]:
# Open read-only explicitly: this handle is only used for inspection, and the
# default mode of h5py.File differs between h5py versions.
h5pyaf = h5py.File('demonstration_allele_frequency_storage.h5', 'r')

In [100]:
list(h5pyaf.keys())

['demonstration']

In [113]:
for k in list(h5pyaf["demonstration/0/500/axis0/"].values()):

AttributeError: 'Dataset' object has no attribute 'values'

In [133]:
som = h5pyaf['demonstration/0/500/']

In [138]:
np.array(som['axis1'])

array([   0,    1,    2, ..., 1475, 1476, 1477], dtype=int64)

In [139]:
list(som.keys())

['axis0', 'axis1', 'block0_items', 'block0_values']

In [159]:
thing = list(som['axis0'])[0].decode()

In [160]:
thing

'minor_allele'

In [152]:
thing?

In [164]:
list(som['block0_values'].attrs)

['CLASS', 'VERSION', 'TITLE', 'FLAVOR', 'transposed']

In [146]:
list(som['block0_values'])

[array([ 2.,  0.,  1.,  1.]),
 array([ 3.   ,  0.129,  1.   ,  0.871]),
 array([ 1.  ,  0.13,  3.  ,  0.87]),
 array([ 2.  ,  0.12,  0.  ,  0.88]),
 array([ 0.   ,  0.049,  2.   ,  0.951]),
 array([ 2.   ,  0.209,  0.   ,  0.791]),
 array([ 2.   ,  0.235,  0.   ,  0.765]),
 array([ 1.,  0.,  3.,  1.]),
 array([ 2.   ,  0.093,  0.   ,  0.907]),
 array([ 3.,  0.,  1.,  1.]),
 array([ 3. ,  0.4,  1. ,  0.6]),
 array([ 3.,  0.,  1.,  1.]),
 array([ 1.   ,  0.382,  3.   ,  0.618]),
 array([ 0. ,  0.1,  2. ,  0.9]),
 array([ 0.   ,  0.092,  3.   ,  0.908]),
 array([ 1.   ,  0.389,  3.   ,  0.611]),
 array([ 3.   ,  0.116,  2.   ,  0.884]),
 array([ 3.,  0.,  2.,  1.]),
 array([ 0.,  0.,  2.,  1.]),
 array([ 4.,  0.,  5.,  1.]),
 array([ 0.   ,  0.386,  3.   ,  0.614]),
 array([ 1.   ,  0.138,  2.   ,  0.862]),
 array([ 3.,  0.,  2.,  1.]),
 array([ 5.   ,  0.434,  4.   ,  0.566]),
 array([ 3.   ,  0.064,  1.   ,  0.936]),
 array([ 0.   ,  0.251,  2.   ,  0.749]),
 array([ 0.   ,  0.066,  3. 

In [124]:
tuple(som.attrs)

('TITLE',
 'CLASS',
 'VERSION',
 'pandas_type',
 'pandas_version',
 'encoding',
 'ndim',
 'axis0_variety',
 'axis1_variety',
 'nblocks',
 'block0_items_variety')

In [131]:
som.attrs['axis1_variety']

OSError: Unable to read attribute (No appropriate function for conversion path)

In [116]:
h5py.enable_ipython_completer()

ImportError: cannot import name 'generics'

In [115]:
som['b

<HDF5 group "/demonstration/0/500" (4 members)>

In [94]:
afrqs

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency
0,2.0,0.000,1.0,1.000
1,3.0,0.129,1.0,0.871
2,1.0,0.130,3.0,0.870
3,2.0,0.120,0.0,0.880
4,0.0,0.049,2.0,0.951
5,2.0,0.209,0.0,0.791
6,2.0,0.235,0.0,0.765
7,1.0,0.000,3.0,1.000
8,2.0,0.093,0.0,0.907
9,3.0,0.000,1.0,1.000


In [78]:
expop = multi_prefounders.population(0)

In [80]:
sim.stat(expop, alleleFreq=sim.ALL_AVAIL)

In [92]:
af = analyze.allele_data(expop, alleles, loci)

In [93]:
af

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency
0,2.0,0.00000,1.0,1.00000
1,3.0,0.12850,1.0,0.87150
2,1.0,0.11900,3.0,0.88100
3,2.0,0.12850,0.0,0.87150
4,0.0,0.06125,2.0,0.93875
5,2.0,0.24425,0.0,0.75575
6,2.0,0.23700,0.0,0.76300
7,1.0,0.00000,3.0,1.00000
8,2.0,0.11225,0.0,0.88775
9,3.0,0.00000,1.0,1.00000


In [90]:
analyze.remap_allele_effect_and_frq_table_loci(aeftable, concordant_segregating_loci)

Unnamed: 0,locus,alpha_allele,alpha_frequency,alpha_effect,beta_allele,beta_frequency,beta_effect,difference
0,1,1,0.87150,0.000000,3,0.12850,0.000000,0.000000
1,2,3,0.88100,0.000000,1,0.11900,0.000000,0.000000
2,3,0,0.87150,0.000000,2,0.12850,0.000000,0.000000
3,4,2,0.93875,0.000000,0,0.06125,0.000000,0.000000
4,5,0,0.75575,0.000000,2,0.24425,0.000000,0.000000
5,6,0,0.76300,0.000000,2,0.23700,0.000000,0.000000
6,8,0,0.88775,0.000000,2,0.11225,0.000000,0.000000
7,10,1,0.60875,0.000000,3,0.39125,0.000000,0.000000
8,12,3,0.64525,0.000000,1,0.35475,0.000000,0.000000
9,13,2,0.88450,0.000000,0,0.11550,0.000000,0.000000


In [91]:
analyze.remap_afrq_table_loci(af, concordant_segregating_loci)

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency
0,3.0,0.12850,1.0,0.87150
1,1.0,0.11900,3.0,0.88100
2,2.0,0.12850,0.0,0.87150
3,0.0,0.06125,2.0,0.93875
4,2.0,0.24425,0.0,0.75575
5,2.0,0.23700,0.0,0.76300
6,2.0,0.11225,0.0,0.88775
7,3.0,0.39125,1.0,0.60875
8,1.0,0.35475,3.0,0.64525
9,0.0,0.11550,2.0,0.88450


In [86]:
aeftable = analyze.generate_allele_effects_frequencies(expop, alleles, qtl, allele_effects)

In [None]:
aefs = pd.HDFStore('demonstration_allele_.h5')

In [76]:
loci = list(range(1478))
# Every locus that is NOT segregating in all samples. Membership is tested
# against a set so each lookup is constant time rather than a scan of the
# whole tuple of segregating loci.
segregating_loci_lookup = set(concordant_segregating_loci)
droppable_loci = [droppable_locus for droppable_locus in loci if droppable_locus not in segregating_loci_lookup]

In [77]:
# Write one tab-separated "super table" per (sample size, replicate) pair.
# NOTE: `expanded` must exist before this cell runs; the recorded run failed
# with a NameError because the cell that assigns it had raised.
for sample in sample_sizes:
    for rep in range(number_of_replicates):
        # Shared stem of every file name belonging to this replicate/sample.
        file_prefix = 'demonstration_' + str(rep) + '_' + str(sample)
        allele_frequencies_table = allele_frequencies['/demonstration/' + str(rep) + '/' + str(sample)]
        subset_afrqs_table = analyze.remap_afrq_table_loci(allele_frequencies_table, saegus_to_tassel_loci)
        gwas_results_file_name = file_prefix + '_out_2.txt'
        qvalues_file_name = file_prefix + '_qvalues.txt'
        super_table = analyze.generate_super_table(
            gwas_results_file_name,
            qvalues_file_name,
            subset_afrqs_table,
            expanded,
        )
        super_table.to_csv(file_prefix + '_super_table.txt', sep='\t')

NameError: name 'expanded' is not defined

In [None]:
super_table

In [None]:
remapped_afrqs = analyze.remap_afrq_table_loci(allele_frequencies_table, saegus_to_tassel_loci)

In [None]:
sutable = analyze.generate_super_table('demonstration_0_1000_out_2.txt', 'demonstration_0_1000_qvalues.txt', remapped_afrqs, expanded)

In [None]:
sutable

In [None]:
analyze.write_multiple_sample_analyzer(sample_library, sample_sizes, qtl, alleles, 
                                       allele_effects, 0.7,  concordant_segregating_loci, 
                                       run_id='demonstration', 
                                       allele_frequency_hdf='demonstration_storage.h5')

In [None]:
expanded

In [None]:
demonstration = analyze.Study(run_id)

In [None]:
power_fpr_raw_data = demonstration.collect_power_analysis_data(sample_sizes, number_of_replicates, expanded)

In [None]:
# Store the raw power/FPR data keyed by sample size (shelve keys must be
# strings). The context manager closes the shelf even if a write fails.
with shelve.open('demonstration_raw_data') as raw_demonstration_data:
    for size, data_list in power_fpr_raw_data.items():
        raw_demonstration_data[str(size)] = data_list

In [None]:
power_fpr_raw_data

In [None]:
power_fpr_raw_data[500][0].index

In [None]:
subsuper_table = power_fpr_raw_data[500][0].join(subset)

In [None]:
power_fpr_raw_data

In [None]:
subsuper_table

In [None]:
results, true_positives, false_positives = demonstration.calculate_power_fpr(power_fpr_raw_data, sample_sizes, 
                                                                             number_of_replicates, number_of_qtl)

In [None]:
qtl

In [None]:
results

In [None]:
true_positives

In [None]:
results

In [None]:
true_positives

In [None]:
results.to_csv(run_id + '_fpr_results.txt', sep='\t')

In [None]:
detection_table = demonstration.probability_of_detection(allele_effects_table, sample_sizes, 
                                                         number_of_replicates, true_positives)

In [None]:
detection_table

In [None]:
detection_table.to_csv(run_id + '_probability_of_detection.txt', sep='\t')

In [None]:
mean_stdev = pd.DataFrame([results.mean(), results.std()], index=['mean', 'standard_dev']).T

In [None]:
mean_stdev.to_csv('beneath_these_waves_power_mean_stdev')

In [None]:
# Correlate effect-size difference with detection using the table built by
# `probability_of_detection` above; `prob_detection_table` is never defined in
# this notebook.
# NOTE(review): assumes `detection_table` has a 'detected' column -- confirm.
detection_table['difference'].corr(detection_table['detected'])

In [None]:
results.to_csv('beneath_these_waves_power_fpr_results.txt', sep='\t', index=False)

### Running Family Comparison

#### Modify Existing Phenotype Trait Vector
    Reads an existing phenotype file, replaces its trait values, and writes
    the result under a new file name.

In [None]:
def modify_existing_phenotypes(existing_phenotype_file, modded_values, modified_phenotype_file):
    """
    Copy a tab-separated phenotype file, swapping in new trait values.

    The existing file is parsed with pandas (its first line is consumed as
    the column header), the second column is overwritten with
    ``modded_values`` and the rows are written to ``modified_phenotype_file``
    beneath a fixed two-column header line (``<Trait>`` and ``sim``).

    :param existing_phenotype_file: Path of the tab-separated file to read.
    :param modded_values: Replacement values assigned to the second column.
    :param modified_phenotype_file: Path of the file to write.
    :return: None
    """
    # np.array() on the parsed frame already produces an independent array,
    # so the source data is never modified in place.
    phenotype_rows = np.array(pd.read_csv(existing_phenotype_file, sep='\t'))
    phenotype_rows[:, 1] = modded_values

    with open(modified_phenotype_file, 'w') as output_handle:
        output_handle.write("<Trait>\tsim\n")
        pd.DataFrame(phenotype_rows).to_csv(output_handle, sep='\t', index=False, header=False)

In [None]:
modify_existing_phenotypes("heaven_denies_0_750_phenotype_vector.txt",
                          np.ones((750)),
                          "blood_hands_0_750_phenotype_vector.txt")

## Newly Developed Functions

### Collect Samples from Each Replicate
    Stores them in a dictionary of lists, keyed by replicate

In [None]:
def collect_samples(replicate_populations, sample_sizes, run_id):
    """
    Draw one random sample of each requested size from every replicate.

    Testing for concordance of segregating loci among samples requires that
    the samples be gathered in advance; this function gathers them.

    :param replicate_populations: Multi-replicate population to analyze; must
        provide ``populations()``.
    :param sample_sizes: Sizes of the samples to draw from each replicate.

    :note: ``len(sample_sizes)`` == number of samples gathered from each replicate.

    :param str run_id: Identifier (not used inside this function).
    :return: Dictionary mapping each replicate's ``dvars().rep`` value to the
        list of samples drawn from it, in the order of ``sample_sizes``.
    """
    return {
        replicate.dvars().rep: [
            sim.sampling.drawRandomSample(replicate, sizes=size)
            for size in sample_sizes
        ]
        for replicate in replicate_populations.populations()
    }

### Calculates Allele Frequencies for Each Sample
    Stores in a single HDF File

In [None]:
def multi_sample_allele_frq_storage(library_of_samples, alleles, run_id='heaven_denies'):
    """
    Compute allele frequency data for every sample and store it in one HDF file.

    The file is named ``run_id + '_afrqs.h5'``. Each sample's table is stored
    under the key ``run_id/<replicate id>/<sample population size>``.

    :param dict library_of_samples: Maps a replicate id to a list of sample
        populations (each must provide ``totNumLoci()`` and ``popSize()``).
    :param alleles: Allele table passed through to ``analyze.allele_data``.
    :param str run_id: Identifier used for the file name and the key prefix.
    :return: None
    """
    hdf_store = pd.HDFStore(run_id + '_afrqs.h5')
    # try/finally guarantees the store is closed even when computing or
    # writing a table raises; otherwise the HDF file handle would stay open.
    try:
        for rep_id, samples in library_of_samples.items():
            for sample in samples:
                af = analyze.allele_data(sample, alleles,
                                         range(sample.totNumLoci()))

                name = run_id + '/' + str(rep_id) + '/' + str(sample.popSize())

                hdf_store.put(name, af)
    finally:
        hdf_store.close()

# Visualization

In [None]:
# NOTE(review): `bokeh.charts` is not shipped with recent Bokeh releases; this
# cell presumably needs the older Bokeh version the notebook was written for.
from bokeh.io import output_notebook
from bokeh.charts import Dot, show, output_file

output_notebook()

# Example data: one power value per sample size, all from replicate '0'.
data_2 = {
    'power': [0.2, 0.4, 0.6, 0.8],
    'sample_sizes': ['250', '500', '750', '1000'],
    'rep': ['0', '0', '0', '0'],
}

# x-axis labels pulled from the interpreter column, stacking labels from sample column
#dots = Dot(data, values='timing', label='interpreter',
#           group='sample', agg='mean',
#           title="Python Interpreter Sampling",
#           legend='top_right', width=600)

dots_2 = Dot(data_2, values='power', label='sample_sizes',
             group='rep', agg='mean',
             title="First Bokeh Dot PLot",
             legend='top_right', width=600)


output_file("dots_example.html", title="dots.py example")

# Show the chart that is actually built above; `dots` only exists in the
# commented-out example.
show(dots_2)
