In [1]:
import simuOpt
simuOpt.setOptions(alleleType='short', optimized=True, numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
import collections as col
np.set_printoptions(suppress=True, precision=3)

In [2]:
run_id='demonstration'

In [3]:
number_of_replicates = 5

In [4]:
sample_sizes=[500, 600, 700, 800, 900, 1000]

In [5]:
number_of_qtl = 10

In [6]:
# Build the replicate populations that every later cell samples from.
# NOTE(review): no random seed is set anywhere in this notebook, so each
# re-run produces different populations and different downstream results.

# Load the saved prefounder population and create `number_of_replicates`
# replicates of it inside a simuPOP Simulator.
prefounders = sim.loadPopulation('prefounders1478.pop')
multi_prefounders = sim.Simulator(prefounders, number_of_replicates, stealPops=False)
# Four founder pairs -- presumably individual IDs within the prefounders;
# TODO confirm against breed.MAGIC.
founders = [[2, 26], [3, 25], [4, 24], [5, 23]]
os_per_pair = 500
# [0.01]*1478 supplies one value per locus (1478 loci); presumably the
# per-locus recombination rates -- confirm against breed.MAGIC.
magic = breed.MAGIC(multi_prefounders, founders, [0.01]*1478)
# NOTE(review): looks like ID assignment is restarted at 27 so that new
# offspring IDs follow the prefounder IDs -- confirm.
sim.tagID(prefounders, reset=27)
magic.generate_f_one(founders, os_per_pair)
# First round of crosses: the chooser is built from the mother/father choices
# returned by MultiRandomCross (arguments 4 and 500 are presumably the number
# of groups and crosses per group -- TODO confirm).
mrc = breed.MultiRandomCross(multi_prefounders, 4, 500)
mother_choices, father_choices = mrc.determine_random_cross()
multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(mother_choices, father_choices)
# Evolve exactly one generation: parents come from the custom chooser, each
# mating yields one offspring that is ID-tagged, pedigree-tagged and
# recombined at rate 0.01; the offspring generation has 2000 individuals.
multi_prefounders.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(multi_snd_ord_chooser.snd_ord_id_pairs),
        sim.OffspringGenerator(ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01)
        ],
            numOffspring=1),
        subPopSize=[2000],
    ),
    gen=1,
)
# Second round of crosses, same mating scheme as above but with the
# MultiRandomCross arguments changed to 2 and 1000.
final_mrc = breed.MultiRandomCross(multi_prefounders, 2, 1000)
final_mothers, final_fathers = final_mrc.determine_random_cross()
final_multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(final_mothers, final_fathers)
multi_prefounders.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(final_multi_snd_ord_chooser.snd_ord_id_pairs),
        sim.OffspringGenerator(ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01)
        ],
            numOffspring=1),
        subPopSize=[2000],
    ),
    gen=1,
)
# Finish with random mating; the cell output reports 3 generations, so the
# first argument is the generation count (2000 is presumably the population
# size per generation -- TODO confirm).
mater = breed.MAGIC(multi_prefounders, founders, [0.01]*1478)
mater.random_mating(3, 2000)

Initiating random mating for 3 generations.


In [8]:
sample_library = demonstration.collect_samples(multi_prefounders, sample_sizes)

In [9]:
sample_library

{0: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 1: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 2: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 3: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>],
 4: [<simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>,
  <simuPOP.Population>]}

In [29]:
pwd

'C:\\tassel\\output'

In [30]:
alleles = np.array(pd.read_hdf('alleles_at_1478_loci.hdf'))

In [11]:
alleles

array([[1, 2],
       [1, 3],
       [3, 1],
       ..., 
       [1, 0],
       [3, 0],
       [3, 1]], dtype=int64)

In [12]:
from saegus import analyze

In [13]:
analyze.multi_sample_allele_frq_storage(sample_library, alleles, run_id)



In [14]:
allele_frequencies = pd.HDFStore('demonstration_storage.h5', mode='r')

In [15]:
allele_frequencies['/demonstration/0/500']

Unnamed: 0,minor_allele,minor_frequency,major_allele,major_frequency
0,2.0,0.000,1.0,1.000
1,3.0,0.126,1.0,0.874
2,1.0,0.121,3.0,0.879
3,2.0,0.121,0.0,0.879
4,0.0,0.081,2.0,0.919
5,2.0,0.245,0.0,0.755
6,2.0,0.236,0.0,0.764
7,1.0,0.000,3.0,1.000
8,2.0,0.141,0.0,0.859
9,3.0,0.000,1.0,1.000


In [16]:
allele_frequencies.close()

In [17]:
sets_of_segregating_loci = demonstration.seg_loci_among_samples(sample_library)

In [18]:
sets_of_segregating_loci.values()

dict_values([30])

In [19]:
concordant_segregating_loci = list(sets_of_segregating_loci.keys())[0]

In [20]:
concordant_segregating_loci[::100]

(1, 159, 329, 483, 632, 785, 950, 1095, 1254, 1413)

In [21]:
qtl = sorted(random.sample(concordant_segregating_loci, number_of_qtl))

In [22]:
qtl

[191, 220, 452, 785, 1036, 1214, 1299, 1311, 1406, 1476]

In [23]:
add_trait = parameters.Trait()
allele_effects = add_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=1)

In [24]:
allele_effects

{191: {0: 1.9647429123958629, 2: 1.5879600600780117},
 220: {1: 2.575873762161676, 2: 2.600574159384017},
 452: {1: 1.9377948804143905, 3: 0.5245134727770605},
 785: {0: 1.80750524477133, 2: 2.976466128510765},
 1036: {0: 4.108767182037622, 2: 0.9896538246082371},
 1214: {0: 2.1378185827566654, 2: 1.1323493003809182},
 1299: {0: 1.0790198847101855, 2: 2.490878890848389},
 1311: {1: 0.5674338651841051, 3: 5.77855788715338},
 1406: {0: 8.619429513796378, 2: 7.211670895068234},
 1476: {0: 3.024908723436104, 3: 1.8032665925914197}}

In [None]:
# Persist the allele effects under the 'demonstration' key so that a later
# session can reload the exact same effects. The context manager closes the
# shelf even if pickling the value raises, so the file is never left open.
with shelve.open('allele_effects_storage') as allele_effects_store:
    allele_effects_store['demonstration'] = allele_effects

In [6]:
loci_conversions = shelve.open('demonstration_loci_conversions')

In [9]:
loci_conversions['saegus_to_tassel'][191]

116

In [8]:
allele_effects_storage = shelve.open('allele_effects_storage')

In [9]:
allele_effects_storage['demonstration']

{191: {0: 1.9647429123958629, 2: 1.5879600600780117},
 220: {1: 2.575873762161676, 2: 2.600574159384017},
 452: {1: 1.9377948804143905, 3: 0.5245134727770605},
 785: {0: 1.80750524477133, 2: 2.976466128510765},
 1036: {0: 4.108767182037622, 2: 0.9896538246082371},
 1214: {0: 2.1378185827566654, 2: 1.1323493003809182},
 1299: {0: 1.0790198847101855, 2: 2.490878890848389},
 1311: {1: 0.5674338651841051, 3: 5.77855788715338},
 1406: {0: 8.619429513796378, 2: 7.211670895068234},
 1476: {0: 3.024908723436104, 3: 1.8032665925914197}}

In [10]:
allele_effects = allele_effects_storage['demonstration']

In [31]:
allele_effects_storage.close()

In [32]:
loci_conversions.close()

In [11]:
allele_effects

{191: {0: 1.9647429123958629, 2: 1.5879600600780117},
 220: {1: 2.575873762161676, 2: 2.600574159384017},
 452: {1: 1.9377948804143905, 3: 0.5245134727770605},
 785: {0: 1.80750524477133, 2: 2.976466128510765},
 1036: {0: 4.108767182037622, 2: 0.9896538246082371},
 1214: {0: 2.1378185827566654, 2: 1.1323493003809182},
 1299: {0: 1.0790198847101855, 2: 2.490878890848389},
 1311: {1: 0.5674338651841051, 3: 5.77855788715338},
 1406: {0: 8.619429513796378, 2: 7.211670895068234},
 1476: {0: 3.024908723436104, 3: 1.8032665925914197}}

In [16]:
demon_allele_effects

{191: {0: 1.9647429123958629, 2: 1.5879600600780117},
 220: {1: 2.575873762161676, 2: 2.600574159384017},
 452: {1: 1.9377948804143905, 3: 0.5245134727770605},
 785: {0: 1.80750524477133, 2: 2.976466128510765},
 1036: {0: 4.108767182037622, 2: 0.9896538246082371},
 1214: {0: 2.1378185827566654, 2: 1.1323493003809182},
 1299: {0: 1.0790198847101855, 2: 2.490878890848389},
 1311: {1: 0.5674338651841051, 3: 5.77855788715338},
 1406: {0: 8.619429513796378, 2: 7.211670895068234},
 1476: {0: 3.024908723436104, 3: 1.8032665925914197}}

In [18]:
allele_effects_storage['demonstration']

{191: {0: 1.9647429123958629, 2: 1.5879600600780117},
 220: {1: 2.575873762161676, 2: 2.600574159384017},
 452: {1: 1.9377948804143905, 3: 0.5245134727770605},
 785: {0: 1.80750524477133, 2: 2.976466128510765},
 1036: {0: 4.108767182037622, 2: 0.9896538246082371},
 1214: {0: 2.1378185827566654, 2: 1.1323493003809182},
 1299: {0: 1.0790198847101855, 2: 2.490878890848389},
 1311: {1: 0.5674338651841051, 3: 5.77855788715338},
 1406: {0: 8.619429513796378, 2: 7.211670895068234},
 1476: {0: 3.024908723436104, 3: 1.8032665925914197}}

In [19]:
allele_effects_storage.close()

In [12]:
qtl = sorted(list(allele_effects.keys()))

In [13]:
qtl

[191, 220, 452, 785, 1036, 1214, 1299, 1311, 1406, 1476]

In [7]:
# Build the two index maps between saegus locus numbers and their position in
# `concordant_segregating_loci` (the position is used as the TASSEL locus):
#   saegus_to_tassel_loci[locus] -> position
#   tassel_to_saegus_loci[position] -> locus
# NOTE(review): the shelf is opened here but never closed in this cell, and
# the lines that would persist the freshly built maps are commented out, so
# later cells read whatever is already stored in the shelf -- confirm that
# the stored maps match the ones built here.
loci_conversions = shelve.open('demonstration_loci_conversions')
saegus_to_tassel_loci = {}
tassel_to_saegus_loci = {}
for idx, locus in enumerate(concordant_segregating_loci):
    saegus_to_tassel_loci[locus] = idx
    tassel_to_saegus_loci[idx] = locus
#loci_conversions['saegus_to_tassel'] = saegus_to_tassel_loci
#loci_conversions['tassel_to_saegus'] = tassel_to_saegus_loci
#loci_conversions.close()

In [15]:
loci_conversions['saegus_to_tassel']

{1: 0,
 2: 1,
 3: 2,
 4: 3,
 5: 4,
 6: 5,
 8: 6,
 10: 7,
 12: 8,
 13: 9,
 14: 10,
 15: 11,
 16: 12,
 20: 13,
 21: 14,
 23: 15,
 24: 16,
 25: 17,
 26: 18,
 27: 19,
 29: 20,
 30: 21,
 31: 22,
 32: 23,
 34: 24,
 36: 25,
 37: 26,
 38: 27,
 39: 28,
 40: 29,
 42: 30,
 45: 31,
 46: 32,
 50: 33,
 52: 34,
 53: 35,
 54: 36,
 56: 37,
 59: 38,
 61: 39,
 62: 40,
 63: 41,
 66: 42,
 67: 43,
 68: 44,
 70: 45,
 71: 46,
 74: 47,
 76: 48,
 77: 49,
 79: 50,
 80: 51,
 82: 52,
 86: 53,
 88: 54,
 90: 55,
 91: 56,
 95: 57,
 96: 58,
 97: 59,
 98: 60,
 99: 61,
 100: 62,
 101: 63,
 103: 64,
 104: 65,
 106: 66,
 107: 67,
 108: 68,
 109: 69,
 114: 70,
 115: 71,
 116: 72,
 119: 73,
 120: 74,
 121: 75,
 122: 76,
 123: 77,
 124: 78,
 126: 79,
 127: 80,
 128: 81,
 129: 82,
 130: 83,
 132: 84,
 134: 85,
 136: 86,
 138: 87,
 139: 88,
 140: 89,
 141: 90,
 144: 91,
 145: 92,
 146: 93,
 147: 94,
 149: 95,
 152: 96,
 153: 97,
 155: 98,
 158: 99,
 159: 100,
 163: 101,
 165: 102,
 168: 103,
 170: 104,
 171: 105,
 172: 106,
 1

In [17]:
qtl

[191, 220, 452, 785, 1036, 1214, 1299, 1311, 1406, 1476]

In [21]:
loci_conversions['saegus_to_tassel'][1406]

895

In [16]:
loci_conversions['tassel_to_saegus']

{0: 1,
 1: 2,
 2: 3,
 3: 4,
 4: 5,
 5: 6,
 6: 8,
 7: 10,
 8: 12,
 9: 13,
 10: 14,
 11: 15,
 12: 16,
 13: 20,
 14: 21,
 15: 23,
 16: 24,
 17: 25,
 18: 26,
 19: 27,
 20: 29,
 21: 30,
 22: 31,
 23: 32,
 24: 34,
 25: 36,
 26: 37,
 27: 38,
 28: 39,
 29: 40,
 30: 42,
 31: 45,
 32: 46,
 33: 50,
 34: 52,
 35: 53,
 36: 54,
 37: 56,
 38: 59,
 39: 61,
 40: 62,
 41: 63,
 42: 66,
 43: 67,
 44: 68,
 45: 70,
 46: 71,
 47: 74,
 48: 76,
 49: 77,
 50: 79,
 51: 80,
 52: 82,
 53: 86,
 54: 88,
 55: 90,
 56: 91,
 57: 95,
 58: 96,
 59: 97,
 60: 98,
 61: 99,
 62: 100,
 63: 101,
 64: 103,
 65: 104,
 66: 106,
 67: 107,
 68: 108,
 69: 109,
 70: 114,
 71: 115,
 72: 116,
 73: 119,
 74: 120,
 75: 121,
 76: 122,
 77: 123,
 78: 124,
 79: 126,
 80: 127,
 81: 128,
 82: 129,
 83: 130,
 84: 132,
 85: 134,
 86: 136,
 87: 138,
 88: 139,
 89: 140,
 90: 141,
 91: 144,
 92: 145,
 93: 146,
 94: 147,
 95: 149,
 96: 152,
 97: 153,
 98: 155,
 99: 158,
 100: 159,
 101: 163,
 102: 165,
 103: 168,
 104: 170,
 105: 171,
 106: 172,
 1

In [32]:
saegus_to_tassel_loci

{}

In [36]:
allele_effects_table = analyze.generate_allele_effects_table(qtl, alleles, 
                                                allele_effects, loci_conversions['saegus_to_tassel'])

In [38]:
allele_effects_table

Unnamed: 0,locus,tassel_locus,alpha_allele,alpha_effect,beta_allele,beta_effect,difference
0,191,116,0,1.964743,2,1.58796,0.376783
1,220,130,1,2.575874,2,2.600574,0.0247
2,452,280,3,0.524513,1,1.937795,1.413281
3,785,500,2,2.976466,0,1.807505,1.168961
4,1036,663,0,4.108767,2,0.989654,3.119113
5,1214,773,2,1.132349,0,2.137819,1.005469
6,1299,830,2,2.490879,0,1.07902,1.411859
7,1311,839,1,0.567434,3,5.778558,5.211124
8,1406,895,2,7.211671,0,8.61943,1.407759
9,1476,942,3,1.803267,0,3.024909,1.221642


In [39]:
saegus_to_tassel_loci = loci_conversions['saegus_to_tassel']

In [40]:
expanded = analyze.remap_ae_table_loci(allele_effects_table, saegus_to_tassel_loci)

In [41]:
expanded

Unnamed: 0,difference
0,0.000000
1,0.000000
2,0.000000
3,0.000000
4,0.000000
5,0.000000
6,0.000000
7,0.000000
8,0.000000
9,0.000000


In [29]:
allele_frequencies = pd.HDFStore('demonstration_storage.h5', mode='r')

In [32]:
afrqs = allele_frequencies['/demonstration/0/500']

In [38]:
# Loci (out of all 1478) that are absent from the concordant segregating set.
# Membership is tested against a set: `concordant_segregating_loci` is a tuple,
# so testing against it directly rescans the tuple for every one of the
# 1478 loci.
loci = list(range(1478))
concordant_loci_lookup = set(concordant_segregating_loci)
droppable_loci = [droppable_locus for droppable_locus in loci
                  if droppable_locus not in concordant_loci_lookup]

In [117]:
# For every (sample size, replicate) pair: read that sample's allele
# frequency table from the open HDF store, remap its loci, combine it with
# the GWAS results, q-values and expanded allele-effects table into a
# "super table", and save the result as a tab-delimited text file.
for sample in sample_sizes:
    for rep in range(number_of_replicates):
        # All input and output files for one pair share this stem.
        file_stem = 'demonstration_{}_{}'.format(rep, sample)
        gwas_results_file_name = file_stem + '_out_2.txt'
        qvalues_file_name = file_stem + '_qvalues.txt'

        allele_frequencies_table = allele_frequencies[
            '/demonstration/{}/{}'.format(rep, sample)]
        subset_afrqs_table = analyze.remap_afrq_table_loci(
            allele_frequencies_table, saegus_to_tassel_loci)

        super_table = analyze.generate_super_table(
            gwas_results_file_name,
            qvalues_file_name,
            subset_afrqs_table,
            expanded)
        super_table.to_csv(file_stem + '_super_table.txt', sep='\t')

In [115]:
super_table

Unnamed: 0,Chr,df,F,p,add_effect,add_F,add_p,dom_effect,dom_F,dom_p,q,difference,minor_allele,minor_frequency,major_allele,major_frequency
0,1.0,2,0.198370,0.82013,-0.287890,0.09422,0.75900,-0.570790,0.32035,0.57166,0.99982,0.000000,3.0,0.126,1.0,0.874
1,1.0,2,0.507910,0.60207,-0.601770,0.37542,0.54034,0.072850,0.00464,0.94572,0.99982,0.000000,1.0,0.121,3.0,0.879
2,1.0,2,1.032440,0.35690,-1.634600,2.05777,0.15206,-1.428400,1.40574,0.23633,0.99982,0.000000,2.0,0.121,0.0,0.879
3,1.0,1,0.141970,0.70649,,,,,,,0.99982,0.000000,0.0,0.081,2.0,0.919
4,1.0,2,0.609030,0.54429,0.132690,0.05156,0.82046,0.646970,0.98922,0.32042,0.99982,0.000000,2.0,0.245,0.0,0.755
5,1.0,2,0.677510,0.50835,0.551120,1.03912,0.30852,0.059220,0.01015,0.91977,0.99982,0.000000,2.0,0.236,0.0,0.764
6,1.0,2,1.128290,0.32442,-1.382400,1.41966,0.23403,-1.869200,2.24349,0.13482,0.99982,0.000000,2.0,0.141,0.0,0.859
7,1.0,2,0.933870,0.39372,-0.021858,0.00270,0.95854,0.615960,1.80199,0.18009,0.99982,0.000000,3.0,0.383,1.0,0.617
8,1.0,2,1.131010,0.32354,-0.559110,1.49052,0.22272,0.610740,1.56305,0.21181,0.99982,0.000000,1.0,0.328,3.0,0.672
9,1.0,2,0.026590,0.97376,-0.114980,0.00401,0.94954,0.258460,0.01910,0.89014,0.99982,0.000000,0.0,0.141,2.0,0.859


In [110]:
remapped_afrqs = analyze.remap_afrq_table_loci(allele_frequencies_table, saegus_to_tassel_loci)

In [111]:
sutable = analyze.generate_super_table('demonstration_0_1000_out_2.txt', 'demonstration_0_1000_qvalues.txt', remapped_afrqs, expanded)

In [112]:
sutable

Unnamed: 0,Chr,df,F,p,add_effect,add_F,add_p,dom_effect,dom_F,dom_p,q,difference,minor_allele,minor_frequency,major_allele,major_frequency
0,1.0,2,1.09276,0.33569,-0.732620,1.37823,0.24068,-0.992740,2.184980e+00,0.139680,0.999660,0.000000,3.0,0.126,1.0,0.874
1,1.0,2,0.29662,0.74339,0.330020,0.22837,0.63284,-0.545890,5.416900e-01,0.461910,0.999660,0.000000,1.0,0.121,3.0,0.879
2,1.0,2,4.13912,0.01621,-1.906200,7.92878,0.00496,-1.877100,6.802320e+00,0.009240,0.849425,0.000000,2.0,0.121,0.0,0.879
3,1.0,2,0.37656,0.68632,-1.454900,0.74784,0.38737,1.374390,6.311000e-01,0.427140,0.999660,0.000000,0.0,0.081,2.0,0.919
4,1.0,2,0.32135,0.72525,-0.148940,0.13425,0.71414,0.160900,1.522000e-01,0.696520,0.999660,0.000000,2.0,0.245,0.0,0.755
5,1.0,2,0.09374,0.91052,0.158670,0.16627,0.68353,0.033080,7.120000e-03,0.932790,0.999660,0.000000,2.0,0.236,0.0,0.764
6,1.0,2,2.92065,0.05436,1.737790,5.37254,0.02066,1.793360,5.246810e+00,0.022200,0.999660,0.000000,2.0,0.141,0.0,0.859
7,1.0,2,2.54964,0.07862,-0.104380,0.10092,0.75079,-0.706050,5.065310e+00,0.024630,0.999660,0.000000,3.0,0.383,1.0,0.617
8,1.0,2,0.39170,0.67601,0.133530,0.16276,0.68671,-0.292300,7.708400e-01,0.380170,0.999660,0.000000,1.0,0.328,3.0,0.672
9,1.0,2,1.22226,0.29501,-0.998270,2.42943,0.11939,0.747910,1.210940e+00,0.271410,0.999660,0.000000,0.0,0.141,2.0,0.859


In [57]:
analyze.write_multiple_sample_analyzer(sample_library, sample_sizes, qtl, alleles, 
                                       allele_effects, 0.7,  concordant_segregating_loci, 
                                       run_id='demonstration', 
                                       allele_frequency_hdf='demonstration_storage.h5')

In [24]:
expanded

NameError: name 'expanded' is not defined

In [43]:
demonstration = analyze.Study(run_id)

In [44]:
power_fpr_raw_data = demonstration.collect_power_analysis_data(sample_sizes, number_of_replicates, expanded)

In [76]:
# Persist the raw power/FPR data, one shelf entry per sample size (shelf keys
# must be strings, hence str(size)). The context manager closes the shelf even
# if one of the writes raises part-way through the loop.
with shelve.open('demonstration_raw_data') as raw_demonstration_data:
    for size, data_list in power_fpr_raw_data.items():
        raw_demonstration_data[str(size)] = data_list

In [74]:
power_fpr_raw_data

{500: {0:       Chr  df         F        p        q  difference
  0     1.0   2  0.198370  0.82013  0.99982    0.000000
  1     1.0   2  0.507910  0.60207  0.99982    0.000000
  2     1.0   2  1.032440  0.35690  0.99982    0.000000
  3     1.0   1  0.141970  0.70649  0.99982    0.000000
  4     1.0   2  0.609030  0.54429  0.99982    0.000000
  5     1.0   2  0.677510  0.50835  0.99982    0.000000
  6     1.0   2  1.128290  0.32442  0.99982    0.000000
  7     1.0   2  0.933870  0.39372  0.99982    0.000000
  8     1.0   2  1.131010  0.32354  0.99982    0.000000
  9     1.0   2  0.026590  0.97376  0.99982    0.000000
  10    1.0   2  0.113300  0.89291  0.99982    0.000000
  11    1.0   2  0.770060  0.46354  0.99982    0.000000
  12    1.0   2  0.258250  0.77251  0.99982    0.000000
  13    1.0   2  0.482280  0.61767  0.99982    0.000000
  14    1.0   2  0.883250  0.41409  0.99982    0.000000
  15    1.0   2  0.733070  0.48095  0.99982    0.000000
  16    1.0   2  1.523410  0.21899  0.99

In [65]:
power_fpr_raw_data[500][0].index

Int64Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
            ...
            933, 934, 935, 936, 937, 938, 939, 940, 941, 942],
           dtype='int64', length=943)

In [69]:
subsuper_table = power_fpr_raw_data[500][0].join(subset)

In [72]:
power_fpr_raw_data

{500: {0:       Chr  df         F        p        q  difference
  0     1.0   2  0.198370  0.82013  0.99982    0.000000
  1     1.0   2  0.507910  0.60207  0.99982    0.000000
  2     1.0   2  1.032440  0.35690  0.99982    0.000000
  3     1.0   1  0.141970  0.70649  0.99982    0.000000
  4     1.0   2  0.609030  0.54429  0.99982    0.000000
  5     1.0   2  0.677510  0.50835  0.99982    0.000000
  6     1.0   2  1.128290  0.32442  0.99982    0.000000
  7     1.0   2  0.933870  0.39372  0.99982    0.000000
  8     1.0   2  1.131010  0.32354  0.99982    0.000000
  9     1.0   2  0.026590  0.97376  0.99982    0.000000
  10    1.0   2  0.113300  0.89291  0.99982    0.000000
  11    1.0   2  0.770060  0.46354  0.99982    0.000000
  12    1.0   2  0.258250  0.77251  0.99982    0.000000
  13    1.0   2  0.482280  0.61767  0.99982    0.000000
  14    1.0   2  0.883250  0.41409  0.99982    0.000000
  15    1.0   2  0.733070  0.48095  0.99982    0.000000
  16    1.0   2  1.523410  0.21899  0.99

In [73]:
subsuper_table

Unnamed: 0,Chr,df,F,p,q,difference,minor_allele,minor_frequency,major_allele
0,1.0,2,0.198370,0.82013,0.99982,0.000000,3.0,0.126,1.0
1,1.0,2,0.507910,0.60207,0.99982,0.000000,1.0,0.121,3.0
2,1.0,2,1.032440,0.35690,0.99982,0.000000,2.0,0.121,0.0
3,1.0,1,0.141970,0.70649,0.99982,0.000000,0.0,0.081,2.0
4,1.0,2,0.609030,0.54429,0.99982,0.000000,2.0,0.245,0.0
5,1.0,2,0.677510,0.50835,0.99982,0.000000,2.0,0.236,0.0
6,1.0,2,1.128290,0.32442,0.99982,0.000000,2.0,0.141,0.0
7,1.0,2,0.933870,0.39372,0.99982,0.000000,3.0,0.383,1.0
8,1.0,2,1.131010,0.32354,0.99982,0.000000,1.0,0.328,3.0
9,1.0,2,0.026590,0.97376,0.99982,0.000000,0.0,0.141,2.0


In [45]:
results, true_positives, false_positives = demonstration.calculate_power_fpr(power_fpr_raw_data, sample_sizes, 
                                                                             number_of_replicates, number_of_qtl)

In [49]:
qtl

[191, 220, 452, 785, 1036, 1214, 1299, 1311, 1406, 1476]

In [47]:
results

Unnamed: 0,power_500,fpr_500,power_600,fpr_600,power_700,fpr_700,power_800,fpr_800,power_900,fpr_900,power_1000,fpr_1000
0,0,0.00428725,0,0.00428725,0,0.00428725,0.0,0.00428725,0,0.00428725,0,0.00535906
1,0,0.00428725,0,0.00535906,0,0.00321543,0.0,0.00535906,0,0.00643087,0,0.00535906
2,0,0.00535906,0,0.00643087,0,0.00750268,0.1,0.00535906,0,0.00535906,0,0.00535906
3,0,0.00643087,0,0.00428725,0,0.00428725,0.0,0.00643087,0,0.00750268,0,0.00535906
4,0,0.00428725,0,0.00428725,0,0.00643087,0.0,0.00643087,0,0.00535906,0,0.00643087


In [48]:
true_positives

{(500, 0): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (500, 1): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (500, 2): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (500, 3): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (500, 4): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (600, 0): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (600, 1): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (600, 2): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (600, 3): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (600, 4): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (700, 0): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (700, 1): Empty DataFrame
 Columns: [Chr, df, F, p, q, difference]
 Index: [], (700, 2): Empty DataFrame
 Columns: [Chr, df, F, p,

In [None]:
results

In [None]:
true_positives

In [None]:
results.to_csv(run_id + '_fpr_results.txt', sep='\t')

In [None]:
detection_table = demonstration.probability_of_detection(allele_effects_table, sample_sizes, 
                                                         number_of_replicates, true_positives)

In [None]:
detection_table

In [None]:
detection_table.to_csv(run_id + '_probability_of_detection.txt', sep='\t')

In [None]:
mean_stdev = pd.DataFrame([results.mean(), results.std()], index=['mean', 'standard_dev']).T

In [None]:
mean_stdev.to_csv('beneath_these_waves_power_mean_stdev')

In [None]:
# Correlation between the allele-effect difference and detection.
# `prob_detection_table` is never defined in this notebook; the table built by
# `probability_of_detection` above is named `detection_table`.
# NOTE(review): assumes `detection_table` has a 'detected' column -- confirm.
detection_table['difference'].corr(detection_table['detected'])

In [None]:
results.to_csv('beneath_these_waves_power_fpr_results.txt', sep='\t', index=False)

### Running Family Comparison

#### Modify an Existing Phenotype Trait Vector
    Reads an existing phenotype file, replaces its trait values, and writes
    the modified values under a new file name.

In [None]:
def modify_existing_phenotypes(existing_phenotype_file, modded_values, modified_phenotype_file):
    """
    Copy a tab-delimited phenotype file, replacing its second column.

    The first line of ``existing_phenotype_file`` is consumed as a header by
    ``pandas.read_csv``. Every remaining row is kept, except that its second
    column is overwritten with ``modded_values``. The rows are written to
    ``modified_phenotype_file`` beneath a fixed header line made of
    ``<Trait>``, a tab and ``sim``.

    :param existing_phenotype_file: Path of the tab-delimited phenotype file to read.
    :param modded_values: Replacement values for the second column; assigned
        with NumPy slice assignment, so it must be broadcastable to the
        number of data rows.
    :param modified_phenotype_file: Path of the file to write; overwritten if it exists.
    :return: None. The result is written to ``modified_phenotype_file``.
    """
    # np.array(...) on the frame's values always yields a fresh, writable
    # array, so neither the DataFrame nor the source file is touched.
    modified_phenotypes = np.array(
        pd.read_csv(existing_phenotype_file, sep='\t').values)
    modified_phenotypes[:, 1] = modded_values

    header = "<Trait>\tsim\n"
    # Render the rows to a string first and normalise line endings to '\n'.
    # Handing a text-mode handle (opened without newline='') to to_csv lets
    # pandas emit os.linesep, which the handle then translates again: on
    # Windows every row would end in '\r\r\n'.
    rows = pd.DataFrame(modified_phenotypes).to_csv(
        sep='\t', index=False, header=False)
    rows = rows.replace('\r\n', '\n')

    with open(modified_phenotype_file, 'w') as mod_pheno_file:
        mod_pheno_file.write(header)
        mod_pheno_file.write(rows)

In [None]:
modify_existing_phenotypes("heaven_denies_0_750_phenotype_vector.txt",
                          np.ones((750)),
                          "blood_hands_0_750_phenotype_vector.txt")

## Newly Developed Functions

### Collect Samples from Each Replicate
    Stores them in a dictionary of lists, keyed by replicate.

In [None]:
def collect_samples(replicate_populations, sample_sizes, run_id=None):
    """
    Draw one random sample of each requested size from every replicate.

    Testing for concordance of segregating loci among samples requires that
    the samples be gathered in advance, so they are all collected here.

    :param replicate_populations: Multi-replicate population to sample from;
        must provide ``populations()``, and each population must expose its
        replicate index as ``dvars().rep``.
    :param sample_sizes: Iterable of sample sizes. One sample is drawn per
        entry, so ``len(sample_sizes)`` samples are gathered from each replicate.
    :param str run_id: Unused. Optional, and kept only so that existing
        three-argument calls continue to work.
    :return: Dictionary mapping each replicate index to the list of sampled
        populations, in the same order as ``sample_sizes``.
    """
    samples = {}
    for rep in replicate_populations.populations():
        samples[rep.dvars().rep] = [
            sim.sampling.drawRandomSample(rep, sizes=sample_size)
            for sample_size in sample_sizes
        ]
    return samples

### Calculate Allele Frequencies for Each Sample
    Stores them in a single HDF5 file.

In [None]:
def multi_sample_allele_frq_storage(library_of_samples, alleles, run_id='heaven_denies'):
    """
    Compute allele data for every sample and store it in one HDF file.

    The file is named ``run_id + '_afrqs.h5'``. Each sample's table is stored
    under the key ``run_id/rep_id/<sample.popSize()>``, so two samples of the
    same size within one replicate share a key.

    NOTE(review): other cells in this notebook read from
    'demonstration_storage.h5', not '<run_id>_afrqs.h5' -- confirm which file
    name downstream code expects.

    :param library_of_samples: Dictionary mapping a replicate id to an
        iterable of sampled populations.
    :param alleles: Allele information passed through to ``analyze.allele_data``.
    :param str run_id: Prefix of both the HDF file name and every stored key.
    :return: None. Results are written to the HDF file.
    """
    hdf_store = pd.HDFStore(run_id + '_afrqs.h5')
    # try/finally guarantees the store is closed even when allele_data or
    # put raises; otherwise the HDF file would be left open.
    try:
        for rep_id, samples in library_of_samples.items():
            for sample in samples:
                af = analyze.allele_data(sample, alleles,
                                         range(sample.totNumLoci()))
                name = run_id + '/' + str(rep_id) + '/' + str(sample.popSize())
                hdf_store.put(name, af)
    finally:
        hdf_store.close()