In [1]:
import simuOpt
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
import collections as col
np.set_printoptions(suppress=True, precision=3)

In [2]:
# Run configuration: every tunable value used by the cells below.
run_id = 'vandaleer'            # label used for shelf keys and output names
sample_sizes = [250, 500]       # passed to study.collect_samples
number_of_qtl = 10              # how many segregating loci are drawn as QTL
number_of_replicates = 5        # replicate count handed to sim.Simulator
founders = [[2, 26], [3, 25], [4, 24], [5, 23]]  # founder pairs given to breed.MAGIC
os_per_pair = 500               # offspring per pair for the F1 and the first random cross
number_of_loci = 1478           # named once here instead of repeating the literal
recombination_rates = [0.01] * number_of_loci

In [3]:
def _cross_by_id_pairs(replicates, pair_chooser, offspring_total):
    """Evolve every replicate for one generation from pre-chosen parent pairs.

    Parents are supplied by ``pair_chooser.snd_ord_id_pairs``; each mating
    produces one offspring that is ID-tagged, pedigree-tagged and recombined
    at a rate of 0.01. Returns whatever ``Simulator.evolve`` returns.
    """
    offspring_generator = sim.OffspringGenerator(
        ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01),
        ],
        numOffspring=1,
    )
    mating = sim.HomoMating(
        sim.PyParentsChooser(pair_chooser.snd_ord_id_pairs),
        offspring_generator,
        subPopSize=[offspring_total],
    )
    return replicates.evolve(matingScheme=mating, gen=1)


# Load the prefounder population and make independent replicate copies of it.
prefounders = sim.loadPopulation('prefounders1478.pop')
prefounders.addInfoFields('generation')
multi_prefounders = sim.Simulator(prefounders, number_of_replicates, stealPops=False)

# F1: cross the founder pairs.
# NOTE(review): tagID is applied to `prefounders` after the replicates were
# copied (stealPops=False) -- confirm this is the intended target.
magic = breed.MAGIC(multi_prefounders, founders, recombination_rates)
sim.tagID(prefounders, reset=27)
magic.generate_f_one(founders, os_per_pair)

# First random cross between the F1 groups.
mrc = breed.MultiRandomCross(multi_prefounders, 4, os_per_pair)
mother_choices, father_choices = mrc.determine_random_cross()
multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(mother_choices, father_choices)
_cross_by_id_pairs(multi_prefounders, multi_snd_ord_chooser, 2000)

# Final random cross; the value of this last call is the cell's displayed output.
final_mrc = breed.MultiRandomCross(multi_prefounders, 2, 1000)
final_mothers, final_fathers = final_mrc.determine_random_cross()
final_multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(final_mothers, final_fathers)
_cross_by_id_pairs(multi_prefounders, final_multi_snd_ord_chooser, 2000)

(1, 1, 1, 1, 1)

In [4]:
# Instantiate the saegus Study helper with this run's identifier.
study = analyze.Study(run_id)

In [5]:
# Collect samples from the replicates using the configured sample sizes.
# Later cells index the result as sample_library[replicate][i].
sample_library = study.collect_samples(multi_prefounders, sample_sizes)

In [6]:
# Load the allele table from HDF and convert it to a NumPy array.
# NOTE(review): the path is relative to the notebook's working directory.
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [7]:
# Have simuPOP compute allele frequencies at all available loci for every
# sample of every replicate (the replicate key itself is not needed here).
for replicate_samples in sample_library.values():
    for sample in replicate_samples:
        sim.stat(sample, alleleFreq=sim.ALL_AVAIL)

In [8]:
# Hand the samples and the allele table to Study.store_allele_frequencies.
# (Presumably persists per-sample frequencies -- confirm in saegus.analyze.)
study.store_allele_frequencies(sample_library, alleles)

In [9]:
# Determine the segregating loci across the samples; the result is used as a
# mapping below (its keys are read in the next cell).
sets_of_segregating_loci = study.seg_loci_among_samples(sample_library)

In [10]:
# Use the first key of the mapping as the set of segregating loci for this run.
# NOTE(review): if the mapping holds more than one key, [0] silently ignores
# the rest -- confirm exactly one key is expected.
concordant_segregating_loci = list(sets_of_segregating_loci.keys())[0]

In [11]:
# Draw number_of_qtl distinct segregating loci and keep them as a sorted tuple.
# NOTE(review): no random seed is set in the visible cells, so this draw
# differs from run to run.
qtl = tuple(sorted(random.sample(concordant_segregating_loci, number_of_qtl)))

In [12]:
# Trait helper used to assign allele effects in the next cell.
add_trait = parameters.Trait()

In [13]:
# Assign allele effects at the chosen QTL, passing np.random.exponential and 1.
# (Presumably the sampling function and its scale argument -- confirm against
# Trait.assign_allele_effects. The NumPy RNG is not seeded in the visible cells.)
exponential_allele_effects = add_trait.assign_allele_effects(alleles, qtl, np.random.exponential, 1)

In [14]:
# Persist this run's segregating loci under its run_id. The context manager
# closes the shelf even if the assignment raises.
with shelve.open('segregating_loci_storage') as seg_loc_storage:
    seg_loc_storage[run_id] = concordant_segregating_loci

In [15]:
# Loci that are NOT segregating, i.e. every index in 0..1477 absent from
# concordant_segregating_loci. 1478 is the total locus count (it matches the
# length of recombination_rates). A set gives constant-time membership tests
# instead of rescanning the sequence once per locus.
segregating_lookup = set(concordant_segregating_loci)
droppable_loci = [locus for locus in range(1478) if locus not in segregating_lookup]


In [16]:
# Store the allele effect/frequency tables for every sample, labelled with the
# run id and the string 'exponential'.
analyze.store_allele_effect_frequency_tables(
    sample_library,
    alleles,
    qtl,
    exponential_allele_effects,
    run_id,
    'exponential',
)

In [17]:
# Two-way index maps between saegus locus ids and their position in the list
# of segregating loci (the "tassel" index). Both are built before the shelf is
# opened, so a failure here cannot leave a shelf handle open.
saegus_to_tassel_loci = {
    locus: position
    for position, locus in enumerate(concordant_segregating_loci)
}
tassel_to_saegus_loci = dict(enumerate(concordant_segregating_loci))

# Persist both maps; the context manager guarantees the shelf is closed.
with shelve.open(run_id + '_loci_conversions') as loci_conversions:
    loci_conversions['saegus_to_tassel'] = saegus_to_tassel_loci
    loci_conversions['tassel_to_saegus'] = tassel_to_saegus_loci

In [18]:
# NOTE(review): this repeats the earlier segregating-loci shelf write; it is
# kept for parity but keyed by run_id rather than a hard-coded copy of its
# value, and the shelf is closed by the context manager.
with shelve.open('segregating_loci_storage') as seg_loc_storage:
    seg_loc_storage[run_id] = concordant_segregating_loci

In [None]:
# Development aid: re-import saegus.analyze after editing it on disk.
# Python 3 has no builtin reload(); it lives in importlib (aliased as imp here).
import importlib as imp
imp.reload(analyze)

In [None]:
# Inspect the assigned allele effects.
exponential_allele_effects

In [19]:
# Gather allele data from the first sample stored under key 0.
met_allele_data = analyze.gather_allele_data(sample_library[0][0])

In [20]:
# Column 3 of the allele data, one value per locus. Treated as the minor allele
# per the variable name -- confirm the column layout in analyze.gather_allele_data.
minor_alleles = met_allele_data[:, 3]

In [None]:


# Tabulate the allele effects at the QTL.
exponential_allele_effects_table = analyze.generate_allele_effects_table(
    qtl,
    alleles,
    exponential_allele_effects,
)

In [22]:
# Multi-generation analysis helper for this run. Uses run_id rather than a
# hard-coded copy of its value so that renaming the run takes effect here too.
mg = analyze.MultiGeneration(run_id)

In [23]:
# Collect allele frequency data for all samples, given the per-locus minor
# alleles. The result is indexed as a 2-D array in later cells.
af_table = mg.collect_allele_frequency_data(sample_library, minor_alleles)

In [25]:
# Inspect the first row of the allele frequency table.
af_table[0, :]

array([ 0.   ,  0.   ,  0.   , ...,  0.238,  0.122,  0.   ])

In [None]:
# (Scratch cell removed: it evaluated the bare name `a`, which no visible cell
# defines, so it raised NameError and stopped a top-to-bottom run.)

In [None]:
# NOTE(review): never executed, and the dangling trailing comma suggests the
# argument list is unfinished -- confirm the required arguments of
# MultiGeneration.multiple_sample_analyzer before running.
mg.multiple_sample_analyzer(sample_library, qtl, )

In [None]:
# List the information fields attached to the first sample stored under key 0.
sample_library[0][0].infoFields()

## TASSEL Associated Input

In [40]:
# The sample exported to TASSEL below. This rebinds `sample`, which was also
# the loop variable of the allele-frequency cell.
sample = sample_library[0][0]

In [26]:
# Integer allele code -> SNP character; codes 0..5 map to A, C, G, T, -, +.
int_to_snp_map = dict(enumerate(['A', 'C', 'G', 'T', '-', '+']))
        

In [41]:
# Confirm what `sample` is bound to.
sample

<simuPOP.Population>

In [27]:
# TASSEL installation directory, always ending in a path separator because the
# cells below build file paths by plain string concatenation. Set the
# TASSEL_DIR environment variable to override it; the default is the original
# hard-coded location.
import os
indir = os.path.join(os.environ.get('TASSEL_DIR', '/home/vakanas/tassel-5-standalone'), '')

In [35]:
# Row 0 of the frequency table from column 2 onward.
# (Presumably the first two columns are identifiers rather than loci -- confirm
# against MultiGeneration.collect_allele_frequency_data.)
minor_allele_frqs = af_table[0, 2:]

In [37]:
# Largest value of the sample's 'generation' info field, as an integer string.
gen_id_name = str(int(max(sample.indInfo('generation'))))

In [38]:
# Replicate label used in file names; this notebook exports replicate 0.
rep_id_name = '0'

In [43]:
# Apply the allele effects at the QTL to the sample via operators.assign_additive_g.
operators.assign_additive_g(sample, qtl, exponential_allele_effects)

In [44]:
# NOTE(review): 0.7 is presumably the target heritability -- confirm against
# operators.calculate_error_variance and consider naming it in the config cell.
operators.calculate_error_variance(sample, 0.7)

In [45]:
# Run operators.calculate_p on the sample (presumably computes phenotypes -- confirm).
operators.calculate_p(sample)

In [47]:
# File-name stem of the form <run_id>_<replicate>_<generation>.
name = '_'.join([run_id, rep_id_name, gen_id_name])

In [48]:
# Inspect the per-locus minor allele codes.
minor_alleles

array([ 0.,  3.,  1., ...,  0.,  0.,  0.])

In [52]:
# Minor allele codes restricted to the segregating loci.
minor_alleles[np.array(concordant_segregating_loci)]

array([ 3.,  1.,  2.,  0.,  2.,  2.,  2.,  3.,  1.,  0.,  0.,  1.,  3.,
        0.,  1.,  5.,  3.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,  3.,  3.,
        1.,  4.,  2.,  3.,  1.,  2.,  1.,  2.,  3.,  1.,  1.,  3.,  3.,
        3.,  0.,  0.,  3.,  0.,  0.,  4.,  1.,  3.,  3.,  1.,  3.,  0.,
        0.,  3.,  3.,  5.,  3.,  0.,  3.,  2.,  0.,  0.,  3.,  0.,  2.,
        3.,  1.,  3.,  2.,  3.,  2.,  5.,  1.,  1.,  3.,  0.,  3.,  3.,
        3.,  0.,  1.,  3.,  2.,  0.,  3.,  1.,  3.,  1.,  3.,  0.,  3.,
        2.,  2.,  2.,  3.,  3.,  1.,  3.,  0.,  3.,  3.,  1.,  0.,  3.,
        3.,  2.,  0.,  1.,  3.,  1.,  3.,  0.,  1.,  0.,  2.,  3.,  2.,
        1.,  0.,  1.,  3.,  3.,  3.,  0.,  0.,  3.,  0.,  3.,  1.,  0.,
        2.,  3.,  2.,  0.,  3.,  3.,  2.,  1.,  2.,  0.,  1.,  3.,  3.,
        2.,  0.,  3.,  3.,  2.,  1.,  0.,  1.,  2.,  0.,  0.,  1.,  0.,
        3.,  0.,  0.,  1.,  0.,  5.,  2.,  2.,  2.,  3.,  1.,  0.,  0.,
        1.,  1.,  1.,  0.,  1.,  3.,  5.,  0.,  1.,  3.,  3.,  1

In [53]:
# GWAS helper built from the sample, the segregating loci (as an array), the
# minor alleles and the run id.
gwas = analyze.GWAS(sample, np.array(concordant_segregating_loci), minor_alleles, run_id)

In [59]:
# Compute the count matrix, passing the output file name.
# NOTE(review): the name is hard-coded as 'vandaleer_0_250_...', whereas the
# TASSEL config cell builds its paths from indir + name -- confirm both refer
# to the same files.
ccm = gwas.calculate_count_matrix('vandaleer_0_250_count_matrix.txt')

In [58]:
# Decompose the count matrix for population structure; elements [0] and [1] of
# the result are passed to the structure formatter below.
ps_svd = gwas.pop_struct_eigendecomp(ccm)

In [60]:
# Inspect the decomposition result.
ps_svd

(array([ 8.108,  7.767,  7.176,  7.043,  6.963,  6.781,  6.687,  6.58 ,
         6.459,  6.385,  6.18 ,  0.   ,  5.827,  5.733,  5.614,  5.576,
         5.522,  5.362,  5.164,  5.05 ,  4.964,  4.873,  4.729,  4.664,
         4.604,  4.492,  4.445,  4.396,  4.278,  4.252,  4.125,  4.038,
         3.898,  3.955,  3.844,  3.739,  3.717,  3.63 ,  3.586,  3.529,
         3.47 ,  3.435,  3.383,  3.296,  3.277,  3.209,  3.059,  3.163,
         3.118,  2.966,  2.928,  2.909,  2.857,  2.817,  2.797,  2.743,
         2.695,  2.633,  2.617,  2.552,  2.47 ,  2.484,  2.392,  2.381,
         2.357,  2.315,  2.108,  2.286,  2.141,  2.181,  2.25 ,  2.213,
         2.218,  2.111,  2.08 ,  2.057,  2.033,  1.97 ,  1.901,  1.817,
         1.857,  1.873,  1.84 ,  1.791,  1.799,  1.745,  1.725,  1.709,
         1.663,  1.639,  1.645,  1.624,  1.616,  1.591,  1.561,  1.533,
         1.503,  1.481,  1.49 ,  1.455,  1.447,  1.43 ,  1.408,  1.375,
         1.359,  1.332,  1.306,  1.319,  1.294,  1.286,  1.275, 

In [61]:
# Write the population structure file from the first two elements of the
# decomposition, keeping 2 principal components.
gwas.population_structure_formatter(
    ps_svd[0],
    ps_svd[1],
    number_of_pcs=2,
    pop_struct_file_name='vandaleer_0_250_structure_matrix.txt',
)

In [62]:
# Write the genotype data in hapmap format under the given file name.
gwas.hapmap_formatter('vandaleer_0_250_simulated_hapmap.txt')

In [63]:
# Compute the kinship matrix from the count matrix, passing the output file name.
gwas.calc_kinship_matrix(ccm, 'vandaleer_0_250_kinship_matrix.txt')

In [65]:
# Write the phenotype vector under the given file name.
gwas.trait_formatter('vandaleer_0_250_phenotype_vector.txt')

In [64]:
# Generate the TASSEL pipeline configuration for this replicate/generation.
# Every path is derived from `indir`, so the install location is stated once.
#
# NOTE(review): the recorded run of this cell ended with
# "AttributeError: 'GWAS' object has no attribute 'indir'". That attribute is
# not set anywhere in this notebook, so the fix presumably belongs in
# saegus.analyze.GWAS -- confirm there.
# NOTE(review): the input files above were written under hard-coded
# 'vandaleer_0_250_*' names without `indir`, while the paths here are
# indir + name + suffix -- confirm they point at the same files.
gwas.replicate_tassel_gwas_configs(
    rep_id_name,
    gen_id_name,
    indir + name + '_simulated_hapmap.txt',
    indir + name + '_kinship_matrix.txt',
    indir + name + '_phenotype_vector.txt',
    indir + name + '_structure_matrix.txt',
    indir + 'output/',
    indir + 'example_gwas_pipeline.xml',
)

AttributeError: 'GWAS' object has no attribute 'indir'