In [1]:
# Configure simuPOP before it is imported below.
# NOTE(review): presumably simuPOP reads these options at import time, so this
# call has to stay ahead of `import simuPOP` -- confirm against the simuPOP docs.
import simuOpt
simuOpt.setOptions(alleleType='short', numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import random
import collections as col
# Print numpy arrays without scientific notation and with 3 decimals.
np.set_printoptions(suppress=True, precision=3)

In [2]:
# Run-level settings shared by the cells below.

# Label for this run: names the Study, the shelve keys and the output files.
run_id = 'vandaleer'

# Sample sizes handed to study.collect_samples.
sample_sizes = [250, 500]

# Number of segregating loci drawn at random to act as QTL.
number_of_qtl = 10

# Number of replicate populations held by the simulator.
number_of_replicates = 5

# Founder pairs handed to breed.MAGIC and magic.generate_f_one.
founders = [
    [2, 26],
    [3, 25],
    [4, 24],
    [5, 23],
]

# Offspring per founder pair (also the size argument of the first random cross).
os_per_pair = 500

# One recombination rate per entry; 1478 entries, handed to breed.MAGIC.
recombination_rates = [0.01] * 1478

In [3]:
def second_order_mating_scheme(parent_id_pair_chooser):
    """Build the mating scheme shared by both crossing rounds in this cell.

    The two rounds differ only in which parent chooser they use, so the
    scheme is built in one place instead of being pasted twice.

    Parameters
    ----------
    parent_id_pair_chooser
        Object handed to ``sim.PyParentsChooser``; in this cell it is the
        ``snd_ord_id_pairs`` attribute of a
        ``breed.MultiSecondOrderPairIDChooser``.

    Returns
    -------
    sim.HomoMating
        One offspring per chosen pair, run through ``IdTagger``,
        ``PedigreeTagger`` and a ``Recombinator`` at rate 0.01, with
        ``subPopSize=[2000]``.
    """
    return sim.HomoMating(
        sim.PyParentsChooser(parent_id_pair_chooser),
        sim.OffspringGenerator(
            ops=[
                sim.IdTagger(),
                sim.PedigreeTagger(),
                sim.Recombinator(rates=0.01),
            ],
            numOffspring=1,
        ),
        subPopSize=[2000],
    )


# Load the prefounder population and add the 'generation' info field before
# the simulator is built from it.
prefounders = sim.loadPopulation('prefounders1478.pop')
prefounders.addInfoFields('generation')
multi_prefounders = sim.Simulator(prefounders, number_of_replicates, stealPops=False)
magic = breed.MAGIC(multi_prefounders, founders, recombination_rates)

# NOTE(review): the simulator was created with stealPops=False, so this call
# acts on the original `prefounders` object rather than on the populations
# held by `multi_prefounders`; presumably it is here to reset the ID counter
# to 27 -- confirm.
sim.tagID(prefounders, reset=27)
magic.generate_f_one(founders, os_per_pair)

# First round: random cross among 4 groups of os_per_pair individuals.
mrc = breed.MultiRandomCross(multi_prefounders, 4, os_per_pair)
mother_choices, father_choices = mrc.determine_random_cross()
multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(mother_choices, father_choices)
multi_prefounders.evolve(
    matingScheme=second_order_mating_scheme(multi_snd_ord_chooser.snd_ord_id_pairs),
    gen=1,
)

# Final round: random cross among 2 groups of 1000 individuals.
final_mrc = breed.MultiRandomCross(multi_prefounders, 2, 1000)
final_mothers, final_fathers = final_mrc.determine_random_cross()
final_multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(final_mothers, final_fathers)

# Kept as the last expression so the cell still displays evolve()'s return value.
multi_prefounders.evolve(
    matingScheme=second_order_mating_scheme(final_multi_snd_ord_chooser.snd_ord_id_pairs),
    gen=1,
)

(1, 1, 1, 1, 1)

In [4]:
# Study object for this run; used below to collect samples, store allele
# frequencies and find the segregating loci.
study = analyze.Study(run_id)

In [5]:
# Draw samples of the configured sizes from the simulated replicates.
# The result is iterated below with .items() as key -> list of samples and
# indexed as sample_library[0][0].
sample_library = study.collect_samples(multi_prefounders, sample_sizes)

In [6]:
# Read the allele table from HDF and convert it to a plain numpy array.
# The path is relative to the notebook's working directory.
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [7]:
# Compute allele frequencies at all available loci for every sample of every
# entry in the sample library.
for replicate, replicate_samples in sample_library.items():
    for sample in replicate_samples:
        sim.stat(sample, alleleFreq=sim.ALL_AVAIL)

In [8]:
# Hand the samples and the allele table to the Study so it can store the
# allele frequencies computed in the previous cell.
study.store_allele_frequencies(sample_library, alleles)

In [9]:
# Mapping whose keys are used below as collections of segregating loci.
sets_of_segregating_loci = study.seg_loci_among_samples(sample_library)

In [10]:
# Take the first key of the mapping as the set of segregating loci to use.
# NOTE(review): this silently ignores any further keys; presumably all samples
# are expected to share one set of segregating loci -- confirm.
concordant_segregating_loci = list(sets_of_segregating_loci.keys())[0]

In [11]:
# Draw number_of_qtl loci at random from the segregating loci and keep them as
# a sorted tuple.
# NOTE(review): no random seed is set in the visible cells, so this draw
# differs on every run.
qtl = tuple(sorted(random.sample(concordant_segregating_loci, number_of_qtl)))

In [12]:
# Trait helper used in the next cell to assign allele effects.
add_trait = parameters.Trait()

In [13]:
# Assign an effect to each allele at each QTL using np.random.exponential.
# The result is displayed further down as {locus: {allele: effect}}.
# NOTE(review): the trailing 1 is presumably the argument forwarded to the
# distribution function -- confirm against Trait.assign_allele_effects.
exponential_allele_effects = add_trait.assign_allele_effects(alleles, qtl, np.random.exponential, 1)

In [None]:
# Persist the segregating loci of this run under its run_id.
# The context manager closes the shelf even if the assignment raises, so the
# underlying file is never left open.
with shelve.open('segregating_loci_storage') as seg_loc_storage:
    seg_loc_storage[run_id] = concordant_segregating_loci

In [14]:
# Loci among the 1478 that are not in the segregating set.
# Membership is tested against a set so the scan does not re-walk the whole
# segregating-loci sequence for each of the 1478 indices.
segregating_lookup = set(concordant_segregating_loci)
droppable_loci = [locus for locus in range(1478) if locus not in segregating_lookup]


In [15]:
# Store the allele effect / frequency tables for the exponential effects of
# this run.
analyze.store_allele_effect_frequency_tables(
    sample_library,
    alleles,
    qtl,
    exponential_allele_effects,
    run_id,
    'exponential',
)

In [None]:
# Two-way lookup between a locus and its position within the segregating loci.
# NOTE(review): the position is presumably the locus index TASSEL sees once
# only segregating loci are exported -- confirm.
saegus_to_tassel_loci = {
    locus: idx for idx, locus in enumerate(concordant_segregating_loci)
}
tassel_to_saegus_loci = dict(enumerate(concordant_segregating_loci))

# Build the maps first, then write them under a context manager so the shelf
# is closed even if one of the writes raises.
with shelve.open(run_id + '_loci_conversions') as loci_conversions:
    loci_conversions['saegus_to_tassel'] = saegus_to_tassel_loci
    loci_conversions['tassel_to_saegus'] = tassel_to_saegus_loci

In [None]:
# Persist the segregating loci of this run.
# Keyed by run_id rather than a repeated literal so the key follows the run
# settings; the context manager closes the shelf even if the write raises.
with shelve.open('segregating_loci_storage') as seg_loc_storage:
    seg_loc_storage[run_id] = concordant_segregating_loci

In [None]:
# Reload saegus.analyze so edits made on disk are picked up without
# restarting the kernel.
# `reload` is not a builtin on Python 3; it is reached through the importlib
# alias imported on the previous line.
import importlib as imp
imp.reload(analyze)

In [16]:
exponential_allele_effects

{152: {1: 0.7050938159052464, 3: 1.206597655278337},
 527: {0: 0.3324746377927107, 2: 0.638229920764347},
 736: {1: 0.9015851323738887, 3: 0.9913775451078031},
 746: {1: 2.43823904597767, 2: 0.6473108548758053},
 904: {1: 2.245437656956458, 3: 1.0251204936565261},
 957: {0: 1.5452089554164063, 2: 4.572899476597816},
 1013: {1: 0.1854742424309644, 2: 0.06479480426665307},
 1178: {1: 0.3907622330646265, 2: 1.9484651886595101},
 1461: {1: 0.22896379223367372, 2: 0.21701063405512394},
 1470: {0: 1.3960812529153177, 2: 1.0939114725748023}}

In [21]:
# Allele data for the first sample of entry 0 in the sample library; sliced
# by column in the next cell.
met_allele_data = analyze.gather_allele_data(sample_library[0][0])

In [22]:
# Column 3 of the allele data, one value per row.
# NOTE(review): presumably column 3 holds the minor allele at each locus --
# confirm against analyze.gather_allele_data.
minor_alleles = met_allele_data[:, 3]

In [17]:


# NOTE(review): the recorded run of this cell failed with
# "TypeError: unhashable type: 'numpy.ndarray'", so
# exponential_allele_effects_table was never assigned in that session. Check
# the argument order and types against analyze.generate_allele_effects_table.
exponential_allele_effects_table = analyze.generate_allele_effects_table(qtl, alleles, 
                                                exponential_allele_effects)

TypeError: unhashable type: 'numpy.ndarray'

In [18]:
# Multi-generation analyzer for this run.
# Uses run_id instead of repeating the literal so it follows the run settings.
mg = analyze.MultiGeneration(run_id)

In [23]:
# Allele frequency table built from the sample library and the minor alleles;
# indexed below as a 2-D array (af_table[0, :]).
af_table = mg.collect_allele_frequency_data(sample_library, minor_alleles)

In [24]:
af_table[0, :]

array([ 0.   ,  0.   ,  0.   , ...,  0.258,  0.142,  0.   ])

In [None]:
# NOTE(review): `a` is not defined in any visible cell and this cell was never
# executed; it looks like a stray leftover that would raise NameError on
# Restart & Run All -- confirm and delete.
a

In [None]:
# NOTE(review): this cell was never executed and the trailing comma suggests
# the argument list is unfinished -- confirm the intended arguments of
# MultiGeneration.multiple_sample_analyzer.
mg.multiple_sample_analyzer(sample_library, qtl, )

In [None]:
sample_library[0][0].infoFields()

## TASSEL Associated Input

In [25]:
# Work on the first sample of entry 0 for the TASSEL input files below.
# This rebinds `sample`, which was also the loop variable of the
# allele-frequency cell.
sample = sample_library[0][0]

In [26]:
# Integer allele code -> character symbol.
# Not referenced by any other visible cell.
int_to_snp_map = {0: 'A', 1: 'C', 2: 'G', 3: 'T', 4: '-', 5: '+'}
        

In [None]:
sample

In [27]:
# Directory prefix used to build the input paths of the TASSEL config below.
# NOTE(review): hard-coded absolute path on one user's machine; the notebook
# will not run elsewhere without editing it.
indir = "/home/vakanas/tassel-5-standalone/"

In [28]:
# First row of af_table without its first two columns.
# NOTE(review): presumably the first two columns are identifiers rather than
# frequencies -- confirm against collect_allele_frequency_data.
minor_allele_frqs = af_table[0, 2:]

In [29]:
# Largest 'generation' info value among the individuals of the sample, as a
# string. Not referenced by any other visible cell.
gen_id_name = str(int(max(sample.indInfo('generation'))))

In [30]:
# Replicate id as a string ('0'). Not referenced by any other visible cell.
rep_id_name = str(0)

In [31]:
# Pass the sample, the QTL and their allele effects to saegus to assign the
# additive genotypic value.
# NOTE(review): presumably this writes an info field on `sample` in place --
# confirm.
operators.assign_additive_g(sample, qtl, exponential_allele_effects)

In [32]:
# NOTE(review): 0.7 is presumably the target heritability used to derive the
# error variance -- confirm against operators.calculate_error_variance.
operators.calculate_error_variance(sample, 0.7)

In [33]:
# NOTE(review): presumably computes the phenotype of each individual from the
# values set by the two preceding cells -- confirm.
operators.calculate_p(sample)

In [48]:
run_id

'vandaleer'

In [49]:
# Common prefix of the TASSEL file names: run id, then the literal pieces for
# replicate 0 and sample size 250.
name = ''.join((run_id, '_', '0', '_250'))

In [50]:
name

'vandaleer_0_250'

In [35]:
minor_alleles

array([ 0.,  3.,  1., ...,  0.,  0.,  0.])

In [36]:
minor_alleles[np.array(concordant_segregating_loci)]

array([ 3.,  1.,  2.,  0.,  2.,  2.,  2.,  3.,  1.,  0.,  0.,  1.,  3.,
        0.,  1.,  5.,  3.,  0.,  0.,  1.,  0.,  0.,  1.,  1.,  3.,  3.,
        1.,  4.,  2.,  3.,  1.,  2.,  1.,  2.,  3.,  1.,  1.,  3.,  3.,
        3.,  0.,  0.,  3.,  0.,  0.,  4.,  1.,  3.,  3.,  1.,  3.,  0.,
        0.,  3.,  3.,  5.,  3.,  0.,  3.,  2.,  0.,  0.,  3.,  0.,  2.,
        3.,  1.,  3.,  2.,  3.,  2.,  5.,  1.,  1.,  3.,  0.,  3.,  3.,
        3.,  0.,  3.,  3.,  2.,  0.,  3.,  1.,  3.,  1.,  3.,  0.,  3.,
        2.,  2.,  0.,  3.,  3.,  1.,  3.,  0.,  3.,  3.,  1.,  0.,  3.,
        3.,  2.,  0.,  1.,  3.,  1.,  3.,  0.,  1.,  0.,  2.,  3.,  2.,
        1.,  0.,  1.,  3.,  3.,  3.,  0.,  0.,  3.,  0.,  3.,  1.,  0.,
        2.,  3.,  2.,  0.,  3.,  1.,  2.,  3.,  2.,  0.,  1.,  3.,  3.,
        2.,  0.,  3.,  3.,  2.,  1.,  0.,  1.,  2.,  0.,  0.,  1.,  0.,
        3.,  0.,  0.,  1.,  0.,  5.,  2.,  2.,  2.,  3.,  1.,  0.,  0.,
        1.,  1.,  1.,  0.,  1.,  3.,  5.,  0.,  1.,  3.,  3.,  1

In [37]:
# GWAS helper for this sample, given the segregating loci (as an array), the
# minor alleles and the run id. It writes the TASSEL input files below.
gwas = analyze.GWAS(sample, np.array(concordant_segregating_loci), minor_alleles, run_id)

In [38]:
# Count matrix for the sample, written to '<name>_count_matrix.txt'.
# The prefix comes from `name` (set in an earlier cell) instead of a repeated
# literal, so the file follows the run / replicate / sample-size settings.
ccm = gwas.calculate_count_matrix(name + '_count_matrix.txt')

In [39]:
# Decomposition of the count matrix; its first two elements are passed to the
# population structure formatter below.
ps_svd = gwas.pop_struct_eigendecomp(ccm)

In [None]:
ps_svd

In [40]:
# Write the population structure matrix using the first two elements of the
# decomposition and two principal components.
# The file prefix comes from `name` (set in an earlier cell) instead of a
# repeated literal.
gwas.population_structure_formatter(
    ps_svd[0],
    ps_svd[1],
    number_of_pcs=2,
    pop_struct_file_name=name + '_structure_matrix.txt',
)

In [41]:
# Write the simulated hapmap file.
# The file prefix comes from `name` (set in an earlier cell) instead of a
# repeated literal.
gwas.hapmap_formatter(name + '_simulated_hapmap.txt')

In [42]:
# Write the kinship matrix computed from the count matrix.
# The file prefix comes from `name` (set in an earlier cell) instead of a
# repeated literal.
gwas.calc_kinship_matrix(ccm, name + '_kinship_matrix.txt')

In [43]:
# Write the phenotype vector file.
# The file prefix comes from `name` (set in an earlier cell) instead of a
# repeated literal.
gwas.trait_formatter(name + '_phenotype_vector.txt')

In [51]:
# Write the TASSEL pipeline config pointing at the hapmap, kinship, phenotype
# and structure files of this sample, plus the output prefix.
# All paths are built from `indir` so the TASSEL directory is spelled out in
# one place only.
# NOTE(review): the formatter cells write to bare file names (relative to the
# working directory), while the paths here are under `indir` -- confirm the
# notebook runs from that directory or that the files are moved there.
gwas.tassel_gwas_config(
    indir + 'example_gwas_pipeline.xml',
    indir + name + '_simulated_hapmap.txt',
    indir + name + '_kinship_matrix.txt',
    indir + name + '_phenotype_vector.txt',
    indir + name + '_structure_matrix.txt',
    indir + 'output/' + name,
)

In [46]:
name

'vandaleer_0_0'