In [None]:
import simuOpt
simuOpt.setOptions(alleleType='short', optimized=True, numThreads=4, quiet=True)
import simuPOP as sim
import pandas as pd
from saegus import breed, operators, simulate, analyze, parse, parameters
import shelve
import numpy as np
import xml.etree.ElementTree as ET
import lxml.etree as etree
import random
np.set_printoptions(suppress=True, precision=3)

In [None]:
# Load the saved prefounder population and wrap it in a Simulator with 10 replicates.
# NOTE(review): with stealPops=False the Simulator presumably works on copies and leaves
# `prefounders` itself untouched -- confirm against the simuPOP Simulator documentation.
prefounders = sim.loadPopulation('prefounders1478.pop')
multi_prefounders = sim.Simulator(prefounders, 10, stealPops=False)
# Four founder pairs, each contributing `os_per_pair` offspring to the F1.
founders = [[1, 2], [3, 4], [5, 6], [7, 8]]
os_per_pair = 500
# presumably the second argument is a per-locus recombination rate list (1478 loci) -- TODO confirm
magic = breed.MAGIC(multi_prefounders, [0.01]*1478)
# NOTE(review): this tags `prefounders`, not the replicates held by `multi_prefounders`;
# confirm that this is the population whose IDs are meant to be reset.
sim.tagID(prefounders, reset=27)
magic.generate_f_one(founders, os_per_pair)
# Determine the mother/father choices for the next round of crosses in every replicate.
mrc = breed.MultiRandomCross(multi_prefounders, 4, 500)
mother_choices, father_choices = mrc.determine_random_cross()
multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(mother_choices, father_choices)
# One generation of mating driven by the pre-computed parent pairs. Each offspring gets
# an ID tag and a pedigree tag and is produced with recombination at rate 0.01.
multi_prefounders.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(multi_snd_ord_chooser.snd_ord_id_pairs),
        sim.OffspringGenerator(ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01)
        ],
            numOffspring=1),
        subPopSize=[2000],
    ),
    gen=1,
)

In [None]:
# Final round of crosses: same pattern as the previous cell, but the random cross is
# built with the arguments (2, 1000) instead of (4, 500).
final_mrc = breed.MultiRandomCross(multi_prefounders, 2, 1000)
final_mothers, final_fathers = final_mrc.determine_random_cross()
final_multi_snd_ord_chooser = breed.MultiSecondOrderPairIDChooser(final_mothers, final_fathers)
# One generation of mating from the pre-computed parent pairs; offspring are ID-tagged,
# pedigree-tagged and recombined at rate 0.01. The offspring population size is 2000.
multi_prefounders.evolve(
    matingScheme=sim.HomoMating(
        sim.PyParentsChooser(final_multi_snd_ord_chooser.snd_ord_id_pairs),
        sim.OffspringGenerator(ops=[
            sim.IdTagger(),
            sim.PedigreeTagger(),
            sim.Recombinator(rates=0.01)
        ],
            numOffspring=1),
        subPopSize=[2000],
    ),
    gen=1,
)
# A second MAGIC helper bound to the crossed replicates; used by the random-mating cell below.
mater = breed.MAGIC(multi_prefounders, [0.01]*1478)

In [None]:
mater.random_mating(3, 2000)

In [None]:
sample_sizes = [100, 200, 300]

In [None]:
run_id = 'blood_hands'

In [None]:
def collect_samples(replicate_populations, sample_sizes, run_id):
    """
    Draw one random sample per requested size from every replicate.

    Concordance of segregating loci can only be compared across samples that
    already exist, so the samples are gathered up front.

    :param replicate_populations: Object exposing ``populations()``; every
        population it yields is sampled.
    :param sample_sizes: Iterable of sample sizes. One sample is drawn per
        entry, in order, so ``len(sample_sizes)`` samples are drawn from each
        replicate.
    :param str run_id: Identifier. Accepted but not used by this function.
    :return: Dictionary keyed by ``rep.dvars().rep`` whose values are the lists
        of samples drawn from that replicate.
    """
    library = {}
    for replicate in replicate_populations.populations():
        drawn = []
        for size in sample_sizes:
            drawn.append(sim.sampling.drawRandomSample(replicate, sizes=size))
        library[replicate.dvars().rep] = drawn
    return library

In [None]:
samples = collect_samples(multi_prefounders, sample_sizes, run_id)

In [None]:
samples

In [None]:
# Allele table for the 1478 loci. A forward slash is used as the separator so the
# relative path resolves on both Windows and POSIX systems.
alleles = np.array(pd.read_hdf('parameters/alleles_at_1478_loci.hdf'))

In [None]:
def multi_sample_allele_frq_storage(library_of_samples, alleles, run_id='hdenies'):
    """
    Store the allele data of every sample in an HDF5 file.

    The file is named ``run_id + '_library_storage.h5'``. Each sample is stored
    under the key ``run_id/<rep_id>/<sample.popSize()>``.

    :param library_of_samples: Mapping of replicate id to a list of sampled
        populations (the structure returned by ``collect_samples``).
    :param alleles: Allele table passed through to ``analyze.allele_data``.
    :param str run_id: Prefix of both the file name and the storage keys.
    :return: None. The result is the HDF5 file written to disk.

    NOTE(review): two samples of the same size within one replicate map to the
    same key, so only one of them would remain in the store -- confirm that
    sample sizes are always distinct.
    """
    # The context manager closes the store even when allele_data() or put()
    # raises, so a failed run does not leave the HDF5 file open.
    with pd.HDFStore(run_id + '_library_storage.h5') as hdf_store:
        for rep_id, samples in library_of_samples.items():
            for sample in samples:
                af = analyze.allele_data(sample, alleles,
                                         range(sample.totNumLoci()))
                name = run_id + '/' + str(rep_id) + '/' + str(sample.popSize())
                hdf_store.put(name, af)

In [None]:
multi_sample_allele_frq_storage(samples, alleles, run_id=run_id)

In [None]:
def seg_from_sample_lib(sample_library):
    """
    Count how often each set of segregating loci occurs among the samples.

    For every sample the segregating sites are computed with ``sim.stat`` and
    the resulting ``segSites`` list is turned into a tuple that is used as the
    counter key.

    :param sample_library: Mapping of replicate id to a list of sampled
        populations.
    :return: ``collections.Counter`` mapping each tuple of segregating loci to
        the number of samples that have exactly that set.
    """
    # Imported here so the function does not depend on the ``col`` alias having
    # been imported by some other cell before this one is called.
    import collections

    segregating_loci_counts = collections.Counter()
    for rep in sample_library.values():
        for sample in rep:
            sim.stat(sample, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])
            segregating_loci_counts[tuple(sample.dvars().segSites)] += 1
    return segregating_loci_counts

In [None]:
seg_loci_of_samples = seg_from_sample_lib(samples)

In [None]:
set(seg_loci_of_samples.keys())

In [None]:
def count_qtl_concordance(array_of_seg_loci, qtl):
    """
    Count, for every sample, how many QTL are among its segregating loci.

    :param array_of_seg_loci: Iterable of rows; each row holds the segregating
        loci of one sample and must support the ``in`` operator.
    :param qtl: Iterable of QTL loci to look for in every row.
    :return: ``collections.defaultdict(int)`` mapping the row index to the
        number of QTL found in that row. Every row has an entry, including rows
        in which no QTL was found (count 0).
    """
    # Imported here so the function does not depend on the ``col`` alias having
    # been imported by some other cell before this one is called.
    import collections

    # defaultdict(int) already yields 0 for missing keys. Passing ``default=0``
    # would only insert a stray 'default' key into the result.
    qtl_agreement_counts = collections.defaultdict(int)
    for i, row in enumerate(array_of_seg_loci):
        # Create the entry up front so that a sample containing none of the QTL
        # is recorded with a count of 0 instead of being absent from the result.
        qtl_agreement_counts[i] = 0
        for locus in qtl:
            if locus in row:
                qtl_agreement_counts[i] += 1
    return qtl_agreement_counts

def test_qtl_concordance(agreement_counts, qtl):
    qtl_concordance = True
    for k, v in agreement_counts.items():
        if v != len(qtl) and k != 'default':
            qtl_concordance = False
            print("Disagrement of QTL at sample {}".format(k))
    return qtl_concordance

def count_segregating_site_concordance(replicate_):
    """
    Count how often each set of segregating loci occurs.

    :param replicate_: Iterable of rows; each row holds the segregating loci of
        one sample and is converted to a tuple to serve as the key.
    :return: ``collections.defaultdict(int)`` mapping each tuple of segregating
        loci to its number of occurrences. The mapping also carries a
        ``'default'`` key with value 0.
    """
    # Imported here so the function does not depend on the ``col`` alias having
    # been imported by some other cell before this one is called.
    import collections

    segregating_loci_concordance_counts = collections.defaultdict(int)
    # NOTE(review): the 'default' placeholder entry is kept on purpose. The
    # original construction ``defaultdict(int, default=0)`` produced it, and the
    # ``> 2`` threshold of test_segregatng_loci_concordance appears to count on it.
    segregating_loci_concordance_counts['default'] = 0
    # Iterate over the argument itself; the body previously read a name that is
    # not defined by this function.
    for row in replicate_:
        segregating_loci_concordance_counts[tuple(row)] += 1
    return segregating_loci_concordance_counts

def test_segregatng_loci_concordance(seg_loci_agreement_counts):
    seg_loci_agreement = True
    if len(seg_loci_agreement_counts) > 2:
        seg_loci_agreement = False
    return seg_loci_agreement

In [None]:
# Collect every row of segregating loci as a tuple. A list is used because a
# tuple has no append() method.
# NOTE(review): `seg_of_samples` is not assigned at notebook level in any visible
# cell (it is a local name inside seg_from_sample_lib) -- confirm where it comes from.
seg_loci_concordance_counts = [tuple(row) for row in seg_of_samples]

In [None]:
# Compute the segregating sites of every replicate; sim.stat stores the requested
# variables ('segSites', 'numOfSegSites') on each population.
for rep in multi_prefounders.populations():
    sim.stat(rep, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])

In [None]:
# Generator expression: evaluated lazily and can be consumed only once.
seg_loci = (tuple(rep.dvars().segSites) for rep in multi_prefounders.populations())

In [None]:
repr_seg_loci = next(seg_loci)

In [None]:
import sys

In [None]:
import collections as col

In [None]:
# NOTE(review): `seg_loc_counts` is not assigned in any visible cell, so this call
# relies on leftover kernel state -- confirm which counts object is meant here.
test_segregatng_loci_concordance(seg_loc_counts)

In [None]:
class Study(object):
    """Holds a run identifier together with a concordance result."""

    def __init__(self, run_id):
        self._concordance = None
        self._run_id = run_id

    def _read_concordance(self):
        """Return the stored concordance value."""
        return self._concordance

    def _write_concordance(self, value):
        """Store a new concordance value."""
        self._concordance = value

    def _drop_concordance(self):
        """Remove the stored concordance value from the instance."""
        del self._concordance

    # Readable, writable and deletable property backed by ``_concordance``.
    concordance = property(_read_concordance, _write_concordance, _drop_concordance)

In [None]:
stdy = Study('blood_hands')

In [None]:
stdy.concordance = True

In [None]:
stdy.concordance

In [None]:
# NOTE(review): this rebinds `seg_loci_of_samples`, which an earlier cell bound to the
# result of seg_from_sample_lib(samples); from here on the name is a zero-filled
# int32 array of shape (50, 866, 3).
seg_loci_of_samples = np.zeros((50, 866, 3), dtype=np.int32)
# np.array() applied to an existing ndarray produces a copy by default.
segarray = np.array(seg_loci_of_samples)

In [None]:
test_pop = samples[0][0]

In [None]:
def q_assign_qtl(pop=None, number_qtl=10):
    """
    Pick QTL at random from the segregating loci of a population.

    :param pop: Population whose segregating sites are eligible. When omitted,
        the notebook-level ``test_pop`` is used, which keeps the original
        zero-argument call working.
    :param int number_qtl: Number of loci to draw (default 10).
    :return: Sorted list of ``number_qtl`` loci as plain ``int`` values.
    :raises ValueError: If fewer than ``number_qtl`` segregating loci exist.
    """
    if pop is None:
        # Backward-compatible default: fall back to the notebook-level population.
        pop = test_pop
    sim.stat(pop, numOfSegSites=sim.ALL_AVAIL, vars=['segSites', 'numOfSegSites'])
    eligible = list(pop.dvars().segSites)
    if number_qtl > len(eligible):
        raise ValueError(
            "Requested {} QTL but only {} segregating loci are available".format(
                number_qtl, len(eligible)))
    # Sampling is without replacement, so every QTL is a distinct locus.
    chosen = random.sample(eligible, number_qtl)
    return sorted(int(locus) for locus in chosen)

In [None]:
qtl = q_assign_qtl()

In [None]:
qtl

In [None]:
# Assign allele effects at the chosen QTL through the saegus Trait helper.
# random.expovariate is handed over as the sampling function together with the value 1.
# NOTE(review): presumably 1 is the rate parameter of the exponential and
# multiplicity=3 is the number of draws combined per allele -- confirm against saegus.
add_trait = parameters.Trait()
allele_effects = add_trait.assign_allele_effects(alleles, qtl, random.expovariate, 1, multiplicity=3)

In [None]:
allele_effects

In [None]:
test_pop = samples[0][0]

## Create Family Means to Sub Into Individual Values

In [None]:
def create_top_crosses(existing_pop, offspring_per_individual):
    """
    Evolve ``existing_pop`` in place for one generation with ``sim.SelfMating``.

    The offspring generation holds
    ``offspring_per_individual * existing_pop.popSize()`` individuals. Each
    offspring is ID-tagged, pedigree-tagged and recombined at rate 0.01.

    :param existing_pop: Population to evolve; it is modified in place.
    :param offspring_per_individual: Value passed as ``numOffspring`` to the
        mating scheme.
    :return: None
    """
    total_offspring = offspring_per_individual * existing_pop.popSize()

    offspring_operators = [
        sim.IdTagger(),
        sim.PedigreeTagger(),
        sim.Recombinator(rates=0.01),
    ]
    selfing_scheme = sim.SelfMating(
        replacement=False,
        numOffspring=offspring_per_individual,
        subPopSize=total_offspring,
        ops=offspring_operators,
    )
    existing_pop.evolve(matingScheme=selfing_scheme, gen=1)

In [None]:
def calculate_family_means(top_crossed_pop, individuals_per_family, qtl, allele_effects):
    """
    Compute the mean genotypic value ``g`` of every family.

    The population is merged into a single subpopulation if necessary, split
    into consecutive families of ``individuals_per_family`` individuals,
    assigned additive ``g`` values and summarised per subpopulation.

    :param top_crossed_pop: Population to analyse; its subpopulation structure
        and its ``g`` values are modified in place.
    :param int individuals_per_family: Number of individuals in each family.
    :param qtl: QTL loci passed to ``operators.assign_additive_g``.
    :param allele_effects: Allele effects passed to
        ``operators.assign_additive_g``.
    :return: numpy array holding one mean ``g`` per family, in subpopulation
        order.
    :raises ValueError: If the population size is not a multiple of
        ``individuals_per_family``.
    """
    population_size = top_crossed_pop.popSize()
    # Validate before touching the population so a bad argument leaves it unchanged.
    if population_size % individuals_per_family != 0:
        raise ValueError(
            "Population size {} is not a multiple of individuals_per_family {}".format(
                population_size, individuals_per_family))
    number_of_families = int(population_size // individuals_per_family)

    if top_crossed_pop.numSubPop() > 1:
        top_crossed_pop.mergeSubPops()
    # The keyword is ``sizes`` (plural).
    top_crossed_pop.splitSubPop(0, sizes=[individuals_per_family] * number_of_families)

    # Work on the population that was passed in, not on a notebook-level global.
    operators.assign_additive_g(top_crossed_pop, qtl, allele_effects)
    sim.stat(top_crossed_pop, meanOfInfo='g', vars=['meanOfInfo_sp', 'meanOfInfo'])
    family_mean_g = np.array([top_crossed_pop.dvars(sp).meanOfInfo['g']
                              for sp in range(top_crossed_pop.numSubPop())])
    return family_mean_g

def calculate_family_error_variance(top_crossed_pop, family_mean_g_values, heritability):
    """
    Derive the error variance that yields the requested heritability.

    With ``h = V_g / (V_g + V_e)`` the error variance is
    ``V_e = V_g * (1 / h - 1)``. The result is stored on the population as
    ``top_crossed_pop.dvars().family_epsilon``.

    :param top_crossed_pop: Population that receives the ``family_epsilon``
        variable.
    :param family_mean_g_values: Family mean genotypic values; their variance
        is used as ``V_g``.
    :param float heritability: Target heritability, ``0 < heritability <= 1``.
    :return: None
    :raises ValueError: If ``heritability`` lies outside ``(0, 1]``.
    """
    if not 0 < heritability <= 1:
        raise ValueError(
            "heritability must be in the interval (0, 1], got {}".format(heritability))
    variance_of_family_g = np.var(family_mean_g_values)
    # (1/h - 1), not (1/h - h): only the former satisfies h = V_g / (V_g + V_e).
    family_epsilon = variance_of_family_g * (1 / heritability - 1)
    top_crossed_pop.dvars().family_epsilon = family_epsilon
    
def calculate_family_p(top_crossed_pop, family_mean_g_values):
    """
    Add random error to the family mean genotypic values.

    :param top_crossed_pop: Population providing ``popSize()`` and the
        ``family_epsilon`` variable (treated as the error variance).
    :param family_mean_g_values: Iterable of family mean ``g`` values.
    :return: numpy array of length ``top_crossed_pop.popSize()``. Entry ``i``
        holds ``g_i + e_i`` for each family mean supplied; any remaining
        entries stay 0.

    NOTE(review): the array length is the population size, as before, although
    only one value per family is produced -- confirm whether the length should
    be the number of families instead.
    """
    family_mean_p_values = np.zeros((top_crossed_pop.popSize()))
    # random.normalvariate expects a standard deviation, so take the square
    # root of the stored error variance.
    error_sd = np.sqrt(top_crossed_pop.dvars().family_epsilon)
    # Write by index: assigning to a loop variable would leave the array all zeros.
    for index, mean_g_value in zip(range(len(family_mean_p_values)), family_mean_g_values):
        family_mean_p_values[index] = mean_g_value + random.normalvariate(0, error_sd)
    return family_mean_p_values

In [None]:
def modify_existing_phenotypes(existing_phenotype_file, modded_values, modified_phenotype_file):
    """
    Write a copy of a phenotype file with its second column replaced.

    The input is read as a tab-separated table whose first line is a header.
    The output starts with the fixed header line ``<Trait>\\tsim`` and is
    followed by the modified rows, tab-separated, without index or column names.

    :param existing_phenotype_file: Path of the tab-separated phenotype file
        to read.
    :param modded_values: Values assigned to the second column (index 1); they
        must be assignable to that column.
    :param modified_phenotype_file: Path of the file to write.
    :return: None
    """
    phenotype_frame = pd.read_csv(existing_phenotype_file, sep='\t')
    # Work on a copy of the table so the values read from disk stay untouched.
    replacement_table = np.array(np.array(phenotype_frame))
    replacement_table[:, 1] = modded_values
    output_frame = pd.DataFrame(replacement_table)
    with open(modified_phenotype_file, 'w') as output_handle:
        output_handle.write("<Trait>\tsim\n")
        output_frame.to_csv(output_handle, sep='\t', index=False, header=False)

In [None]:
def modify_gwas_config(rep_id, sample_size, new_run_id,
                       new_phenotype_file_name,
                       new_output_file_prefix,
                       existing_config_file,
                       output_dir="C:\\tassel\\bin\\"):
    """
    Write a copy of a GWAS pipeline XML config with new file names filled in.

    The text of the ``fork2/t`` element is set to the phenotype file name and
    the text of the ``combine6/export`` element to the output prefix. The
    result is written to
    ``output_dir + 'R<rep_id>_<sample_size>_<new_run_id>__sim_gwas_pipeline.xml'``.

    :param rep_id: Replicate identifier; converted with ``str`` so integer ids
        are accepted.
    :param sample_size: Sample size; converted with ``str``.
    :param new_run_id: Run identifier; converted with ``str``.
    :param new_phenotype_file_name: New text of the ``fork2/t`` element.
    :param new_output_file_prefix: New text of the ``combine6/export`` element.
    :param existing_config_file: Path of the XML config used as the template.
    :param str output_dir: Prefix prepended verbatim to the output file name,
        so it must end with a path separator. Defaults to the original
        hard-coded location.
    :return: None
    """
    tree = ET.parse(existing_config_file)
    root = tree.getroot()
    # Re-parse the document with lxml; the write() call below uses the
    # lxml-specific pretty_print option.
    lxml_tree = etree.fromstring(ET.tostring(root))
    lxml_root = lxml_tree.getroottree()

    lxml_root.find('fork2/t').text = new_phenotype_file_name
    lxml_root.find('combine6/export').text = new_output_file_prefix

    # The double underscore before "sim_gwas_pipeline" is kept so that the
    # generated file names stay identical to those of earlier runs.
    output_name = ('R' + str(rep_id) + '_' + str(sample_size) + '_'
                   + str(new_run_id) + '_' + "_sim_gwas_pipeline.xml")
    lxml_root.write(output_dir + output_name,
                    encoding="UTF-8",
                    method="xml", xml_declaration=True, standalone='',
                    pretty_print=True)


In [None]:
test_pop.popSize()

In [None]:
test_pop.numSubPop()

In [None]:
test_pop.indInfo('ind_id')

In [None]:
test_pop.indInfo('father_id')

In [None]:
test_pop.indInfo('g')

In [None]:
operators.assign_additive_g(test_pop, qtl, allele_effects)

In [None]:
test_pop.indInfo('g')

In [None]:
test_pop.indInfo('ind_id')

In [None]:
test_pop.setSubPopByIndInfo('father_id')

In [None]:
test_pop.mergeSubPops()

In [None]:
test_pop.popSize()

In [None]:
col.Counter(test_pop.indInfo('father_id'))

In [None]:
ind_ids = list(test_pop.indInfo('ind_id'))

In [None]:
test_pop.splitSubPop(0, sizes=[10]*100)

In [None]:
for inds in test_pop.individuals(0):
    print(inds.father_id)

In [None]:
sim.stat(test_pop, meanOfInfo='g', vars=['meanOfInfo_sp', 'meanOfInfo'])

In [None]:
for x in range(10):
    print(test_pop.dvars(x).meanOfInfo['g'])

In [None]:
test_pop.indInfo('father_id')

In [1]:
pwd

'C:\\Users\\DoubleDanks\\BISB\\wisser\\code\\rjwlab-scripts\\saegus_project\\devel\\magic\\1478'

In [3]:
import pandas as pd

In [9]:
results = pd.read_csv('heaven_denies_power_fpr_results.txt', sep='\t', index_col=0)

In [12]:
results.std()

power_100     0.000000
fpr_100       0.000000
power_250     0.042164
fpr_250       0.000000
power_500     0.048305
fpr_500       0.000493
power_750     0.097183
fpr_750       0.000000
power_1000    0.051640
fpr_1000      0.000000
power_1250    0.091894
fpr_1250      0.000493
dtype: float64

In [15]:
mean_stdev = pd.DataFrame([results.mean(), results.std()], index=['mean', 'stdev']).T

In [17]:
mean_stdev.to_csv('heaven_denies_power_fpr_mean_stdev.txt', sep='\t', index=False)

In [18]:
import shelve

In [19]:
# Open the shelf holding the stored allele effects of earlier runs.
# NOTE(review): no visible cell calls hd.close(); confirm the shelf is closed once
# the stored values have been read.
hd = shelve.open('allele_effects_storage')

In [20]:
list(hd)

['caramon', 'beneath_these_waves', 'demonstration', 'heaven_denies']

In [22]:
hd['heaven_denies']

{189: {0: 4.284828888625308, 2: 0.7212546695504344},
 290: {2: 1.3862139007220504, 3: 2.642455160538799},
 589: {1: 4.787695340673567, 3: 1.3637443274662537},
 669: {0: 2.2468016374773283, 1: 2.881396308559403},
 806: {2: 3.742542881729241, 3: 0.6626878213492292},
 1036: {0: 4.291797716058894, 2: 1.8636809000550545},
 1060: {4: 2.410924219536518, 5: 2.775896119356275},
 1112: {1: 2.4660958755387257, 3: 2.591612800685531},
 1225: {4: 2.6238242991767544, 5: 3.8379486805851597},
 1446: {1: 1.6280833274073379, 3: 3.740474580390268}}