In [1]:
import simuOpt
# NOTE(review): options are set before `import simuPOP`; simuPOP presumably
# reads them when it is imported, so keep this line ahead of the import below.
simuOpt.setOptions(alleleType='short', quiet=True, numThreads=4)
import simuPOP as sim
import numpy as np
import pandas as pd
import h5py
from saegus import analyze, parse

In [2]:
# Display arrays with three decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)

In [3]:
# Load the saved population from 'example_pop.pop'.
example_pop = sim.loadPopulation('example_pop.pop')

In [4]:
# Add information fields for the individual ID and both parent IDs.
# NOTE(review): presumably these are the fields written by the IdTagger and
# PedigreeTagger operators used during mating further down — confirm.
example_pop.addInfoFields(['ind_id', 'father_id', 'mother_id'])

In [5]:
# NOTE(review): presumably fills 'ind_id' for the loaded individuals — confirm.
sim.tagID(example_pop)

In [6]:
# Initialise the sex of the individuals in the population.
sim.initSex(example_pop)

In [7]:
# Parser object from saegus; used below to read the recombination rates.
tf = parse.TusonFounders()

In [8]:
# Recombination rates read from 'genetic_map.txt'; passed as `rates` to
# sim.Recombinator in every evolve step.
recom_map = tf.parse_recombination_rates('genetic_map.txt')

In [9]:
# Gather per-locus allele data for the population.
# This array is later stored as the HDF5 dataset 'allele/states' and labelled
# with the names locus, alpha, omega, minor, major.
allele_data = analyze.gather_allele_data(example_pop)

In [None]:
print(allele_data)

In [10]:
# Allele frequencies of the current (founder) population, computed with
# allele_data as a second input.
allele_frequencies = analyze.gather_allele_frequencies(example_pop, allele_data)

In [None]:
allele_frequencies

In [None]:
print(allele_frequencies)

In [11]:
# Genotype frequencies of the current (founder) population.
# NOTE(review): the result is indexed with three integers below — presumably
# [locus, allele, allele]; confirm against analyze.gather_genotype_frequencies.
genotype_frequencies = analyze.gather_genotype_frequencies(example_pop)

In [None]:
print(genotype_frequencies)

In [None]:
# Single entry at index (0, 1, 1).
print(genotype_frequencies[0, 1, 1])

In [12]:
# Indices of every non-zero entry of genotype_frequencies, one index tuple
# per row. np.argwhere is the documented shorthand for
# np.transpose(np.nonzero(a)); unlike the unbound np.ndarray.nonzero call it
# also accepts array-likes, not only ndarray instances.
genotypes_by_locus = np.argwhere(genotype_frequencies)

In [None]:
# Each row is one index tuple at which genotype_frequencies is non-zero.
print(genotypes_by_locus)

In [None]:
# Spot-check three entries whose first index is 5
# (presumably locus 5 — TODO confirm the axis meaning).
print(genotype_frequencies[5, 1, 1])

In [None]:
print(genotype_frequencies[5, 1, 3])

In [None]:
print(genotype_frequencies[5, 3, 3])

In [None]:
# Element at row 0, column 3 of allele_data — the same value the chained
# form (slice columns 3 onward, then take [0][0]) selects.
allele_data[0, 3]

## HDF5 Files: Data Storage

In [13]:
# Open the HDF5 file that will hold the allele and genotype data.
# The mode is passed explicitly: h5py >= 3.0 opens files read-only ('r') when
# no mode is given, which makes every create_group/assignment below fail.
# 'a' opens an existing file read/write and creates it if it is missing.
# NOTE: the groups and datasets created below must not already exist, so
# remove 'example_data.hdf5' (or open with mode 'w') before re-running.
example_data = h5py.File('example_data.hdf5', 'a')

In [15]:
print(example_data)

<HDF5 file "example_data.hdf5" (mode r+)>


In [16]:
# Create the top-level 'allele' group in the file.
allele_group = example_data.create_group('allele')

In [17]:
# Assigning an array to a new name stores it as a dataset: 'allele/states'.
allele_group['states'] = allele_data

In [18]:
print(example_data['allele/states'])

<HDF5 dataset "states": shape (44445, 5), type "<f8">


In [19]:
print(example_data['allele'])

<HDF5 group "/allele" (1 members)>


In [20]:
print(allele_group)

<HDF5 group "/allele" (1 members)>


In [21]:
print(type(allele_group))

<class 'h5py._hl.group.Group'>


In [22]:
print(example_data)

<HDF5 file "example_data.hdf5" (mode r+)>


In [23]:
print(example_data['allele'])

<HDF5 group "/allele" (1 members)>


In [24]:
type(example_data['allele/states'])

h5py._hl.dataset.Dataset

In [25]:
# Label the columns of the 'allele/states' dataset with an HDF5 attribute.
# np.bytes_ replaces np.string_: the latter was only an alias of np.bytes_
# and was removed in NumPy 2.0. The names are stored as byte strings, so
# readers have to .decode() them.
example_data['allele/states'].attrs['mdata'] = list(map(np.bytes_, ['locus', 'alpha', 'omega', 'minor', 'major']))

In [28]:
example_data['allele/states'].attrs['mdata']

array([b'locus', b'alpha', b'omega', b'minor', b'major'], 
      dtype='|S8')

In [30]:
np.array(example_data['allele/states'])

array([[     0.,      1.,      2.,      1.,      2.],
       [     1.,      2.,      3.,      2.,      3.],
       [     2.,      2.,      3.,      3.,      2.],
       ..., 
       [ 44442.,      1.,      2.,      2.,      1.],
       [ 44443.,      1.,      3.,      3.,      1.],
       [ 44444.,      1.,      3.,      1.,      3.]])

In [33]:
list(example_data['allele/states'].attrs)

['mdata']

In [34]:
print(allele_group['states'].attrs['mdata'])

[b'locus' b'alpha' b'omega' b'minor' b'major']


In [26]:
allele_group['states']

<HDF5 dataset "states": shape (44445, 5), type "<f8">

In [None]:
# Look the existing 'allele' group up by name.
allele_group = example_data['allele']

In [None]:
# Read the stored 'states' dataset back into a NumPy array for display.
print(np.array(allele_group['states']))

In [27]:
# Attach the column names to the 'allele' group itself.
# np.bytes_ replaces np.string_, which was an alias of np.bytes_ and was
# removed in NumPy 2.0; the stored values are byte strings.
allele_group.attrs['columns'] = list(map(np.bytes_, ['locus', 'alpha', 'omega', 'minor', 'major']))

In [29]:
allele_group.attrs['columns']

array([b'locus', b'alpha', b'omega', b'minor', b'major'], 
      dtype='|S8')

In [37]:
# Free-text description of the 'allele' group, stored as a one-element list
# of byte strings (read it back with attrs['info'][0].decode('UTF-8')).
# np.bytes_ replaces np.string_, which was removed in NumPy 2.0.
allele_group.attrs['info'] = list(map(np.bytes_, ['Declaration of alpha, omega, minor and major alleles']))

In [39]:
print(allele_group.attrs['info'])

[b'Declaration of alpha, omega, minor and major alleles']


In [42]:
print(allele_group.attrs['info'][0].decode('UTF-8'))

Declaration of alpha, omega, minor and major alleles


In [None]:
# Names of the top-level members of the file.
list(example_data.keys())

In [None]:
# Sub-group that receives one allele-frequency dataset per generation.
allele_group.create_group('generation')

In [None]:
# Founder allele frequencies are stored at 'allele/generation/founder'.
allele_group['generation/founder'] = allele_frequencies

In [None]:
list(allele_group.keys())

In [None]:
allele_group['generation']

In [None]:
# Read the founder frequencies back from the file as a NumPy array.
print(np.array(allele_group['generation/founder']))

In [43]:
# Create the top-level 'genotype' group in the file.
genotype_group = example_data.create_group('genotype')

In [44]:
# Index tuples of the non-zero genotype frequencies.
genotype_group['genotypes_by_locus'] = genotypes_by_locus

In [45]:
# Founder genotype frequencies at 'genotype/generation/founder'.
# NOTE(review): no explicit create_group('generation') is made for this group;
# h5py presumably creates the intermediate group from the path — confirm.
genotype_group['generation/founder'] = genotype_frequencies

In [None]:
print(np.array(genotype_group['generation/founder']))

In [None]:
# The 'columns' attribute was written as byte strings; decode each to str.
column_names = [name.decode('UTF-8') for name in allele_group.attrs['columns']]

In [None]:
print(column_names)

## Multi-Generational Data
Under the current setup, the allele and genotype frequencies have to be
stored manually after every generation.

### Generation 1

In [None]:
example_pop.popSize()

In [46]:
# Advance the population by one generation of random mating.
# Operators applied during mating: an ID tagger, a pedigree tagger and a
# recombinator driven by recom_map. The offspring generation has 1000 individuals.
during_mating_ops = [
    sim.IdTagger(),
    sim.PedigreeTagger(),
    sim.Recombinator(rates=recom_map),
]
random_mating = sim.RandomMating(ops=during_mating_ops, subPopSize=1000)
example_pop.evolve(matingScheme=random_mating, gen=1)

1

In [49]:
example_pop.popSize()

1000

In [47]:
# Allele frequencies after the first evolve step -> 'allele/generation/1'.
allele_group['generation/1'] = analyze.gather_allele_frequencies(example_pop, allele_data)

In [48]:
# Genotype frequencies after the first evolve step -> 'genotype/generation/1'.
genotype_group['generation/1'] = analyze.gather_genotype_frequencies(example_pop)

### Generation 2

In [51]:
# Advance the population by one generation of random mating.
# Operators applied during mating: an ID tagger, a pedigree tagger and a
# recombinator driven by recom_map. The offspring generation has 1000 individuals.
during_mating_ops = [
    sim.IdTagger(),
    sim.PedigreeTagger(),
    sim.Recombinator(rates=recom_map),
]
random_mating = sim.RandomMating(ops=during_mating_ops, subPopSize=1000)
example_pop.evolve(matingScheme=random_mating, gen=1)

1

In [52]:
# Allele frequencies after the second evolve step, addressed by full path.
example_data['allele/generation/2'] = analyze.gather_allele_frequencies(example_pop, allele_data)

In [53]:
# Genotype frequencies after the second evolve step.
example_data['genotype/generation/2'] = analyze.gather_genotype_frequencies(example_pop)

### Generation 3

In [54]:
# Advance the population by one generation of random mating.
# Operators applied during mating: an ID tagger, a pedigree tagger and a
# recombinator driven by recom_map. The offspring generation has 1000 individuals.
during_mating_ops = [
    sim.IdTagger(),
    sim.PedigreeTagger(),
    sim.Recombinator(rates=recom_map),
]
random_mating = sim.RandomMating(ops=during_mating_ops, subPopSize=1000)
example_pop.evolve(matingScheme=random_mating, gen=1)

1

In [55]:
# Allele frequencies after the third evolve step, addressed by full path.
example_data['allele/generation/3'] = analyze.gather_allele_frequencies(example_pop, allele_data)

In [56]:
# Genotype frequencies after the third evolve step.
example_data['genotype/generation/3'] = analyze.gather_genotype_frequencies(example_pop)

### Generation 4

In [57]:
# Advance the population by one generation of random mating.
# Operators applied during mating: an ID tagger, a pedigree tagger and a
# recombinator driven by recom_map. The offspring generation has 1000 individuals.
during_mating_ops = [
    sim.IdTagger(),
    sim.PedigreeTagger(),
    sim.Recombinator(rates=recom_map),
]
random_mating = sim.RandomMating(ops=during_mating_ops, subPopSize=1000)
example_pop.evolve(matingScheme=random_mating, gen=1)

1

In [58]:
# Allele frequencies after the fourth evolve step, addressed by full path.
example_data['allele/generation/4'] = analyze.gather_allele_frequencies(example_pop, allele_data)

In [59]:
# Genotype frequencies after the fourth evolve step.
example_data['genotype/generation/4'] = analyze.gather_genotype_frequencies(example_pop)

### Generation 5

In [60]:
# Advance the population by one generation of random mating.
# Operators applied during mating: an ID tagger, a pedigree tagger and a
# recombinator driven by recom_map. The offspring generation has 1000 individuals.
during_mating_ops = [
    sim.IdTagger(),
    sim.PedigreeTagger(),
    sim.Recombinator(rates=recom_map),
]
random_mating = sim.RandomMating(ops=during_mating_ops, subPopSize=1000)
example_pop.evolve(matingScheme=random_mating, gen=1)

1

In [None]:
# Allele frequencies after the fifth evolve step, addressed by full path.
example_data['allele/generation/5'] = analyze.gather_allele_frequencies(example_pop, allele_data)

In [None]:
# Genotype frequencies after the fifth evolve step.
example_data['genotype/generation/5'] = analyze.gather_genotype_frequencies(example_pop)

In [None]:
# Close the HDF5 file now that all generations have been stored.
example_data.close()