In [1]:
import ipcoal
import toytree
import h5py

In [2]:
tree = toytree.rtree.unittree(10, 100000)

In [3]:
mod = ipcoal.Model(tree, Ne=50000, nsamples=2)

In [11]:
mod.sim_loci(10, 100)

In [12]:
mod.apply_missing_mask(0.5)

In [15]:
mod.write_snps_to_hdf5(outdir="/tmp", diploid=False)

wrote 11 SNPs to /tmp/test.snps.hdf5


In [16]:
with h5py.File("/tmp/test.snps.hdf5", 'r') as io5:
    print(io5['snps'][:])

[[67 78 78 78 78 78 78 78 78 78 78]
 [78 67 78 78 78 71 65 65 65 78 78]
 [67 67 78 78 78 65 78 65 65 65 65]
 [84 78 78 78 78 65 78 65 78 78 78]
 [67 67 71 65 84 65 78 65 78 65 65]
 [78 78 71 71 84 78 65 65 78 65 65]
 [67 67 67 65 84 65 78 65 78 65 65]
 [78 67 71 65 65 65 65 78 84 65 65]
 [67 67 71 65 84 78 78 78 65 78 78]
 [78 67 78 78 78 65 78 65 65 78 78]
 [67 78 71 65 84 65 78 78 65 78 78]
 [67 78 71 65 84 65 65 65 65 65 65]
 [67 78 71 65 84 78 84 65 65 78 78]
 [78 78 78 78 78 78 65 78 78 84 65]
 [78 78 71 65 84 78 65 65 65 65 65]
 [67 67 71 65 84 78 78 65 65 65 65]
 [78 67 78 78 78 78 78 78 65 65 84]
 [67 78 71 65 84 78 78 78 78 65 65]
 [78 71 78 78 78 78 65 71 78 78 78]
 [67 67 78 78 78 78 78 78 65 78 78]]


In [10]:
import numpy as np

In [48]:
self = ipcoal.io.writer.Writer(mod)

if self.seqs.ndim == 2:
    self.seqs = self.seqs.T.reshape(
        self.seqs.shape[1], self.seqs.shape[0], 1)
    self.ancestral_seq = self.ancestral_seq.reshape(
        self.ancestral_seq.size, 1)

# case we want this not to transform the seqs data in place.
txf = ipcoal.io.transformer.Transformer(self.seqs, self.names, self.alleles, True)
txf.transform_seqs()

In [50]:
txf.dindex_map

{0: (0, 1),
 1: (2, 3),
 2: (4, 5),
 3: (6, 7),
 4: (8, 9),
 5: (10, 11),
 6: (12, 13),
 7: (14, 15),
 8: (16, 17),
 9: (18, 19)}

In [38]:
arr = np.concatenate(self.seqs, axis=1)

In [43]:
arr = np.concatenate(self.seqs, axis=1).view(np.uint8)
marr = np.ma.array(data=arr, mask=(arr == 9))
common = marr.mean(axis=0).round().astype(int)
varsites = np.where(np.any(marr != common, axis=0).data)[0]
nsites = varsites.size

In [None]:
ipcoal.io.transformer.Transformer()

In [73]:
genos = ipcoal.io.genos.Genos(arr, np.concatenate(self.ancestral_seq), varsites, txf.dindex_map)

In [70]:
np.concatenate(txf.seqs, 1)[:, varsites]

array([[b'N', b'C', b'K', b'C', b'C', b'T'],
       [b'R', b'N', b'T', b'C', b'G', b'N'],
       [b'A', b'C', b'T', b'N', b'G', b'N'],
       [b'A', b'N', b'T', b'N', b'S', b'G'],
       [b'N', b'C', b'T', b'T', b'N', b'G'],
       [b'N', b'C', b'N', b'N', b'N', b'G'],
       [b'G', b'C', b'T', b'N', b'G', b'G'],
       [b'G', b'M', b'T', b'C', b'N', b'G'],
       [b'G', b'N', b'N', b'C', b'N', b'G'],
       [b'A', b'C', b'T', b'C', b'N', b'G']], dtype='|S1')

In [76]:
txf.seqs

array([[[b'N'],
        [b'R'],
        [b'A'],
        [b'A'],
        [b'N'],
        [b'N'],
        [b'G'],
        [b'G'],
        [b'G'],
        [b'A']],

       [[b'N'],
        [b'A'],
        [b'N'],
        [b'A'],
        [b'A'],
        [b'A'],
        [b'N'],
        [b'A'],
        [b'N'],
        [b'A']],

       [[b'T'],
        [b'N'],
        [b'T'],
        [b'T'],
        [b'T'],
        [b'T'],
        [b'N'],
        [b'T'],
        [b'T'],
        [b'N']],

       [[b'N'],
        [b'C'],
        [b'C'],
        [b'C'],
        [b'N'],
        [b'C'],
        [b'C'],
        [b'C'],
        [b'C'],
        [b'C']],

       [[b'N'],
        [b'C'],
        [b'C'],
        [b'C'],
        [b'C'],
        [b'C'],
        [b'N'],
        [b'C'],
        [b'C'],
        [b'C']],

       [[b'C'],
        [b'N'],
        [b'C'],
        [b'N'],
        [b'C'],
        [b'C'],
        [b'C'],
        [b'M'],
        [b'N'],
        [b'C']],

       [[b'K'],
        [b'T

In [74]:
genos.get_alts_and_genos_matrix()

(chararray([b'A', b'A', b'G', b'T', b'G', b'T'], dtype='|S5'),
 array([[[9, 9],
         [0, 1],
         [1, 1],
         [1, 1],
         [9, 9],
         [9, 9],
         [0, 9],
         [9, 0],
         [0, 9],
         [9, 1]],
 
        [[9, 0],
         [9, 9],
         [9, 0],
         [9, 9],
         [9, 0],
         [9, 0],
         [0, 9],
         [0, 1],
         [9, 9],
         [9, 0]],
 
        [[0, 1],
         [0, 9],
         [9, 0],
         [0, 0],
         [9, 0],
         [9, 9],
         [9, 0],
         [0, 0],
         [9, 9],
         [9, 0]],
 
        [[0, 0],
         [9, 0],
         [9, 9],
         [9, 9],
         [1, 9],
         [9, 9],
         [9, 9],
         [0, 0],
         [0, 0],
         [9, 0]],
 
        [[9, 0],
         [9, 1],
         [1, 1],
         [0, 1],
         [9, 9],
         [9, 9],
         [1, 1],
         [9, 9],
         [9, 9],
         [9, 9]],
 
        [[9, 1],
         [9, 9],
         [9, 9],
         [9, 0],
    

In [39]:
arr.view(np.uint8)

array([[78, 78, 78, 78, 78, 78, 84, 67, 78, 78],
       [71, 78, 78, 67, 78, 78, 84, 78, 78, 78],
       [65, 78, 84, 67, 67, 78, 78, 78, 71, 78],
       [65, 78, 84, 78, 78, 78, 84, 78, 67, 78],
       [78, 65, 84, 78, 78, 78, 78, 84, 78, 71],
       [78, 65, 84, 67, 67, 78, 78, 78, 78, 71],
       [71, 78, 78, 67, 78, 67, 78, 78, 71, 78],
       [78, 65, 84, 67, 67, 67, 84, 67, 78, 71],
       [71, 78, 84, 78, 78, 78, 78, 67, 78, 71],
       [78, 65, 78, 67, 67, 78, 78, 78, 78, 71]], dtype=uint8)

In [26]:
marr = np.ma.array(data=arr, mask=(arr == 9))
common = marr.mean(axis=0).round().astype(int)
varsites = np.where(np.any(marr != common, axis=0).data)[0]
nsites = varsites.size

  marr = np.ma.array(data=arr, mask=(arr == 9))


TypeError: cannot perform reduce with flexible type

In [27]:
nsites

NameError: name 'nsites' is not defined

In [16]:
import h5py
with h5py.File("/tmp/test.seqs.hdf5", 'r') as io5:
    print(io5['phy'][:])

[[65 84 84 ... 67 67 67]
 [65 84 84 ... 67 67 67]
 [65 84 84 ... 67 67 67]
 ...
 [65 84 84 ... 67 67 67]
 [65 84 84 ... 67 67 67]
 [65 84 84 ... 67 67 67]]


In [13]:
print(mod.write_concat_to_nexus(idxs=[0, 1]))

#nexus
begin data;
  dimensions ntax=20 nchar=400;
  format datatype=DNA missing=N gap=- interleave=yes;
  matrix

  r0_0	GTATACCAAGGGCGGATTTACGCGCTGGCCGACACAATGAGACCAAAATCCGCCATGTCTCAGGGCGGTCTGGGCTTGGCCCTCATTAAAACTTCTGGGG
  r0_1	GTATACCAAGGGCGGATTTACGCGCTGGCCGACACAATGAGACCAAAATCCGCCATGTCTCAGGGCGGTCTGGGCTTGGCCCTCATTAAAACTTCTGGGG
  r1_0	GTATACCAAGGGCGGATTTACGCGCTGGCCGACACAATGAGACCAAAATCCGCCATGTCTCAGGGCGGTCTGGGCTTGGCCCTCATTAAAACTTCTGGGG
  r1_1	GTATACCAAGGGCGGATTTACGCGCTGGCCGACACAATGAGACCAAAATCCGCCATGTCTCAGGGCGGTCTGGGCTTGGCCCTCATTAAAACTTCTGGGG
  r2_0	GTATACCAAGGGCGGATTTACGCGCTGGCCGACACAATGAGACCAAAATCCGCCATGTCTCAGGGCGGTCTGGGCTTGGCCCTCATTAAAACTTCTGGGG
  r2_1	GTATACCAAGGGCGGATTTACGCGCTGGCCGACACAATGAGACCAAAATCCGCCATGTCTCAGGGCGGTCTGGGCTTGGCCCTCATTAAAACTTCTGGGG
  r3_0	GTATACCAAGGGCGGATTTACGCGCTGGCCGACACAATGAGACCAAAATCCGCCATGTCTCAGGGCGGTCTGGGCTTGGCCCTCATTAAAACTTCTGGGG
  r3_1	GTATACCAAGGGCGGATTTACGCGCTGGCCGACACAATGAGACCAAAATCCGCCATGTCTCAGGGCGGTCTGGGCTTGGCCCTCATTAAAACTTCTGGGG
  r4_0	GTATACCAAGGGCG