## Simulate introgression using msprime and seq gen

In [152]:
import dendropy
from dendropy.interop import seqgen
import subprocess
import toytree
import ete3

In [None]:
# seq-gen -mHKY -l 40 -s .2 <treefile >seqfile

In [120]:
p = subprocess.Popen(["ms","5", "10", "-T"], stdout=subprocess.PIPE,stderr=subprocess.PIPE)
ms_results = p.communicate()[0]
gentrees = [s.strip() for s in ms_results.splitlines()][4:]
gentrees = [q for q in gentrees if len(q)] # remove first four lines
gentrees = [q for q in gentrees if q != '//'] # remove the // lines

In [121]:
trees = dendropy.TreeList.get(data = "".join(gentrees), schema='newick')
s = seqgen.SeqGen()
s.scale_branch_lens = 0.2
s.seq_len = 40
s.char_model = s.GTR
sequences = s.generate(trees)
outseqs = "".join([sequences.char_matrices[i].as_string("phylip") for i in range(len(sequences.char_matrices))])
print(outseqs)

5 40
4  ATCAAGGCACTCAAAATTACTTGTGTTAAGCCCACGCAAC
1  CTCTATAACATCAGTTCTATTTGTCTCAAACAGAAACAAC
2  CTCTATAACATCAGTTCTATTTGTCTCAAAAAGAAACAAC
3  CTCTATAACATCAGTTCTATTTGTCTCAAACAGAAACAAC
5  CTCTATAACATCAGTTCTATTTGTCTCAAACAGAAACAAC
5 40
4  GAGGCCAGAAATATAGTAGAGGGCCCTTTTACAGTCGGCC
1  GAAGCCAGAAATATAGTAGAGGGCCCTTTTACAGTAGGCC
2  GAAGCCAGGAACATAGTAGGGGGCCCTTTTACTGTCGGCC
3  GAAGCCAGGAACATAGTAGGGGGCCCTTTTACTGTCGGCC
5  GAAGCCAGAAATATAGTACAGGGCCCTTTTACAGTCGGCC
5 40
4  GGGCTAAGGGGGTCAGAGGGTGTATTAGCAGTATCTACAA
1  GGGCTAAGCGCGTCAGGGGGTGTATTAGCACTATCTACAA
2  GCGGTAAGTACATCAGCGGGTGTATTAGCACTATCTACAG
3  CCGGTAAGTACATCAGCGGGTGTATTAGCACTATCTACAG
5  GGGCTAAGCGCGTCAGGGGGTGTATTAGCACTATCTACAA
5 40
4  GGATCTGCCGGTTTTAGGGGAAGTGCGTCCATTGGAACAT
1  GGATCTGCCGGTCTTAGGGGAAGTGCGTCCATTGGAACAT
2  GGATCTGCCGGTTTTAGGGGAAGTGCGTCCATTGGAACAT
3  CGATCTGCCGGTTTTACGGGAAGTGCGTCCATTGGAACAT
5  GGATCTGCCGGTGTTAGGGGAAGTGCGTCCATTGGAACAT
5 40
4  GGATTATGTTGGCCGGGCAGCGTTCCCTGGCCGAGCTTGA
1  GGATTATGTTAGCCGGGCAGCGGGCGCTGGCCGGGCTTAA
2  GGAT

In [128]:
test = toytree.tree(trees[0].as_string(schema = "newick", suppress_internal_node_labels = False))

In [129]:
test1 = toytree.tree(trees[1].as_string(schema = "newick", suppress_internal_node_labels = False))

In [145]:
[trees[i].print_plot() for i in range(len(trees))]

/---------------------------------------------------------------------------- 4
+                                                                              
|                  /--------------------------------------------------------- 1
\------------------+                                                           
                   |                  /-------------------------------------- 2
                   \------------------+                                        
                                      |                  /------------------- 3
                                      \------------------+                     
                                                         \------------------- 5
                                                                               
                                                                               
                                                   /------------------------- 2
/---------------------------------------

[None, None, None, None, None, None, None, None, None, None]

In [132]:
testtxt = trees.as_string(schema = "newick", suppress_internal_node_labels = False)

In [157]:
toytree.multitree(=trees)

AttributeError: 'TreeList' object has no attribute 'startswith'

In [125]:
test.draw_cloudtree(
    width=200,
    orient='right',
    use_edge_lengths=True,
    edge_style={"opacity": 0.3}
    );

AttributeError: 'Toytree' object has no attribute 'draw_cloudtree'

In [20]:
import msprime
import math

In [21]:
def out_of_africa():
    # First we set out the maximum likelihood values of the various parameters
    # given in Table 1.
    N_A = 7300
    N_B = 2100
    N_AF = 12300
    N_EU0 = 1000
    N_AS0 = 510
    # Times are provided in years, so we convert into generations.
    generation_time = 25
    T_AF = 220e3 / generation_time
    T_B = 140e3 / generation_time
    T_EU_AS = 21.2e3 / generation_time
    # We need to work out the starting (diploid) population sizes based on
    # the growth rates provided for these two populations
    r_EU = 0.004
    r_AS = 0.0055
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)
    # Migration rates during the various epochs.
    m_AF_B = 25e-5
    m_AF_EU = 3e-5
    m_AF_AS = 1.9e-5
    m_EU_AS = 9.6e-5
    # Population IDs correspond to their indexes in the population
    # configuration array. Therefore, we have 0=YRI, 1=CEU and 2=CHB
    # initially.
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=0, initial_size=N_AF),
        msprime.PopulationConfiguration(
            sample_size=1, initial_size=N_EU, growth_rate=r_EU),
        msprime.PopulationConfiguration(
            sample_size=1, initial_size=N_AS, growth_rate=r_AS)
    ]
    migration_matrix = [
        [      0, m_AF_EU, m_AF_AS],
        [m_AF_EU,       0, m_EU_AS],
        [m_AF_AS, m_EU_AS,       0],
    ]
    demographic_events = [
        # CEU and CHB merge into B with rate changes at T_EU_AS
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)),
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1),
        # Population B merges into YRI at T_B
        msprime.MassMigration(
            time=T_B, source=1, destination=0, proportion=1.0),
        # Size changes to N_A at T_AF
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0)
    ]
    # Use the demography debugger to print out the demographic history
    # that we have just described.
    dp = msprime.DemographyDebugger(
        Ne=N_A,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    dp.print_history()

In [66]:
import msprime
import numpy as np

# M is the overall symmetric migration rate, and d is the number
# of demes.
M = 0.2
d = 3
# We rescale m into per-generation values for msprime.
m = M / (4 * (d - 1))
# Allocate the initial sample. Because we are interested in the
# between deme coalescence times, we choose one sample each
# from the first two demes.
population_configurations = [
    msprime.PopulationConfiguration(sample_size=1),
    msprime.PopulationConfiguration(sample_size=1),
    msprime.PopulationConfiguration(sample_size=0)]
# Now we set up the migration matrix. Since this is a symmetric
# island model, we have the same rate of migration between all
# pairs of demes. Diagonal elements must be zero.
migration_matrix = [
    [0, m, m],
    [m, 0, m],
    [m, m, 0]]
# We pass these values to the simulate function, and ask it
# to run the required number of replicates.
num_replicates = 1e6
replicates = msprime.simulate(
    population_configurations=population_configurations,
    migration_matrix=migration_matrix,
    num_replicates=num_replicates)


In [56]:
# And then iterate over these replicates
T = np.zeros(int(num_replicates))
for i, tree_sequence in enumerate(replicates):
    tree = next(tree_sequence.trees())
    # Convert the TMRCA to coalecent units.
    T[i] = tree.get_time(tree.get_root()) / 4
# Finally, calculate the analytical expectation and print
# out the results
analytical = d / 2 + (d - 1) / (2 * M)
print("Observed  =", np.mean(T))
print("Predicted =", analytical)

('Observed  =', 6.4977975820738996)
('Predicted =', 6.0)


In [86]:
tmptree = replicates.next()
tmptree= next(tmptree.trees())
tmptree.get_time(tmptree.get_root())

45.17541007990667

In [226]:
tree=msprime.simulate(sample_size=8,
                Ne = 1000,
                length = 10000,
                recombination_rate = .0000001,
                num_replicates = 100)


In [235]:
treeseq=next(tree)
replicant_tree =treeseq.trees()
replicant_tree = next(replicant_tree)
replicant_tree.draw("testdraw.svg")

u'<svg baseProfile="full" height="200" version="1.1" width="200" xmlns="http://www.w3.org/2000/svg" xmlns:ev="http://www.w3.org/2001/xml-events" xmlns:xlink="http://www.w3.org/1999/xlink"><defs /><g id="lines" stroke="black"><line x1="62.5" x2="62.5" y1="125.283537821" y2="20.0" /><line x1="62.5" x2="98.75" y1="20.0" y2="20.0" /><line x1="35.0" x2="35.0" y1="157.310965643" y2="125.283537821" /><line x1="35.0" x2="62.5" y1="125.283537821" y2="125.283537821" /><line x1="20.0" x2="20.0" y1="180.0" y2="157.310965643" /><line x1="20.0" x2="35.0" y1="157.310965643" y2="157.310965643" /><line x1="50.0" x2="50.0" y1="165.854041976" y2="157.310965643" /><line x1="50.0" x2="35.0" y1="157.310965643" y2="157.310965643" /><line x1="40.0" x2="40.0" y1="180.0" y2="165.854041976" /><line x1="40.0" x2="50.0" y1="165.854041976" y2="165.854041976" /><line x1="60.0" x2="60.0" y1="180.0" y2="165.854041976" /><line x1="60.0" x2="50.0" y1="165.854041976" y2="165.854041976" /><line x1="90.0" x2="90.0" y1="138

In [236]:
list(treeseq.breakpoints())

[0,
 2495.661820866089,
 3321.0120101741077,
 6277.639431943567,
 6390.290515588198,
 6788.425172397035,
 7250.833571248416,
 10000.0]

In [199]:
treeseq=next(tree)

TypeError: TreeSequence object is not an iterator

In [196]:
replicant_tree =treeseq.trees()

In [197]:
replicant_tree = next(replicant_tree)

In [198]:
replicant_tree.draw("testdraw.svg")

u'<svg baseProfile="full" height="200" version="1.1" width="200" xmlns="http://www.w3.org/2000/svg" xmlns:ev="http://www.w3.org/2001/xml-events" xmlns:xlink="http://www.w3.org/1999/xlink"><defs /><g id="lines" stroke="black"><line x1="35.0" x2="35.0" y1="154.711846988" y2="20.0" /><line x1="35.0" x2="66.875" y1="20.0" y2="20.0" /><line x1="20.0" x2="20.0" y1="180.0" y2="154.711846988" /><line x1="20.0" x2="35.0" y1="154.711846988" y2="154.711846988" /><line x1="50.0" x2="50.0" y1="176.044043536" y2="154.711846988" /><line x1="50.0" x2="35.0" y1="154.711846988" y2="154.711846988" /><line x1="40.0" x2="40.0" y1="180.0" y2="176.044043536" /><line x1="40.0" x2="50.0" y1="176.044043536" y2="176.044043536" /><line x1="60.0" x2="60.0" y1="180.0" y2="176.044043536" /><line x1="60.0" x2="50.0" y1="176.044043536" y2="176.044043536" /><line x1="98.75" x2="98.75" y1="117.184638762" y2="20.0" /><line x1="98.75" x2="66.875" y1="20.0" y2="20.0" /><line x1="80.0" x2="80.0" y1="180.0" y2="117.184638762

In [118]:
import matplotlib

In [120]:
drawn

u'<svg baseProfile="full" height="200" version="1.1" width="200" xmlns="http://www.w3.org/2000/svg" xmlns:ev="http://www.w3.org/2001/xml-events" xmlns:xlink="http://www.w3.org/1999/xlink"><defs /><g id="lines" stroke="black"><line x1="40.0" x2="40.0" y1="180.0" y2="20.0" /><line x1="40.0" x2="70.0" y1="20.0" y2="20.0" /><line x1="100.0" x2="100.0" y1="167.751163507" y2="20.0" /><line x1="100.0" x2="70.0" y1="20.0" y2="20.0" /><line x1="80.0" x2="80.0" y1="180.0" y2="167.751163507" /><line x1="80.0" x2="100.0" y1="167.751163507" y2="167.751163507" /><line x1="120.0" x2="120.0" y1="180.0" y2="167.751163507" /><line x1="120.0" x2="100.0" y1="167.751163507" y2="167.751163507" /></g><g font-size="14" text-anchor="middle"><text dx="0" dy="-5" x="70.0" y="20.0">4</text><text dx="0" dy="20" x="40.0" y="180.0">0</text><text dx="-10" dy="-5" x="100.0" y="167.751163507">3</text><text dx="0" dy="20" x="80.0" y="180.0">1</text><text dx="0" dy="20" x="120.0" y="180.0">2</text></g><circle cx="70.0" c

In [26]:
migration_example()

TypeError: 'float' object cannot be interpreted as an index

In [22]:
out_of_africa()


Epoch: 0 -- 848.0 generations
     start     end      growth_rate |     0        1        2    
   -------- --------       -------- | -------- -------- -------- 
0 |1.23e+04 1.23e+04              0 |     0      3e-05   1.9e-05 
1 |2.97e+04   1e+03           0.004 |   3e-05      0     9.6e-05 
2 |5.41e+04    510           0.0055 |  1.9e-05  9.6e-05     0    

Events @ generation 848.0
   - Mass migration: lineages move from 2 to 1 with probability 1.0
   - Migration rate change to 0 everywhere
   - Migration rate change for (0, 1) to 0.00025
   - Migration rate change for (1, 0) to 0.00025
   - Population parameter change for 1: initial_size -> 2100 growth_rate -> 0 

Epoch: 848.0 -- 5600.0 generations
     start     end      growth_rate |     0        1        2    
   -------- --------       -------- | -------- -------- -------- 
0 |1.23e+04 1.23e+04              0 |     0     0.00025     0    
1 | 2.1e+03  2.1e+03              0 |  0.00025     0        0    
2 |5.41e+04 2.41e-07    