In [156]:
import stdpopsim
import msprime
import tskit
import os
import numpy as np

from contextlib import redirect_stdout

In [157]:
stdpopsim.__version__

'0.1.2'

In [158]:
species = stdpopsim.get_species("HomSap")
chromosome = "chr22"
WORK_DIR = '/home/kele/Documents/lai/test/test_2pop'
npop = 3 # 2xref 1xadmixed 
mut_rate = 1.44e-8
remember_gen = 20
length_multiplier=1

In [159]:
!mkdir {WORK_DIR}

SLiM_script_path = os.path.join(WORK_DIR, 'run.slim')
SLiM_ts_path = os.path.join(WORK_DIR, 'from_slim.trees')

mkdir: cannot create directory ‘/home/kele/Documents/lai/test/test_2pop’: File exists


In [172]:
# demographic parameter for this model
Ne = 5000 # constant for all pops 
T_split = 300 # the two source populations split this many generations ago 
T_admix = 12 # the pulse admixture event occured this many generations ago 
F_admix = 0.3 # The fraction of the admixed population fom sourceA

assert (T_admix + 5) < T_split  
assert 0<F_admix<1


populations = [
    stdpopsim.Population(id="sourceA", 
                         description="sourceA", 
                         sampling_time = 0
    ),
    stdpopsim.Population(id="sourceB", 
                         description="sourceB", 
                         sampling_time = 0
    ),
    stdpopsim.Population(id="admixed", 
                         description="admixed", 
                         sampling_time = 0
    )
]


sourceA = msprime.PopulationConfiguration(
    initial_size=Ne, 
    growth_rate=0, 
    metadata=populations[0].asdict()
)

sourceB = msprime.PopulationConfiguration(
    initial_size=Ne, 
    growth_rate=0, 
    metadata=populations[1].asdict()
)

# Asian and CHB
admixed = msprime.PopulationConfiguration(
    initial_size=Ne, 
    growth_rate=0, 
    metadata=populations[2].asdict()
)

population_configurations = [sourceA, sourceB, admixed]

migration_matrix = np.zeros((3,3))

admixture_event = [
    msprime.MassMigration(time=T_admix, source=2, destination=0, proportion=F_admix),
    msprime.MassMigration(time=T_admix+1, source=2, destination=1, proportion=1.0)
]

split_event = [
    msprime.MassMigration(time=T_split, source=1, destination=0, proportion=1.0)
]


demographic_events = admixture_event + split_event

newmodel = stdpopsim.DemographicModel(
        id="simple2pop",
        description="A simple two-population demographic model with pulse admixture.",
        long_description="None",
        populations=populations,
        citations='None',
        generation_time=1,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
    )

In [173]:
newmap = msprime.RecombinationMap(
    positions=contig.recombination_map.get_positions(), 
    rates= [x*3 for x in contig.recombination_map.get_rates()]
)

newcontig = stdpopsim.Contig(recombination_map= newmap)

In [174]:
engine = stdpopsim.get_engine("slim")

with open(SLiM_script_path, "w") as f:
    with redirect_stdout(f):
        _ = engine.simulate(
            newmodel,
            newcontig,
            samples=newmodel.get_samples(*[x.initial_size for x in newmodel.population_configurations]),
            slim_script=True,
            verbosity=2
        )

In [175]:
# change the ts output location
#!sed -i 's+"/tmp/tmp.*.ts+"admixture.browningmodel.ts+g' {SLiM_script_path}
sed_cmd = f's+"/tmp/tmp.*.ts+"{SLiM_ts_path}+g' 
!sed -i {sed_cmd} {SLiM_script_path}

# add gene conversion
!sed -i 's+(recombination_rates, recombination_ends);+(recombination_rates, recombination_ends);\n    initializeGeneConversion(0.6666666, 300, 1.0);+g' {SLiM_script_path}

In [176]:
# check the edits 
!grep {SLiM_ts_path} {SLiM_script_path}
!grep 'initializeGeneConversion' {SLiM_script_path}

    defineConstant("trees_file", "/home/kele/Documents/lai/test/test_2pop/from_slim.trees");
    initializeGeneConversion(0.6666666, 300, 1.0);


In [177]:
remember_cmd = r's+// Admixture pulses\.+sim.registerLateEvent("s000000", "{dbg(self.source); sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);}", G_end-20, G_end-20);\n    // Admixture pulses\.+g '
remember_cmd

's+// Admixture pulses\\.+sim.registerLateEvent("s000000", "{dbg(self.source); sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);}", G_end-20, G_end-20);\\n    // Admixture pulses\\.+g '

In [178]:
!sed -i '{remember_cmd}' {SLiM_script_path}

In [179]:
!grep 's000000' {SLiM_script_path}

    sim.registerLateEvent("s000000", "{dbg(self.source); sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);}", G_end-20, G_end-20);


In [180]:
# run SLiM
!slim {SLiM_script_path} > {SLiM_script_path}.log

In [183]:
ts = tskit.load(SLiM_ts_path)
ts = msprime.mutate(ts, rate=mut_rate)
ts

Tree Sequence,Unnamed: 1
Trees,1667336
Sequence Length,51304566.0
Time Units,unknown
Sample Nodes,50000
Total Size,265.8 MiB
Metadata,dict  SLiM:  dict  file_version: 0.6 generation: 50301 model_type: WF nucleotide_based: False separate_sexes: False spatial_dimensionality: spatial_periodicity: stage: late

Table,Rows,Size,Has Metadata
Edges,5988243,182.7 MiB,
Individuals,25000,1.8 MiB,✅
Migrations,0,8 Bytes,
Mutations,363214,12.8 MiB,
Nodes,388364,14.1 MiB,✅
Populations,3,3.0 KiB,✅
Provenances,2,14.2 KiB,
Sites,363214,8.7 MiB,


In [184]:
ts.Fst(sample_sets=[list(ts.samples(0)),list(ts.samples(1))])

array(0.01432271)

In [122]:
engine = stdpopsim.get_engine(id='msprime')
ts = engine.simulate(
    newmodel,
    contig,
    samples = newmodel.get_samples(*[x.initial_size for x in newmodel.population_configurations]),
    verbosity=2
)
ts

Tree Sequence,Unnamed: 1
Trees,41564
Sequence Length,51304566.0
Time Units,generations
Sample Nodes,3000
Total Size,9.5 MiB
Metadata,No Metadata

Table,Rows,Size,Has Metadata
Edges,166044,5.1 MiB,
Individuals,0,24 Bytes,
Migrations,0,8 Bytes,
Mutations,39003,1.4 MiB,
Nodes,32762,895.8 KiB,
Populations,3,143 Bytes,✅
Provenances,1,25.6 KiB,
Sites,39003,952.2 KiB,


In [91]:
ts.Fst(sample_sets=[list(ts.samples(0)),list(ts.samples(1))])

array(0.05393109)

In [92]:
newmap = msprime.RecombinationMap(
    positions=contig.recombination_map.get_positions(), 
    rates= [x*3 for x in contig.recombination_map.get_rates()]
)

newcontig = stdpopsim.Contig(recombination_map = newmap)

In [97]:
engine = stdpopsim.get_engine("slim")
ts = engine.simulate(
            newmodel,
            newcontig,
            samples = samples,
            verbosity=2
        )

ts = msprime.mutate(ts, rate=mut_rate)



In [99]:
population_configurations

Tree Sequence,Unnamed: 1
Trees,31144
Sequence Length,51304566.0
Time Units,unknown
Sample Nodes,20
Total Size,5.4 MiB
Metadata,dict  SLiM:  dict  file_version: 0.7 generation: 10301 model_type: WF nucleotide_based: False separate_sexes: False spatial_dimensionality: spatial_periodicity: stage: late

Table,Rows,Size,Has Metadata
Edges,105449,3.2 MiB,
Individuals,10,2.7 KiB,✅
Migrations,0,8 Bytes,
Mutations,11904,431.3 KiB,
Nodes,19086,705.6 KiB,✅
Populations,3,3.0 KiB,✅
Provenances,6,17.3 KiB,
Sites,11904,290.6 KiB,


In [98]:
ts.Fst(sample_sets=[list(ts.samples(0)),list(ts.samples(1))])

array(0.0682471)

# This uses the Isolation with Migration generic model

In [28]:
#NA (float) – The initial ancestral effective population size
#N1 (float) – The effective population size of population 1
#N2 (float) – The effective population size of population 2
#T (float) – Time of split between populations 1 and 2 (in generations)
#M12 (float) – Migration rate from population 1 to 2
#M21 (float) – Migration rate from population 2 to 1
NA = ancestral_Ne
N1 = NA
N2 = NA
T = NA*.33
M12 = 0
M21 = 0
model = stdpopsim.IsolationWithMigration(NA, N1, N2, T, M12, M21)
samples = model.get_samples(10,10)
contig = species.get_contig(chromosome, length_multiplier=length_multiplier)

## simulate with msprime

In [29]:
engine = stdpopsim.get_engine(id='msprime')
ts = engine.simulate(
    model,
    contig,
    samples = samples,
    verbosity=2
)
ts

Tree Sequence,Unnamed: 1
Trees,9989
Sequence Length,51304566.0
Time Units,generations
Sample Nodes,20
Total Size,2.1 MiB
Metadata,No Metadata

Table,Rows,Size,Has Metadata
Edges,33755,1.0 MiB,
Individuals,0,24 Bytes,
Migrations,0,8 Bytes,
Mutations,10690,386.3 KiB,
Nodes,6325,173.0 KiB,
Populations,3,139 Bytes,✅
Provenances,1,2.2 KiB,
Sites,10690,261.0 KiB,


In [30]:
ts.Fst(sample_sets=[list(ts.samples(0)),list(ts.samples(1))])

array(0.0878934)

# Write with SLiM

In [87]:
newmap = msprime.RecombinationMap(
    positions=contig.recombination_map.get_positions(), 
    rates= [x*3 for x in contig.recombination_map.get_rates()]
)

newcontig = stdpopsim.Contig(recombination_map = newmap)

In [103]:
engine = stdpopsim.get_engine("slim")

with open(SLiM_script_path, "w") as f:
    with redirect_stdout(f):
        _ = engine.simulate(
            #model,
            newmodel,
            newcontig,
            samples = samples,
            slim_script=True,
            verbosity=2
        )

In [33]:
# change the ts output location
#!sed -i 's+"/tmp/tmp.*.ts+"admixture.browningmodel.ts+g' {SLiM_script_path}
sed_cmd = f's+"/tmp/tmp.*.ts+"{SLiM_ts_path}+g' 
!sed -i {sed_cmd} {SLiM_script_path}

# add gene conversion
!sed -i 's+(recombination_rates, recombination_ends);+(recombination_rates, recombination_ends);\n    initializeGeneConversion(0.6666666, 300, 1.0);+g' {SLiM_script_path}

In [34]:
# check the edits 
!grep {SLiM_ts_path} {SLiM_script_path}
!grep 'initializeGeneConversion' {SLiM_script_path}

    defineConstant("trees_file", "/home/kele/Documents/lai/test/test_2pop/from_slim.trees");
    initializeGeneConversion(0.6666666, 300, 1.0);


In [35]:
remember_cmd = r's+// Admixture pulses\.+sim.registerLateEvent("s000000", "{dbg(self.source); sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);}", G_end-20, G_end-20);\n    // Admixture pulses\.+g '
remember_cmd

's+// Admixture pulses\\.+sim.registerLateEvent("s000000", "{dbg(self.source); sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);}", G_end-20, G_end-20);\\n    // Admixture pulses\\.+g '

In [36]:
!sed -i '{remember_cmd}' {SLiM_script_path}

In [37]:
!grep 's000000' {SLiM_script_path}

    sim.registerLateEvent("s000000", "{dbg(self.source); sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);}", G_end-20, G_end-20);


In [38]:
# check slim version
!slim --version

SLiM version 3.6, built Mar 26 2021 14:42:53


In [39]:
# run SLiM
!slim {SLiM_script_path} > {SLiM_script_path}.log

In [40]:
ts = tskit.load(SLiM_ts_path)
ts

Tree Sequence,Unnamed: 1
Trees,400339
Sequence Length,51304566.0
Time Units,unknown
Sample Nodes,12000
Total Size,57.8 MiB
Metadata,dict  SLiM:  dict  file_version: 0.6 generation: 10331 model_type: WF nucleotide_based: False separate_sexes: False spatial_dimensionality: spatial_periodicity: stage: late

Table,Rows,Size,Has Metadata
Edges,1417921,43.3 MiB,
Individuals,6000,446.8 KiB,✅
Migrations,0,8 Bytes,
Mutations,0,1.2 KiB,
Nodes,91094,3.3 MiB,✅
Populations,3,3.0 KiB,✅
Provenances,1,13.3 KiB,
Sites,0,16 Bytes,


In [41]:
recapmap = msprime.RecombinationMap(
    positions=[0.0, ts.get_sequence_length()], 
    rates= contig.recombination_map.get_rates(), 
    num_loci = int(ts.get_sequence_length())
)