In [3]:
#pragma cling load("./build/libkastore.so")
#pragma cling load("./build/libtskit.so")
#pragma cling add_include_path("./tskit/c")
#pragma cling add_include_path("./kastore/c")

#include "./kastore/c/kastore.h"
#include "./tskit/c/tskit.h"

#include <tskit.h>
#include <stdio.h>
#include <stdlib.h>

// Abort the program when a tskit call reports failure (a negative return code):
// prints the current line number and tsk_strerror's message to stderr, then
// exits with EXIT_FAILURE.
// The expansion is wrapped in do { ... } while (0) so that it behaves as one
// statement (safe inside an un-braced if/else), and the argument is evaluated
// exactly once.
#define check_tsk_error(val)                                                            \
    do {                                                                                \
        const auto tsk_checked_ret_ = (val);                                            \
        if (tsk_checked_ret_ < 0) {                                                     \
            fprintf(stderr, "line %d: %s", __LINE__, tsk_strerror(tsk_checked_ret_));   \
            exit(EXIT_FAILURE);                                                         \
        }                                                                               \
    } while (0)

In [4]:
#include <numeric>
#include <random>

In [76]:
// table collection
// Smoke test: initialise an empty table collection, abort via check_tsk_error
// if initialisation failed, then release it again.
// `ret` holds the tskit return code that check_tsk_error inspects.
int ret;
tsk_table_collection_t tables;
ret = tsk_table_collection_init(&tables, 0);
check_tsk_error(ret);
tsk_table_collection_free(&tables);

In [77]:
// nodes collection
// Smoke test: build a standalone node table with five rows flagged
// TSK_NODE_IS_SAMPLE (the loop index is passed as each row's time, no
// metadata), then free the table.
tsk_node_table_t nodes;
tsk_node_table_init(&nodes, 0);
for (int i=0; i<5; i++) {
    tsk_node_table_add_row(&nodes, TSK_NODE_IS_SAMPLE,i, TSK_NULL, TSK_NULL, NULL, 0);
}  
tsk_node_table_free(&nodes);

In [78]:
//tsk_node_t node;
//tsk_node_table_get_row(&nodes, 0, &node);

In [79]:
//tsk_node_table_print_state(&nodes, stdout)

In [80]:
#include <fstream>

In [81]:
// Per-position record stored by recorder_t.
// The three vectors grow together (one element each per recorder_t::insert
// call for the position); the two coefficients are set when the position is
// first inserted. Field order matters: recorder_t::insert initialises this
// aggregate with designated initialisers in exactly this order.
struct mutation_info {
    // generation value supplied with each observation
    std::vector<int> origin_generation;
    double selection_coefficient;
    double dominance_coefficient;
    // absolute population frequency supplied with each observation
    std::vector<int> abs_population_freq;
    // number of active mutations supplied with each observation
    std::vector<int> num_active_mutations;
};

In [82]:
struct recorder_t {

    // keys
    std::vector<double> positions;
    std::unordered_map<double, mutation_info> data;
    void insert(double position, int origin_generation, int abs_population_freq, double selection_coefficient,double dominance_coefficient, int num_active_mutations) 
        
    {
        if (data.find(position) == data.end()) {
            data[position] = mutation_info{ .origin_generation = std::vector<int>{origin_generation},
                                     .selection_coefficient = selection_coefficient,
                                     .dominance_coefficient = dominance_coefficient,
                                     .abs_population_freq = std::vector<int>{abs_population_freq},
                                     .num_active_mutations = std::vector<int>{num_active_mutations},
                                   };
        } else {
            data[position].abs_population_freq.emplace_back(abs_population_freq);
            data[position].num_active_mutations.emplace_back(num_active_mutations);
            data[position].origin_generation.emplace_back(origin_generation); 
        }
    }



    void save(std::string file) 
    {
        std::ofstream outfile{file};
        outfile << "position,origin_generation,abs_population_freq,selection_coefficient,dominance_coefficient,active_mutations" << std::endl;
        for (auto &it : data) { 
            std::vector<int> abs_population_freq = it.second.abs_population_freq; 
            std::vector<int> num_active_mutations = it.second.num_active_mutations; 
            std::vector<int> origin_generation = it.second.origin_generation;
            double selection_coefficient = it.second.selection_coefficient;
            double dominance_coefficient = it.second.dominance_coefficient;
            

            for (int j=0; j<abs_population_freq.size(); j++) {
                double position = it.first;
                outfile << position << "," << origin_generation[j] << ","
                    <<  abs_population_freq[j] << "," << selection_coefficient << "," << dominance_coefficient << ","
                    << num_active_mutations[j] << std::endl;
            }
        }
        
    }
};

In [83]:
// Return the number of roots of every tree along the tree sequence built
// from `tables`, in left-to-right tree order.
// Side effect: (re)builds the table collection's index.
// Any tskit failure, including an error raised while iterating over the
// trees, aborts the program through check_tsk_error.
std::vector<int> s3_num_roots(tsk_table_collection_t &tables)
{
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int ret, iter;
    std::vector<int> num_roots{};

    ret = tsk_table_collection_build_index(&tables, 0);
    check_tsk_error(ret);
    ret = tsk_treeseq_init(&ts, &tables, 0);
    check_tsk_error(ret);
    ret = tsk_tree_init(&tree, &ts, 0);
    check_tsk_error(ret);

    // tsk_tree_first/next yield 1 while a tree is loaded.
    for (iter = tsk_tree_first(&tree); iter == 1; iter = tsk_tree_next(&tree)) {
        num_roots.emplace_back(tsk_tree_get_num_roots(&tree));
    }
    // The loop also stops on a negative (error) code; do not mistake that for
    // the normal end of the sequence.
    check_tsk_error(iter);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
    return num_roots;
}

In [84]:
// True when every tree reported by s3_num_roots(tables) has exactly one root
// (also true when no trees are reported).
bool s3_mrca_found(tsk_table_collection_t &tables)
{
    const std::vector<int> roots_per_tree = s3_num_roots(tables);

    bool single_root_everywhere = true;
    for (std::size_t t = 0; single_root_everywhere && t < roots_per_tree.size(); ++t) {
        single_root_everywhere = (roots_per_tree[t] == 1);
    }
    return single_root_everywhere;
}

In [85]:
// Mirror the node times in rows [begin, end) around the window's maximum:
// every time t in the window becomes (max - t). Rows outside the window are
// left untouched.
//
// nodes : node table whose `time` column is rewritten in place
// begin : first row of the window
// end   : one past the last row; 0 means "up to the last row", so an empty
//         window ending at row 0 cannot be expressed
//
// The maximum search starts from 0, so a window whose times are all negative
// is mirrored around 0 rather than around its own maximum. Out-of-range
// bounds are clamped to the table; an empty or inverted window is a no-op.
void s3_reverse_time(tsk_node_table_t &nodes, int begin, int end = 0)
{
    const int num_rows = static_cast<int>(nodes.num_rows);
    if (end == 0 || end > num_rows) end = num_rows;
    if (begin < 0) begin = 0;

    double max_element{0};
    for (int i=begin; i<end; i++) {
        if (nodes.time[i] >= max_element) {
            max_element = nodes.time[i];
        }
    }

    // Rewrite in place; no temporary copies of the time column are needed.
    for (int i=begin; i<end; i++) {
        nodes.time[i] = (nodes.time[i] - max_element) * (-1);
    }
}

In [86]:
// testing s3_reverse_time
// Builds five sample nodes whose times are the loop index 0..4, reverses the
// times of rows [2, 5) only, then frees the table. The print call is left
// commented out, so nothing is displayed.
tsk_node_table_t nodes;
tsk_node_table_init(&nodes, 0);
for (int i=0; i<5; i++) {
    tsk_node_table_add_row(&nodes, TSK_NODE_IS_SAMPLE,i, TSK_NULL, TSK_NULL, NULL, 0);
}  
// s3_reverse_time(nodes, 0);
s3_reverse_time(nodes, 2, 5);
//tsk_node_table_print_state(&nodes, stdout);
tsk_node_table_free(&nodes);

In [87]:
// Add `by` to the node times in rows [begin, end); rows outside the window
// are left untouched.
//
// nodes : node table whose `time` column is rewritten in place
// by    : offset added to every time in the window
// begin : first row of the window
// end   : one past the last row; 0 means "up to the last row", so an empty
//         window ending at row 0 cannot be expressed
//
// Out-of-range bounds are clamped to the table; an empty or inverted window
// is a no-op.
void s3_add_time(tsk_node_table_t &nodes, int by, int begin, int end = 0)
{
    const int num_rows = static_cast<int>(nodes.num_rows);
    if (end == 0 || end > num_rows) end = num_rows;
    if (begin < 0) begin = 0;

    // Rewrite in place; no temporary copies of the time column are needed.
    for (int i=begin; i<end; i++) {
        nodes.time[i] = nodes.time[i] + by;
    }
}

In [88]:
// testing s3_add_time
// Builds five sample nodes whose times are the loop index 0..4, adds 10 to
// the time of every row (end defaults to 0, i.e. the whole table), then frees
// the table. The print call is left commented out, so nothing is displayed.
tsk_node_table_t nodes;
tsk_node_table_init(&nodes, 0);
for (int i=0; i<5; i++) {
    tsk_node_table_add_row(&nodes, TSK_NODE_IS_SAMPLE,i, TSK_NULL, TSK_NULL, NULL, 0);
}  
s3_add_time(nodes,10, 0);
// s3_add_time(nodes,10, 2, 5);
//tsk_node_table_print_state(&nodes, stdout);
tsk_node_table_free(&nodes);

In [89]:
// Pseudo-random engine shared by the stats_random_* helpers; seeded once from
// std::random_device, so runs are not reproducible.
std::random_device rd;
std::mt19937 generator(rd());

In [90]:
// Append `n` indices to `output`, each drawn from the shared engine with
// probability proportional to the matching entry of `weights`.
void stats_random_discrete(std::vector<int> &output, const std::vector<double> &weights, int n)
{
    std::discrete_distribution<> pick_index(weights.begin(), weights.end());
    for (int remaining = n; remaining > 0; --remaining) {
        const int drawn = pick_index(generator);
        output.emplace_back(drawn);
    }
}

In [91]:
// Append `n` Poisson(lambda) draws from the shared engine to `output`.
// std::poisson_distribution requires a strictly positive mean, so a
// non-positive (or NaN) lambda is treated as the degenerate case and yields
// zeros without touching the engine.
void stats_random_poisson(std::vector<int> &output, double lambda, int n)
{
    if (!(lambda > 0)) {
        for (int i=0; i<n; i++) {
            output.emplace_back(0);
        }
        return;
    }

    std::poisson_distribution<> d(lambda);
    for (int i=0; i<n; i++) {
        output.emplace_back(d(generator));
    }
}

In [92]:
// Store a single Poisson(lambda) draw from the shared engine in `output`.
// std::poisson_distribution requires a strictly positive mean, so a
// non-positive (or NaN) lambda is treated as the degenerate case and yields 0
// without touching the engine.
void stats_random_poisson(int &output, double lambda)
{
    if (!(lambda > 0)) {
        output = 0;
        return;
    }

    std::poisson_distribution<> d(lambda);
    output = d(generator);
}

In [93]:
// Store in `output` one draw from the uniform distribution on [start, end),
// using the shared engine.
// The bounds are doubles so that fractional limits (callers pass a double
// sequence length as `end`) are not truncated to integers; integer arguments
// still convert implicitly.
void stats_random_real(double &output, double start, double end)
{
    std::uniform_real_distribution<> d(start, end);
    output = d(generator);
}

In [94]:
// Append `m` geometrically decaying weights (successive ratio 1-b) to
// `weights`, then rescale the whole vector so that it sums to 1.
//
// The series starts at b*(1-b)^0. A leading term of b*(1-b)^-1 would differ
// only by a constant factor that normalisation removes, but it divides by
// zero when b == 1 and turns every weight into inf/NaN.
// Elements already present in `weights` take part in the normalisation.
// If the total is exactly 0 (for example b == 0) the weights are left
// unscaled instead of being divided by zero.
void s3_dormancy_weights(std::vector<double> &weights, double b, tsk_id_t m)
{
    double term = b;
    for (int i=0; i<m; i++) {
        weights.emplace_back(term);
        term *= (1-b);
    }

    const double sum = std::accumulate(std::begin(weights), std::end(weights), 0.0);
    if (sum != 0.0) {
        for (double &weight : weights) {
            weight = weight / sum;
        }
    }
}

In [95]:
// Append N indices to `dormancy_generations`, each drawn with probability
// proportional to the matching entry of `weights` (forwards to
// stats_random_discrete).
void s3_dormancy_generation(std::vector<tsk_id_t> &dormancy_generations, std::vector<double> &weights, tsk_id_t N) 
{
    stats_random_discrete(dormancy_generations, weights, N);
}

In [96]:
// Example: dormancy weights for b = 0.9 over m = 5 generations, followed by
// 10 draws from them. The final expression has no semicolon so that the
// notebook echoes its value.
std::vector<double> weights;
s3_dormancy_weights(weights, 0.9, 5);

std::vector<tsk_id_t> dormancy_generations;
s3_dormancy_generation(dormancy_generations, weights, 10);
dormancy_generations

{ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0 }

In [97]:
// Draw new mutation positions uniformly on [0, L) and append them to
// `output`, never reusing a position already present in `lookup`; every
// accepted position is also added to `lookup`.
//
// output : receives the accepted positions
// mu     : mean of each first-stage Poisson draw
// lookup : positions already in use (updated in place)
// L      : upper bound of the position range
// m_i    : number of first-stage Poisson(mu) draws that are summed
//
// Nothing is drawn when mu <= 0 or the first stage sums to 0 (a Poisson
// distribution needs a strictly positive mean), nor when L <= 0 (the only
// drawable position would be 0, so the rejection loop could never finish
// once 0 is taken).
void s3_infsites(std::vector<double> &output, double mu, std::map<double, bool> &lookup, int L, int m_i=1)
{
    if (!(mu > 0) || L <= 0) {
        return;
    }

    int nmut = 0;
    for (int j=0; j<m_i; j++) {
        int accum_mutations = 0;
        stats_random_poisson(accum_mutations, mu);
        nmut += accum_mutations;
    }
    if (nmut == 0) {
        return;
    }

    // NOTE(review): the summed count is used as the mean of a second Poisson
    // draw, which replaces it - confirm this two-stage sampling is intended.
    stats_random_poisson(nmut, nmut);

    for (int i=0; i<nmut; i++) {
        double pos{0};
        // Rejection sampling: redraw until the position is unused.
        do {
            stats_random_real(pos, 0, L);
        } while (lookup.find(pos) != lookup.end());
        output.emplace_back(pos);
        lookup[pos] = true;
    }
}

In [98]:
// Example: s3_infsites with mu = 1.0, L = 1 and m_i = 100, starting from an
// empty lookup; the last expression is echoed by the notebook.
std::vector<double> out;
std::map<double, bool> l;
s3_infsites(out, 1.0, l, 1, 100);
out

{ 0.77882731, 0.92235390, 0.59643319, 0.40956742, 0.60964130, 0.081711360, 0.46248018, 0.94633395, 0.41281448, 0.56683190, 0.55858652, 0.62720455, 0.33502758, 0.95686271, 0.055473618, 0.092962646, 0.073611306, 0.88690895, 0.34269657, 0.12210072, 0.48860681, 0.37370100, 0.10592234, 0.21734814, 0.73402580, 0.72041474, 0.88085031, 0.31122915, 0.86656005, 0.98875304, 0.13743746, 0.30672839, 0.071303190, 0.50700497, 0.39347895, 0.16988041, 0.43123679, 0.46884502, 0.20129902, 0.22602577, 0.38508672, 0.24449199, 0.031711786, 0.40691801, 0.076453842, 0.75318784, 0.076315402, 0.64552315, 0.82410777, 0.69741019, 0.30020888, 0.066365801, 0.99705704, 0.87893068, 0.18525432, 0.24688846, 0.60299270, 0.37623994, 0.98947763, 0.89812934, 0.98872698, 0.042954190, 0.94305211, 0.73406762, 0.60986810, 0.40412682, 0.086777401, 0.14528748, 0.44022829, 0.34960561, 0.37396107, 0.15898881, 0.95209844, 0.77979247, 0.010105798, 0.35299855, 0.19655446, 0.35941530, 0.21781259, 0.61316242, 0.52448329, 0.071800118, 0

In [99]:
// Example: s3_infsites with mu = 2, L = 10 and the default m_i, starting
// from an empty lookup; the last expression is echoed by the notebook.
std::vector<double> output;
std::map<double, bool> lookup;
s3_infsites(output, 2, lookup, 10);
output

{ 9.7290391, 3.7415750, 1.3135355, 6.5768366, 9.7085655, 7.8311087, 9.6004431, 4.5446931 }

In [100]:
// Draw a Poisson(mu) number of new positions uniformly on
// [position, position + 1) and append them to `output`, never reusing a
// position already present in `lookup`; every accepted position is also
// added to `lookup`.
// Nothing is drawn when mu <= 0 (a Poisson distribution needs a strictly
// positive mean).
void s3_fsites(std::vector<double> &output, double mu, double position, std::map<double, bool> &lookup)
{
    if (!(mu > 0)) {
        return;
    }

    int nmut = 0;
    stats_random_poisson(nmut, mu);

    for (int i=0; i<nmut; i++) {
        double pos{0};
        // Rejection sampling: redraw until the shifted position is unused.
        do {
            stats_random_real(pos, 0, 1);
            pos += position;
        } while (lookup.find(pos) != lookup.end());
        output.emplace_back(pos);
        lookup[pos] = true;
    }
}

In [101]:
// Example: s3_fsites with mu = 2 around position 10, starting from an empty
// lookup; the last expression is echoed by the notebook.
std::vector<double> output;
std::map<double, bool> lookup;
s3_fsites(output, 2, 10, lookup);
output

{ 10.375002, 10.600816 }

In [102]:
#include <iostream>
#include <vector>
#include <utility>
#include <algorithm>

In [103]:
// Return `v` concatenated with itself `n` times (e.g. {a, b} x 3 ->
// {a, b, a, b, a, b}).
// A non-positive `n` or an empty `v` yields an empty vector; previously a
// negative `n` wrapped around in the capacity computation and made reserve()
// throw std::length_error.
template<typename T>
std::vector<T> MultiplyVec(const std::vector<T> &v, int n)
{
    std::vector<T> rv;
    if (n <= 0 || v.empty()) {
        return rv;
    }

    rv.reserve(static_cast<std::size_t>(n) * v.size());
    for (int i=0; i<n; i++) {
        rv.insert(rv.end(), v.begin(), v.end());
    }
    return rv;
}

In [104]:
template<typename T>
std::vector<T> get_keys(const std::map<T, std::vector<tsk_id_t>>& map)
{
    std::vector<T> keys;
    keys.reserve(map.size());
    for (const auto& it : map)
        keys.push_back(it.first);
    return keys;
}

In [105]:
// One inherited segment [left, right] of an offspring gamete: rows of this
// shape are later written to the edge table as (left, right, parent, child).
struct recombination_event {
    double left;
    double right;
    tsk_id_t parent;
    tsk_id_t child;
};

In [106]:
// Aggregate-initialisation check: left = 1, right = 2, parent = 3, child = 4.
recombination_event re = {1,2,3,4};

In [107]:
// Append to `recombination_events` the segments that one offspring gamete
// (`next_offspring_id`) inherits from the two parental gametes in
// `parent_idxs`, over the interval [0, L].
//
// A Poisson(r) number of breakpoints is drawn uniformly between 0 and L.
// With no breakpoint the whole interval comes from parent_idxs.first.
// Otherwise consecutive segments alternate between the two parental gametes,
// starting with parent_idxs.first unless a breakpoint fell exactly on 0, in
// which case the starting gamete is swapped.
void s3_recombination_events(std::vector<recombination_event> &recombination_events, double r,
                       std::pair<tsk_id_t, tsk_id_t> parent_idxs, tsk_id_t next_offspring_id, double L)
{
    int nbreaks;
    stats_random_poisson(nbreaks, r);

    // No crossover: a single segment covering everything.
    if (nbreaks == 0) {
        recombination_events.emplace_back(
            recombination_event{0, L, parent_idxs.first, next_offspring_id});
        return;
    }

    std::vector<double> breakpoints;
    breakpoints.reserve(nbreaks);
    for (int drawn = 0; drawn < nbreaks; ++drawn) {
        double point;
        stats_random_real(point, 0, L);
        breakpoints.emplace_back(point);
    }
    std::sort(breakpoints.begin(), breakpoints.end());

    // Make sure the boundaries 0 and L delimit the first and last segment.
    if (breakpoints.back() != L) {
        breakpoints.emplace_back(L);
    }
    if (breakpoints.front() != 0.0) {
        breakpoints.insert(breakpoints.begin(), 0.0);
    } else {
        const int former_first = parent_idxs.first;
        parent_idxs.first = parent_idxs.second;
        parent_idxs.second = former_first;
    }

    // Even-numbered segments come from the first gamete, odd ones from the second.
    const int gamete_of_segment[2] = {parent_idxs.first, parent_idxs.second};
    for (std::size_t seg = 0; seg + 1 < breakpoints.size(); ++seg) {
        recombination_events.emplace_back(
            recombination_event{breakpoints[seg], breakpoints[seg + 1],
                                gamete_of_segment[seg % 2], next_offspring_id});
    }
}

In [108]:
//std::vector<recombination_event> recombination_events;
//s3_recombination_events(recombination_events, 2, std::pair<tsk_id_t, tsk_id_t>{3,4}, 7,10)

In [109]:
// Return the parent of the first recombination event whose closed interval
// [left, right] contains `position`, or -9 when no event covers it.
tsk_id_t s3_gamete_position_recombination(double position, std::vector<recombination_event> &recombination_events)
{
    for (const recombination_event &segment : recombination_events) {
        const bool before_segment = position < segment.left;
        const bool after_segment = position > segment.right;
        if (!before_segment && !after_segment) {
            return segment.parent;
        }
    }
    return -9;
}

In [110]:
//3_gamete_position_recombination(9.8, recombination_events)

In [111]:
// Bookkeeping attached to a pending mutation: `origin` receives the
// generation counter and `position` the genomic position at the places where
// this struct is brace-initialised.
struct MutationMetaData {
    tsk_id_t origin;
    double position;
};

In [112]:
// Checks that the (node id, metadata) pair type used for pending mutations
// can be value-initialised; the notebook echoes the temporary.
std::pair<tsk_id_t, MutationMetaData>{}

@0x55b71b9b90b0

// table collection
// Scratch cell: fill a table collection with five sample nodes whose times
// are the loop index 0..4, then collect the ids of the nodes whose truncated
// time is below sample_generation.
// NOTE(review): the return code of tsk_table_collection_init is stored in
// `ret` but never checked here.
int ret;
tsk_table_collection_t tables;
ret = tsk_table_collection_init(&tables, 0);
for (int i=0; i<5; i++) {
    tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE,i, TSK_NULL, TSK_NULL, NULL, 0);
}  

int sample_generation = 2;
std::vector<tsk_id_t> samples;
for (int i=0; i<tables.nodes.num_rows; i++) {
    // the time is truncated to int before the comparison
    if ((int) tables.nodes.time[i] < sample_generation) {
        samples.emplace_back(i);
    }
}
// no semicolon: the notebook echoes the collected ids
samples

In [113]:
// Checks that the container type used for pending mutations can be
// value-initialised; the notebook echoes the empty vector.
std::vector<std::pair<tsk_id_t, MutationMetaData>>{}

{}

In [114]:
void s3_simplify(tsk_table_collection_t &tables, std::vector<std::pair<tsk_id_t, MutationMetaData>> temp_mutations,
              tsk_id_t sample_generation)
{
    std::vector<tsk_id_t> samples;
    for (int i=0; i<tables.nodes.num_rows; i++) {
        if ((int) tables.nodes.time[i] < sample_generation) {
            samples.emplace_back(i);
        }
    }
    for (int i=0; i<temp_mutations.size(); i++) {
        tsk_site_table_add_row(&tables.sites, temp_mutations[i].second.position, "0", 1, NULL, 0);
        
        tsk_mutation_table_add_row(&tables.mutations, tables.sites.num_rows-1,
                                   temp_mutations[i].first, TSK_NULL, TSK_UNKNOWN_TIME , "1", 1, NULL, 0);
    }
    
    
    tsk_id_t* samples_array = &samples[0];
    ret = tsk_table_collection_sort(&tables, NULL, 0); 
    //std::cout << "ret sort: " << std::endl;
    check_tsk_error(ret);
    ret = tsk_table_collection_simplify(&tables, samples_array, samples.size(), TSK_FILTER_SITES, NULL); 
    //std::cout << "ret simp: " << std::endl;
    check_tsk_error(ret);

}

In [115]:
//s3_simplify(tables, std::vector<std::pair<tsk_id_t, MutationMetaData>>{}, 1)

std::vector<double> get_keys(const std::map<double, std::vector<tsk_id_t>>& map)
{
    std::vector<double> keys;
    keys.reserve(map.size());
    for (const auto& it : map)
        keys.push_back(it.first);
    return keys;
}

In [116]:
// Keys of a map whose values are vectors of tsk_id_t, in the map's
// (ascending) iteration order.
template<typename T>
std::vector<T> get_keys(const std::map<T, std::vector<tsk_id_t>>& map)
{
    std::vector<T> keys;
    keys.reserve(map.size());
    for (const auto& key_and_ids : map) {
        const T& key = key_and_ids.first;
        keys.push_back(key);
    }
    return keys;
}

In [117]:
// Two-dimensional table of tsk_id_t values, indexed as matrix[row][column].
using genotype_matrix = std::vector<std::vector<tsk_id_t>>;

In [118]:
template<typename T>
std::vector<T> get_keys(const std::map<T, genotype_matrix>& map)
{
    std::vector<T> keys;
    keys.reserve(map.size());
    for (const auto& it : map)
        keys.push_back(it.first);
    return keys;
}

In [119]:
// Keys of a map with bool values, in the map's (ascending) iteration order.
template<typename T>
std::vector<T> get_keys(const std::map<T, bool>& map)
{
    std::vector<T> keys;
    keys.reserve(map.size());
    auto cursor = map.begin();
    const auto stop = map.end();
    while (cursor != stop) {
        keys.push_back(cursor->first);
        ++cursor;
    }
    return keys;
}

In [120]:
// Print the elements of `v` on one line of std::cout, each followed by a
// space, then end the line and flush.
void print_double_vector(std::vector<double> v) {
    for (const double value : v) {
        std::cout << value << " ";
    }
    std::cout << std::endl;
}

In [121]:
// Print the elements of `v` on one line of std::cout, each followed by a
// space, then end the line and flush.
void print_int_vector(std::vector<int> v) {
    for (auto element = v.begin(); element != v.end(); ++element) {
        std::cout << *element << " ";
    }
    std::cout << std::endl;
}

In [122]:
// Print the elements of `v` back to back (no separator) on one line of
// std::cout, then end the line and flush.
void print_int_vector2(std::vector<int> v) {
    const std::size_t count = v.size();
    for (std::size_t at = 0; at != count; ++at) {
        std::cout << v[at] << "";
    }
    std::cout << std::endl;
}

In [123]:
// Print every row of `v` with print_double_vector (one row per line), then
// emit one more line break and flush.
void print_double_vector_vector(std::vector<std::vector<double>> v) {
    for (const std::vector<double> &row : v) {
        print_double_vector(row);
    }
    std::cout << std::endl;
}

In [124]:
void s3_merge_selection_genotypes(std::map<int, genotype_matrix> &selection_genotypes_output,
    std::map<double, genotype_matrix> selection_genotypes)
{
    std::vector<double> keys = get_keys(selection_genotypes);
    double current_key = keys[0];
    genotype_matrix current_genotype = selection_genotypes[current_key];
    std::vector<genotype_matrix> merged_genotypes;
    
    for (int i=1; i<keys.size(); i++) {
        if (((int) current_key) == ((int) keys[i])) {
            
            for (int k=0; k<selection_genotypes[keys[i]].size(); k++){
                for (int j=0; j<selection_genotypes[keys[i]][k].size(); j++) {
                    if (selection_genotypes[keys[i]][k][j] > 0 || current_genotype[k][j] > 0) {
                        current_genotype[k][j] = 1;
                    }
                }
            }
            
        } else {
            merged_genotypes.emplace_back(current_genotype);
            current_key = keys[i];
            current_genotype = selection_genotypes[keys[i]];
        }
    }
    
    std::vector<int> int_keys(keys.begin(), keys.end());
    int_keys.erase(std::unique(int_keys.begin(), int_keys.end()), int_keys.end());

    merged_genotypes.emplace_back(current_genotype);
    for (int i=0; i<int_keys.size(); i++) {
        selection_genotypes_output[int_keys[i]] = merged_genotypes[i];
    }
}

In [125]:
//s3_merge_selection_genotypes(selection_genotypes_output, selection_genotypes)

In [126]:
//selection_genotypes_output

In [127]:
void s3_weights(std::vector<double> &weights, std::vector<tsk_id_t> selection_genotype, 
                double dominance_coefficient, double selection_coefficient)
{
    for (int i=0; i<selection_genotype.size(); i+=2) {
        tsk_id_t genotype_A = selection_genotype[i];
        tsk_id_t genotype_a = selection_genotype[i+1];
        if (genotype_A == 0 && genotype_a == 0) {
            weights.emplace_back(1);
        } else if ((genotype_A == 1 && genotype_a == 0) || (genotype_A == 0 && genotype_a == 1)) {
            weights.emplace_back(1.0 + (2.0 * dominance_coefficient * selection_coefficient));
        } else {
            weights.emplace_back(1.0 + (2.0 * selection_coefficient));
        }
    }
}

In [128]:
#include <new>

In [129]:
// Construction check: 2 rows of 2*5 zeros; the inner vector's element type is
// deduced from the arguments. The notebook echoes the result.
genotype_matrix(2, std::vector(2*5, 0))

{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }

In [130]:
// Construction check: 1 row of 2*5 zeros; the inner vector's element type is
// deduced from the arguments. The notebook echoes the result.
genotype_matrix(1, std::vector(2*5, 0))

{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }

In [131]:
template<typename T>
void print_genotypes(std::map<T, genotype_matrix> g)
{
    std::vector<T> keys = get_keys(g);
    for (auto k : keys) {
        std::cout << k << std::endl;
        for (int i=0; g[k].size(); i++) {
            print_int_vector(g[k][i]);
        }
    }
}

In [132]:
void dormancy(tsk_table_collection_t &tables,
              recorder_t &recorder,
              tsk_id_t num_generations,
              tsk_id_t N, 
              tsk_id_t m, 
              double b,
              tsk_id_t gc,
              double mu,
              double r,
              double L,
              std::vector<double> mu_selection_rates,
              std::vector<double> selection_coefficients,
              std::vector<double> dominance_coefficients,
              std::vector<double> selection_positions,
              tsk_id_t selection_activation_generation = 0,
              bool stop_after_mrca = false,
              bool mutations_in_seeds = true,
              bool debug_print = false
              
             ) {
    
    
    
    
    std::map<double, double> position_coefficient_mapping;
    std::map<double, double> position_dominance_mapping;
    for (int i=0; i<selection_positions.size(); i++) {
        position_coefficient_mapping[selection_positions[i]] = selection_coefficients[i];
        position_dominance_mapping[selection_positions[i]] = dominance_coefficients[i];
    }
    
    //tsk_node_table_t nodes = tables.nodes;
    //tsk_edge_table_t edges = tables.edges;
    //tsk_site_table_t sites = tables.sites;
    //tsk_mutation_table_t mutations = tables.mutations;
    
    
    mu = 4 * N * mu * L;
    r = 4 * N * r * L;
    for (int i=0; i<mu_selection_rates.size(); i++) {
        mu_selection_rates[i] = 4 * N * mu_selection_rates[i];
    }
    
    
    
    std::map<double, genotype_matrix> selection_genotypes;
    std::map<double, genotype_matrix> next_selection_genotypes;

    std::map<double, bool> lookup;
    std::map<double, bool> lookup_selection;
    
    std::vector<std::pair<tsk_id_t, MutationMetaData>> temp_mutations;
    std::map<double, bool> mutation_adding_generation;
    
    std::vector<double> mpos_selection;
    std::vector<double> mpos;
    
    std::vector<tsk_id_t> parents;
    
    int gen = 0;
    int selection_generation = 0;
    
    for (int i=0; i<m; i++) {
        for (int j=0; j<2*N; j++) {
            tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, (double) gen, TSK_NULL, TSK_NULL, NULL, 0);
        }
        gen++;
    }
    
    s3_reverse_time(tables.nodes, 0);    
    s3_simplify(tables, std::vector<std::pair<tsk_id_t, MutationMetaData>>{}, m);
    
    while (num_generations > 0) {

        tsk_id_t next_offspring_index = tables.nodes.num_rows;
        
        tsk_id_t num_non_sample{0};
        for (int i=0; i<tables.nodes.num_rows; i++) {
            if (tables.nodes.flags[i] != 1) {
                num_non_sample++;
            }
        }
        
        tsk_id_t first_parental_index = next_offspring_index - (2*N) - num_non_sample;
        
        for (int c=0; c<gc; c++) {
            
            std::vector<double> dorm_weights;
            s3_dormancy_weights(dorm_weights, b, m);

            std::vector<tsk_id_t> dormancy_generations;
            s3_dormancy_generation(dormancy_generations, dorm_weights, 2*N);
            
            std::vector<double> keys = get_keys(selection_genotypes);
            if (selection_activation_generation && gen > selection_activation_generation && keys.size() != 0) {
                
                // probably should be a function
                std::map<int, genotype_matrix> selection_genotypes_output;
                s3_merge_selection_genotypes(selection_genotypes_output, selection_genotypes);
                std::vector<int> int_keys = get_keys(selection_genotypes_output);
                
                
                // update recorder
                std::vector<double> keys = get_keys(selection_genotypes);
                for (auto k : keys) {
                    genotype_matrix gm = selection_genotypes[k];
                    std::vector<tsk_id_t> genotype_current_gen = gm[m-1];
                    tsk_id_t genotype_sum = std::accumulate(std::begin(genotype_current_gen), std::end(genotype_current_gen), 0);
                    if (genotype_sum != 0) {
                        recorder.insert(k, selection_generation, genotype_sum, position_coefficient_mapping[k], position_dominance_mapping[k], keys.size());
                    }
                }
                
                
                
                for (auto k : int_keys) {
                    genotype_matrix gm = selection_genotypes_output[k];
                    std::vector<tsk_id_t> genotype_current_gen = gm[m-1];
                    
                    if (debug_print) {std::cout << k << " : ";}
                    if (debug_print) {print_int_vector2(genotype_current_gen);}
                    
                    tsk_id_t genotype_sum = std::accumulate(std::begin(genotype_current_gen), std::end(genotype_current_gen), 0);
                    if (genotype_sum == 2*N) {
                        
                        s3_add_time(tables.nodes, c, 0, tables.nodes.num_rows-2*N*(c));
                        s3_reverse_time(tables.nodes, tables.nodes.num_rows -2*N*(c), tables.nodes.num_rows);  
                        
                        s3_simplify(tables,temp_mutations, 1);
                        
                        return;
                    }
                }
                
                
                
                
                
                
                
                
                parents.clear();
                
                for(auto d : dormancy_generations) {
                    
                    std::vector<std::vector<double>> total_weights{};
                    for (auto k : int_keys) {
                            std::vector<double> weights;
                            s3_weights(weights, selection_genotypes_output[k][m-1-d],
                                      position_dominance_mapping[k],
                                      position_coefficient_mapping[k]);                      
                            total_weights.emplace_back(weights);   
                    }
                        
                    std::vector<double> multiplied_weights;
                    for (int j=0; j<total_weights[0].size(); j++) {
                        double weight = 1;
                        for (int i=0; i<total_weights.size(); i++) {
                            weight *= total_weights[i][j];
                        }
                        multiplied_weights.emplace_back(weight);
                    }
                    
                    
                    double sum = std::accumulate(std::begin(multiplied_weights), std::end(multiplied_weights), 0.0);
                    for (int i=0; i<multiplied_weights.size(); i++) {
                        multiplied_weights[i] = (double) multiplied_weights[i] / sum;
                        
                    }
                    std::vector<tsk_id_t> parent;
                    stats_random_discrete(parent, multiplied_weights, 1);
                    parents.emplace_back(parent[0]);
                }
                    
                

            } else {
                parents.clear();
                stats_random_discrete(parents, std::vector<double>(N, 1), 2*N);
            }
            
            tsk_id_t i_genotype = 0;
            for (int p=0; p<parents.size(); p+=2) {
                
                
                
                tsk_id_t parent1 = parents[p];
                tsk_id_t parent2 = parents[p+1];
                
                tsk_id_t dormancy_updated_first_parental_index_1 = first_parental_index - (2*N*dormancy_generations[parent1]); 
                tsk_id_t dormancy_updated_first_parental_index_2 = first_parental_index - (2*N*dormancy_generations[parent2]); 
                
                if (c > 0 && (2*N*dormancy_generations[parent1]) >= c*2*N) {
                       dormancy_updated_first_parental_index_1 -= num_non_sample;
                }
                
                if (c > 0 && (2*N*dormancy_generations[parent2]) >= c*2*N) {
                       dormancy_updated_first_parental_index_2 -= num_non_sample;
                }

                tsk_id_t p1g1 = dormancy_updated_first_parental_index_1 + 2*parent1;
                tsk_id_t p1g2 = p1g1+1;
                tsk_id_t p2g1 = dormancy_updated_first_parental_index_2 + 2*parent2;
                tsk_id_t p2g2 = p2g1+1;
                    
                double mendel1 = 0;
                double mendel2 = 0;
                stats_random_real(mendel1, 0, 1);
                stats_random_real(mendel2, 0, 1);
                
                if (mendel1 < 0.5) {
                    tsk_id_t tmp = p1g1;
                    p1g1 = p1g2;
                    p1g2 = tmp;
                }
 
                if (mendel2 < 0.5) {
                    tsk_id_t tmp = p2g1;
                    p2g1 = p2g2;
                    p2g2 = tmp;
                }
                
                
                tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, gen, TSK_NULL, TSK_NULL, NULL, 0);
                tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, gen, TSK_NULL, TSK_NULL, NULL, 0);
                    
                std::vector<recombination_event> recombination_events;
                s3_recombination_events(recombination_events, r, std::pair<tsk_id_t, tsk_id_t>{p1g1,p1g2},next_offspring_index, L);
                
                if (selection_activation_generation && gen > selection_activation_generation) {     
                    
                    
                    std::vector<double> lookup_selection_keys = get_keys(lookup_selection);
                    for (auto k : lookup_selection_keys) {
                        if (!mutation_adding_generation[k]) {
                            tsk_id_t genotype = selection_genotypes[k][m-1-dormancy_generations[parent1]][s3_gamete_position_recombination(k, recombination_events) - dormancy_updated_first_parental_index_1];
                            //std::cout << "genotype: " << genotype << std::endl;
                            next_selection_genotypes[k][0][i_genotype] = genotype;
                        }
                    }
                    for (int i=0; i<mu_selection_rates.size(); i++) {
                        mpos_selection.clear();
                        s3_fsites(mpos_selection, mu_selection_rates[i], selection_positions[i], lookup_selection);
                        

                        for (auto mi : mpos_selection) {
                            recorder.insert(mi, selection_generation, 1, selection_coefficients[i], dominance_coefficients[i], lookup_selection.size());
                            mutation_adding_generation[mi] = true;
                            temp_mutations.emplace_back(std::pair<tsk_id_t, MutationMetaData>{next_offspring_index, {gen,mi}});
                            selection_genotypes[mi] = genotype_matrix(m, std::vector(2*N, 0));
                            next_selection_genotypes[mi] = genotype_matrix(1, std::vector(2*N, 0));
                            next_selection_genotypes[mi][0][i_genotype] = 1;
                        }
                    }                    
                }
                
                for (int re=0; re<recombination_events.size(); re++) {
                    ret = tsk_edge_table_add_row(&tables.edges, recombination_events[re].left, recombination_events[re].right, recombination_events[re].parent, recombination_events[re].child, NULL, 0);
                }
                
                mpos.clear();
                if (mutations_in_seeds) {
                     s3_infsites(mpos, mu, lookup, L, dormancy_generations[parent1]+1);
                } else {
                     s3_infsites(mpos, mu, lookup, L, 1);
                }
                for (int mi=0; mi<mpos.size(); mi++) {
                    temp_mutations.emplace_back(std::pair<tsk_id_t, MutationMetaData>{next_offspring_index, {gen,mpos[mi]}});
                }
                i_genotype++;
                next_offspring_index++;
                
                
                recombination_events.clear();
                s3_recombination_events(recombination_events, r, std::pair<tsk_id_t, tsk_id_t>{p2g1,p2g2},next_offspring_index, L);
                if (selection_activation_generation && gen > selection_activation_generation) {
                    std::vector<double> lookup_selection_keys = get_keys(lookup_selection);
                    for (auto k : lookup_selection_keys) {
                        if (!mutation_adding_generation[k]) {
                            tsk_id_t genotype = selection_genotypes[k][m-1-dormancy_generations[parent2]][s3_gamete_position_recombination(k, recombination_events) - dormancy_updated_first_parental_index_2];
                            //std::cout << "genotype: " << genotype << std::endl;
                            next_selection_genotypes[k][0][i_genotype] = genotype;
                        }
                    }
                    for (int i=0; i<mu_selection_rates.size(); i++) {
                        mpos_selection.clear();
                        s3_fsites(mpos_selection, mu_selection_rates[i], selection_positions[i], lookup_selection);
                        for (auto mi : mpos_selection) {
                            recorder.insert(mi, selection_generation, 1, selection_coefficients[i], dominance_coefficients[i], lookup_selection.size());
                            mutation_adding_generation[mi] = true;
                            temp_mutations.emplace_back(std::pair<tsk_id_t, MutationMetaData>{next_offspring_index, {gen,mi}});
                            selection_genotypes[mi] = genotype_matrix(m, std::vector(2*N, 0));
                            next_selection_genotypes[mi] = genotype_matrix(1, std::vector(2*N, 0));
                            next_selection_genotypes[mi][0][i_genotype] = 1;
                        }
                    }
                    selection_generation++;
                }
  

                    
                
                for (int re=0; re<recombination_events.size(); re++) {
                    ret = tsk_edge_table_add_row(&tables.edges, recombination_events[re].left, recombination_events[re].right, recombination_events[re].parent, recombination_events[re].child, NULL, 0);
                }
                
                mpos.clear();
                if (mutations_in_seeds) {
                     s3_infsites(mpos, mu, lookup, L, dormancy_generations[parent2]+1);
                } else {
                     s3_infsites(mpos, mu, lookup, L, 1);
                }                
                
                
                for (int mi=0; mi<mpos.size(); mi++) {
                    temp_mutations.emplace_back(std::pair<tsk_id_t, MutationMetaData>{next_offspring_index, {gen,mpos[mi]}});
                }
                i_genotype++;
                next_offspring_index++;
                
            }
            
            
            //tsk_node_table_print_state(&tables.nodes, stdout);
            
    
            
            first_parental_index = tables.nodes.num_rows - 2*N;
            
            std::vector<double> lookup_selection_keys = get_keys(lookup_selection);
            if (m > 1) {
                for (int mi=0; mi<lookup_selection_keys.size(); mi++) {
                    for (int i=1; i<m; i++) {
                        selection_genotypes[lookup_selection_keys[mi]][i-1] =  selection_genotypes[lookup_selection_keys[mi]][i]; 
                    }
                }
            }
            
            
            
            std::vector<double> lost_mutations;
            //std::vector<double> fixed_mutations;
            
            for (auto k : lookup_selection_keys) {
                //if (std::accumulate(std::begin(next_selection_genotypes[k][0]), std::end(next_selection_genotypes[k][0]),0) == 2*N) {
                    
                    //std::cout << "loop: ";
                    //print_int_vector(next_selection_genotypes[k][0]);
                    
                    
                    //fixed_mutations.emplace_back(k);
                //}
                //else 
                if (std::accumulate(std::begin(next_selection_genotypes[k][0]), std::end(next_selection_genotypes[k][0]),0) == 0) {
                    lost_mutations.emplace_back(k);
                } else {
                    selection_genotypes[k][m-1] = next_selection_genotypes[k][0];
                    next_selection_genotypes[k] = genotype_matrix(1, std::vector(2*N, 0));
                }
            }
            
            for (int i=0; i<lost_mutations.size(); i++) {
                lookup_selection.erase(lost_mutations[i]);
                selection_genotypes.erase(lost_mutations[i]);
                next_selection_genotypes.erase(lost_mutations[i]);
            }
            
            /*
            for (int i=0; i<fixed_mutations.size(); i++) {
                
                std::cout << "fixed: " << fixed_mutations[i] << std::endl;
                print_int_vector(selection_genotypes[fixed_mutations[i]][m-1]);
                
                s3_add_time(tables.nodes, c+1, 0, tables.nodes.num_rows-2*N*(c+1));
                s3_reverse_time(tables.nodes, tables.nodes.num_rows -2*N*(c+1), tables.nodes.num_rows);  
                s3_simplify(tables,temp_mutations, 1);
                return;
            }
            */
            
            
            std::vector<double> mutation_adding_generation_keys = get_keys(mutation_adding_generation);
            for (int i=0; i<mutation_adding_generation_keys.size(); i++) {
                mutation_adding_generation[mutation_adding_generation_keys[i]] = false;
            }
            
            gen++;
             
        }
        
        s3_add_time(tables.nodes, gc, 0, tables.nodes.num_rows-2*N*gc);
        s3_reverse_time(tables.nodes, tables.nodes.num_rows-2*N*gc, tables.nodes.num_rows);  
        s3_simplify(tables,temp_mutations, m);
        
        
        
        
        /*
        ret = tsk_table_collection_build_index(&tables, 0);
        check_tsk_error(ret);
        tsk_treeseq_t ts;
        ret = tsk_treeseq_init(&ts, &tables, 0);
        check_tsk_error(ret);
        int num_mutations = tsk_treeseq_get_num_mutations(&ts);
        int num_samples = tsk_treeseq_get_num_samples(&ts);
        tsk_vargen_t vargen;
        tsk_variant_t *variant;
        tsk_vargen_init(&vargen, &ts, ts.samples, num_samples, NULL , 0);
        int i=0;
        while(tsk_vargen_next(&vargen, &variant) != 0) {

            double position = tables.sites.position[i];
            std::cout << i << " " << position << " : ";
            int population_abundance = 0;
            int n = 0;
            for (int s=0; s<2*N; s++) {
                int state = (int) vargen.variant.genotypes.i8[s];
                if (state == 1) {
                    n++;
                }
                std::cout << state;
            }
            std::cout << std::endl;
            if (n == 2*N) return;
            i++;
        }
        
        std::vector<double> lookup_selection_keys = get_keys(lookup_selection);
        if (lookup_selection_keys.size() != 0) {
            //print_genotypes(selection_genotypes);
    
            for (auto k : lookup_selection_keys) {
                std::cout << "k " << k << " : ";
                print_int_vector2(selection_genotypes[k][m-1]);
                std::cout << std::endl;
            }
        }
        
        */
        
        
        if (stop_after_mrca && s3_mrca_found(tables)) {
            return;
        }
        
        temp_mutations.clear();
        num_generations--;
        //std::cout << "num_generations: " << num_generations << std::endl;
            
            
        
    }
        

    

    s3_simplify(tables, temp_mutations, 1) ;
    
}

In [133]:
// Scratch arithmetic; the product evaluates to 5.
// NOTE(review): presumably rate 1e-3 times sequence length 5000 — confirm.
1 * 1e-3 * 5000

5.0000000

In [134]:
// Parameters for a test run of dormancy(). Meanings noted below are taken from
// how dormancy() uses the values; anything marked "presumably" is unconfirmed.
tsk_id_t num_generations = 1000;
tsk_id_t N = 50;   // dormancy() sizes each generation's genotype row as 2*N
tsk_id_t m = 1;    // number of generation rows kept per selection genotype matrix
double b =  1;
tsk_id_t gc = 20;  // presumably generations simulated between simplify passes — confirm
double mu = 0; //1e-3;
double r = 0.5e-6;
double L = 5000;   // assigned to tables.sequence_length below
// NOTE(review): the four vectors below and `r` are declared but the call further
// down passes literal vectors and a literal 0.5 instead — confirm which is intended.
std::vector<double> mu_selection_rates = std::vector<double>{1e-6};
std::vector<double> selection_coefficients = std::vector<double>{1};
std::vector<double> dominance_coefficients = std::vector<double>{1};
std::vector<double> selection_positions = std::vector<double>{0.5};
tsk_id_t selection_activation_generation = 500;
bool mutation_in_seeds = false;
// NOTE(review): declared but not passed; the call uses the literals false/true.
bool stop_after_mrca = false;

// table collection
int ret;
tsk_table_collection_t tables;

// Verify initialisation before touching any field of the collection.
ret = tsk_table_collection_init(&tables, 0);
check_tsk_error(ret);
tables.sequence_length = L;

// Left un-freed on purpose so that later cells can still inspect `tables`.
//tsk_table_collection_free(&tables);


// NOTE(review): unlike the later run, no recorder_t is passed here — confirm
// this still matches the current dormancy() signature.
dormancy(tables,num_generations, N, m, b, gc, mu, 0.5, L,
         std::vector<double>{0.00001},
         std::vector<double>{1.0},
         std::vector<double>{0.5},
         std::vector<double>{2500},
         selection_activation_generation, false, mutation_in_seeds,
         true
         
        );
    

// Both calls report failure through a negative return code; stop on error
// rather than silently leaving a missing or invalid test.trees behind.
ret = tsk_table_collection_build_index(&tables, 0);
check_tsk_error(ret);
ret = tsk_table_collection_dump(&tables, "test.trees", 0);
check_tsk_error(ret);


In [135]:
// Notebook-global recorder; dormancy() fills it through recorder.insert(...)
// each time a selected mutation is added.
recorder_t recorder;

In [136]:
// table collection
// Fresh table collection for a run that also fills the global `recorder`.
int ret;
tsk_table_collection_t tables;

// Verify initialisation before touching any field of the collection.
ret = tsk_table_collection_init(&tables, 0);
check_tsk_error(ret);
tables.sequence_length = L;

// Left un-freed on purpose so that later cells can still inspect `tables`.
//tsk_table_collection_free(&tables);


// Single selected locus at position 2500; the vectors are passed in the order
// rate, selection coefficient, dominance coefficient, position (matching the
// parameter vectors declared earlier in the notebook).
dormancy(tables,recorder,num_generations, N, m, b, gc, mu, r, L,
         std::vector<double>{5e-8},
         std::vector<double>{2.0},
         std::vector<double>{0.5},
         std::vector<double>{2500},
         selection_activation_generation, false, mutation_in_seeds,
         true
        );
    

// Both calls report failure through a negative return code; stop on error
// rather than silently leaving a missing or invalid test.trees behind.
ret = tsk_table_collection_build_index(&tables, 0);
check_tsk_error(ret);
ret = tsk_table_collection_dump(&tables, "test.trees", 0);
check_tsk_error(ret);


2500 : 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 
2500 : 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
2500 : 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 
2500 : 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 
2500 : 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1

In [137]:
// Persist the recorder contents to run.txt.
// NOTE(review): save() is defined outside this view; output format presumably CSV — confirm.
recorder.save("run.txt")

In [138]:
!cat run.txt

position,origin_generation,abs_population_freq,selection_coefficient,dominance_coefficient,active_mutations
2500.64,25199,1,2,0.5,1
2500.64,25200,1,2,0.5,1
2500.64,25250,2,2,0.5,1
2500.64,25300,5,2,0.5,1
2500.64,25350,10,2,0.5,1
2500.64,25400,21,2,0.5,1
2500.64,25450,37,2,0.5,1
2500.64,25500,67,2,0.5,1
2500.64,25550,82,2,0.5,1
2500.64,25600,83,2,0.5,1
2500.64,25650,87,2,0.5,1
2500.64,25700,91,2,0.5,1
2500.64,25750,99,2,0.5,1
2500.64,25800,100,2,0.5,1


// table collection
// Fresh table collection for a run with four selected loci.
int ret;
tsk_table_collection_t tables;

// Verify initialisation before touching any field of the collection.
ret = tsk_table_collection_init(&tables, 0);
check_tsk_error(ret);
tables.sequence_length = L;

// Left un-freed on purpose so that later cells can still inspect `tables`.
//tsk_table_collection_free(&tables);


// Four loci at positions 1000..4000 sharing the same rate (1e-4), selection
// coefficient (-0.5) and dominance coefficient (0.5).
// NOTE(review): this call passes neither a recorder nor the trailing flags that
// the other runs pass — confirm it matches the current dormancy() signature.
dormancy(tables,num_generations, N, m, b, gc, mu, r, L,
         std::vector<double>{0.0001,0.0001,0.0001,0.0001},
         std::vector<double>{-0.5, -0.5, -0.5, -0.5},
         std::vector<double>{0.5, 0.5, 0.5, 0.5},
         std::vector<double>{1000, 2000, 3000, 4000},
         selection_activation_generation, false
        );
    

// Both calls report failure through a negative return code; stop on error
// rather than silently leaving a missing or invalid test.trees behind.
ret = tsk_table_collection_build_index(&tables, 0);
check_tsk_error(ret);
ret = tsk_table_collection_dump(&tables, "test.trees", 0);
check_tsk_error(ret);


In [139]:
// Bare expression: the notebook echoes the object, which here shows only its address.
recorder

@0x7fdedb59ff90

In [140]:
//tsk_node_table_print_state(&tables.nodes, stdout);
//tsk_edge_table_print_state(&tables.edges, stdout);

In [141]:
//!rm test.trees