In [61]:
using Random
using Random123
using Distributions
using Plots
using StatsPlots
using StatsBase
using Statistics
using DataFrames
using Chain
using Dates
include("Utils.jl")

initialize_prngs

In [83]:
# ---- Simulation parameters --------------------------------------------------------------

# Size of the landscape, in loci. Genomes live in the bits of a UInt128, so an arbitrary
# maximum genome size cannot be modelled; instead a maximum genome size is enforced as a
# bit width within the UInt128.
loci = 20

# Not fixed here (formerly `init_active_loci = 3`): how many loci, out of the total number
# of loci in the landscape defined above, are unlocked at the start.

# Cap on the bits of the initial genotype; its maximum is the number of initially active
# loci. Holding it fixed makes the initial genotype reproducible between restricted and
# unlocked landscapes of the same size. That lets us model how the restrictedness of the
# landscape at initialization (init_active_loci) determines which peaks are reached while
# controlling for the starting genotype; otherwise changing the number of unlocked loci
# would generate a different starting genotype (potentially one that was not available on
# the restricted landscape).
max_init_genotype_bits = 3

# Number of time steps to simulate.
simulation_length = 200000

# Population size N.
total_population = 5000

# Standard deviation of the epistatic effects (typed global; the value is stored as 0.0).
σ_epi::Float64 = 0.0

# Mutation rate of the genotypes with some genome, set to (1/N)/10 so that N*μ = 0.1.
# Claudia says that Nμ = 1 is a weird parameter regime, so it is adjusted a bit lower.
μ = inv(total_population) / 10

# Rate of genome evolution (expansion/streamlining events). Currently equal to μ: the
# extra factor of 10^-1 that would make these events an order of magnitude rarer than
# mutations is disabled.
M = μ


2000

In [85]:
# Replicate experiment: for every additive-effects seed (replicate) and every number of
# initially unlocked loci, run one simulation and record the genome size of the last
# entry in the selective-sweep table. Results are accumulated in `final_genome_sizes`
# (one row per completed run) and printed once all runs have finished.
final_genome_sizes = DataFrame(Init_active = Int[], Replicate = Int[], Final_size = Int[])
for i in 100:133
    for init_active_loci in (3, 10, 19)
        # Re-seed for this replicate; only the additive-effects seed varies with `i`.
        # NOTE(review): neither these RNGs nor `additive_effects` are passed to `simulate`
        # below -- presumably Utils.jl reads them as globals; confirm that the assignments
        # made inside this loop are the ones `simulate` actually sees.
        rng_default, rng_additive, rng_init_genotype, rng_init_genome, rng_mutation = initialize_prngs(additive_seed = i)
        # The full range of additive effects (128 bit) is always generated.
        additive_effects = generate_additive_effects(128)
        df_genotypes = simulate(loci, init_active_loci, max_init_genotype_bits, total_population, σ_epi, μ, M, simulation_length)
        # Element 7 of the processed output is the selective-sweep dataframe.
        sweeps = process_data(df_genotypes)[7]
        if nrow(sweeps) == 0
            # Previously this case evaluated a bare `NaN` and dropped the run silently;
            # report it so gaps in the result table can be traced to a specific run.
            @warn "No selective sweeps recorded; run omitted from final_genome_sizes" init_active_loci replicate = i
            continue
        end
        # Genome size = number of set bits in the genome of the major genotype at the end
        # of the simulation. `count_ones` counts the bits without building a digit vector.
        push!(final_genome_sizes, (init_active_loci, i, count_ones(last(sweeps.Genome))))
    end
end
println(final_genome_sizes)

Iteration: 2000
9×3 DataFrame
 Row │ Init_active  Replicate  Final_size
     │ Int64        Int64      Int64
─────┼────────────────────────────────────
   1 │           3        100           9
   2 │          10        100          13
   3 │          19        100          19
   4 │           3        101           6
   5 │          10        101          10
   6 │          19        101          18
   7 │           3        102           3
   8 │          10        102          11
   9 │          19        102          18
