In [19]:
import timeit
import time
from pathlib import Path
import random
import sys
sys.path.append(str(Path.cwd().parent / "classes"))
sys.path.append(str(Path.cwd().parent))

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from scipy import stats

from sim_config import SimConfiguration
from simulation import Simulation
from msa import Msa


# Directory (relative to the current working directory) whose entries are
# iterated as input trees by the benchmark below.
TREES_PATH = Path.cwd().joinpath("chiroptera_data")

In [None]:
def refresh_sim(tree_file, seed=None, indel_rate=None, length_param_alpha=None):
    """Build a fresh ``Simulation`` for ``tree_file`` with randomised indel settings.

    Every sampled value is printed so that a run can be replayed later by
    passing the logged values back in through the optional arguments.

    Parameters
    ----------
    tree_file : str or path-like
        Tree file handed to ``Simulation`` (converted with ``str``).
    seed : int, optional
        Seed forwarded to ``SimConfiguration``. When omitted it is derived
        from the nanosecond clock (``time.time_ns() % 1000000``).
    indel_rate : float, optional
        Used for both ``rate_ins`` and ``rate_del``. When omitted it is drawn
        with ``random.uniform(0.0001, 0.05)``.
    length_param_alpha : float, optional
        Passed as ``indel_length_alpha``. When omitted it is drawn with
        ``random.uniform(1.01, 2.0)``.

    Returns
    -------
    Simulation
        A new simulation object configured with the values above.
    """
    ROOT_SEQUENCE_LENGTH = 2000

    # Draw order (rate, then alpha, then seed) matches the historical behaviour
    # so the default call consumes the global RNG exactly as before.
    if indel_rate is None:
        indel_rate = random.uniform(0.0001, 0.05)
    if length_param_alpha is None:
        length_param_alpha = random.uniform(1.01, 2.0)
    if seed is None:
        seed = time.time_ns() % 1000000

    # Log everything that was sampled; without alpha the run is not replayable.
    print("seed=", seed, "indel_rate=", indel_rate,
          "length_param_alpha=", length_param_alpha)

    sim_config = SimConfiguration(original_sequence_length=ROOT_SEQUENCE_LENGTH,
                                  indel_length_alpha=length_param_alpha,
                                  indel_truncated_length=50,
                                  rate_ins=indel_rate, rate_del=indel_rate,
                                  deletion_extra_edge_length=50,
                                  switch_factor=200,
                                  seed=seed)
    sim = Simulation(input_tree=str(tree_file), config=sim_config)
    return sim

In [23]:
# Benchmark both MSA construction methods on every entry of TREES_PATH.
# time_log collects one mean runtime (seconds, as returned by timeit) per tree
# and per method; tree_names[k] is the tree that row k of time_log belongs to.
time_log = {"blocklist": [],
            "naive": []
            }
tree_names = []

# Path.iterdir() yields entries in arbitrary order; sorting keeps the row order
# stable between runs and machines.
for tree in sorted(TREES_PATH.iterdir()):
    print(tree.stem)
    tree_names.append(tree.stem)
    times_of_list, times_of_naive = [], []
    for _ in range(10):
        # A new simulation (new random parameters) for every repetition.
        current_sim = refresh_sim(str(tree))

        blocklist_time = timeit.timeit(current_sim.msa_from_blocklist, number=1)
        times_of_list.append(blocklist_time)
        print("Blocklist time","is", blocklist_time, "sec")

        naive_time = timeit.timeit(current_sim.msa_from_naive, number=1)
        print("Naive_time","is", naive_time, "sec")
        times_of_naive.append(naive_time)
    # Store the mean over the repetitions for this tree.
    time_log["blocklist"].append(np.mean(times_of_list))
    time_log["naive"].append(np.mean(times_of_naive))



7288_NT_AL_AA
seed= 835593 indel_rate= 0.019174688024054535
Blocklist time is 0.026388086000224575 sec
Naive_time is 0.25019753699962166 sec
seed= 597000 indel_rate= 0.03836849185141822
Blocklist time is 0.02528645799975493 sec
Naive_time is 0.27112231199862435 sec
seed= 55329 indel_rate= 0.0413455224716563
Blocklist time is 0.02676103599878843 sec
Naive_time is 0.3136040410008718 sec
seed= 701379 indel_rate= 0.022947699712374637
Blocklist time is 0.024778393999440596 sec
Naive_time is 0.2731318130008731 sec
seed= 546117 indel_rate= 0.04320628534639074
Blocklist time is 0.025056397998923785 sec
Naive_time is 0.2828783379991364 sec
seed= 740298 indel_rate= 0.0482882767971225
Blocklist time is 0.02576983699873381 sec
Naive_time is 0.30063079799947445 sec
seed= 921105 indel_rate= 0.048652504410841825
Blocklist time is 0.02538137800002005 sec
Naive_time is 0.268717780998486 sec
seed= 218877 indel_rate= 0.04737500779719968
Blocklist time is 0.026072736998685286 sec
Naive_time is 0.325358227

In [24]:
# Render the collected mean runtimes as a table, one column per method.
pd.DataFrame(data=time_log)

Unnamed: 0,blocklist,naive
0,0.025547,0.281615
1,0.025084,0.268677
2,0.025119,0.280378
3,0.025639,0.269029
4,0.025642,0.295416
5,0.0248,0.266108
6,0.024613,0.260383
7,0.025138,0.27059
8,0.024862,0.241323
9,0.024395,0.226481
