In [21]:
import os
import subprocess
import sys
import time

import psutil

In [22]:
# Execute the companion notebooks so that the names they define are available
# to the cells below. None of the BWT / search helpers called later
# (BWTViaBWM, BWTViaSA, ReverseBWT, GetWholeGenomeFromFile, SearchOverGenome*,
# SearchViaImprovedSort) are defined in this notebook's own cells, so they
# presumably come from these three files -- TODO confirm which defines which.
%run BurrowsWheelerTransform.ipynb
%run BurrowsWheelerTransformImproved.ipynb
%run BurrowsWheelerTransformSearchOverGenome.ipynb

In [None]:
"""Test samples"""

# Three short text samples; each one ends with the "$" terminator character.
test1 = "Tomorrow_and_tomorrow_and_tomorrow$"
test2 = "It_was_the_best_of_times_it_was_the_worst_of_times$"
test3 = "in_the_jingle_jangle_morning_Ill_come_following_you$"

# One longer nucleotide sample. Adjacent string literals are joined by the
# parser, so the value is a single line with no embedded newline or spaces.
test4 = (
    "GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTTGATTTGG"
    "GGTTCAAAGCAGTAATTTGGGGTTCAAAGCAGTATCGACAAATAGTAAATCCATTTGTTCATTCAAAGCAGTAATT"
    "TGGGGTTATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT$"
)

In [None]:
""" Performance measurement and benchmarking """

# Time BWTViaBWM on each of the four samples.
%timeit BWTViaBWM(test1)
%timeit BWTViaBWM(test2)
%timeit BWTViaBWM(test3)
%timeit BWTViaBWM(test4)

print()

# Time BWTViaSA on the same samples, for comparison with the timings above.
%timeit BWTViaSA(test1)
%timeit BWTViaSA(test2)
%timeit BWTViaSA(test3)
%timeit BWTViaSA(test4)

print()

# Time ReverseBWT.
# NOTE(review): bwt1..bwt4 are not assigned in any cell visible in this
# notebook; presumably one of the %run notebooks creates them. On a fresh
# kernel this group raises NameError if that is not the case -- TODO confirm.
%timeit ReverseBWT(bwt1)
%timeit ReverseBWT(bwt2)
%timeit ReverseBWT(bwt3)
%timeit ReverseBWT(bwt4)

In [None]:
# Run the bwmTest.py script once per sample in a separate Python process.
# IPython replaces $test1..$test4 with the value of the notebook variable
# before handing the line to the shell. These values contain "$" only as their
# last character, which a POSIX shell presumably passes through unchanged --
# TODO confirm on the shell actually in use.
!python "./memTest/bwmTest.py" $test1
!python "./memTest/bwmTest.py" $test2
!python "./memTest/bwmTest.py" $test3
!python "./memTest/bwmTest.py" $test4

In [None]:
# Run the saTest.py script once per sample in a separate Python process.
# IPython replaces $test1..$test4 with the value of the notebook variable
# before handing the line to the shell. These values contain "$" only as their
# last character, which a POSIX shell presumably passes through unchanged --
# TODO confirm on the shell actually in use.
!python "./memTest/saTest.py" $test1
!python "./memTest/saTest.py" $test2
!python "./memTest/saTest.py" $test3
!python "./memTest/saTest.py" $test4

In [None]:
# Run the reverseTest.py script once per transformed sample.
#
# The script is started without a shell on purpose: a Burrows-Wheeler output
# carries the "$" terminator at an arbitrary position, and when such a value is
# interpolated into a `!` line a POSIX shell reads "$" plus the following
# letters as a variable reference and silently rewrites the argument. Passing
# an argument list hands the string to the script exactly as it is.
#
# NOTE(review): bwt1..bwt4 are not assigned in any cell visible in this
# notebook; presumably one of the %run notebooks creates them -- TODO confirm.
for transformed_sample in (bwt1, bwt2, bwt3, bwt4):
    completed = subprocess.run(
        # sys.executable is the interpreter running this kernel.
        [sys.executable, "./memTest/reverseTest.py", str(transformed_sample)],
        capture_output=True,
        text=True,
    )
    # Output is captured and re-printed so it appears in the cell output.
    print(completed.stdout, end="")
    if completed.stderr:
        print(completed.stderr, end="", file=sys.stderr)

In [None]:
# Run the bwmTestFiles.py script three times with the arguments 1, 2 and 3.
# NOTE(review): the argument presumably selects one of three input files
# inside the script -- TODO confirm against bwmTestFiles.py.
!python "./memTest/bwmTestFiles.py" 1
!python "./memTest/bwmTestFiles.py" 2
!python "./memTest/bwmTestFiles.py" 3

In [None]:
# Run the saTestFiles.py script three times with the arguments 1, 2 and 3.
# NOTE(review): the argument presumably selects one of three input files
# inside the script -- TODO confirm against saTestFiles.py.
!python "./memTest/saTestFiles.py" 1
!python "./memTest/saTestFiles.py" 2
!python "./memTest/saTestFiles.py" 3

In [23]:
"""Benchmarking Data"""

# Benchmark inputs: each entry pairs a genome file path ("file") with the list
# of search patterns ("patterns") that the benchmark functions run against it.
dataSet = [
    dict(
        file="./data/13443_ref_Cara_1.0_chr1c.fa",
        patterns=["ATGCATG", "TCTCTCTA", "TTCACTACTCTCA"],
    ),
    dict(
        file="./data/10093_ref_PAHARI_EIJ_v1.1_chrX.fa",
        patterns=["ATGATG", "CTCTCTA", "TCACTACTCTCA"],
    ),
    dict(
        file="./data/144034_ref_Pbar_UMD_V03_chrUn.fa",
        patterns=["CGCGAG", "GTCGAAT", "GGGCGTCATCGCGCG"],
    ),
]

In [30]:
def BenchmarkSearchOverGenomeWithImprovedSort(stepSize):
    """Run SearchOverGenomeWithImprovedSort over every file/pattern pair in ``dataSet``.

    Each genome is loaded once with GetWholeGenomeFromFile and reused for all
    patterns of that file. For every pattern a header line is printed,
    followed by element ``[1]`` of the search result (presumably the elapsed
    time in seconds -- confirm against SearchOverGenomeWithImprovedSort).

    Args:
        stepSize: Passed through unchanged as the third argument of the
            search function.

    Returns:
        None. Results are reported on standard output only.
    """
    for entry in dataSet:
        file = entry.get("file")
        genome = GetWholeGenomeFromFile(file)

        for pattern in entry.get("patterns"):
            print(f"{file} : {pattern} executed in:")
            outcome = SearchOverGenomeWithImprovedSort(genome, pattern, stepSize)
            print(outcome[1])

In [17]:
def BenchmarkSearchOverGenomeWithImprovedDict(stepSize):
    """Run SearchOverGenomeWithImprovedDict over every file/pattern pair in ``dataSet``.

    Each genome is loaded once with GetWholeGenomeFromFile and reused for all
    patterns of that file. For every pattern a header line is printed,
    followed by element ``[1]`` of the search result (presumably the elapsed
    time in seconds -- confirm against SearchOverGenomeWithImprovedDict).

    Args:
        stepSize: Passed through unchanged as the third argument of the
            search function.

    Returns:
        None. Results are reported on standard output only.
    """
    for data in dataSet:
        file = data.get("file")
        genome = GetWholeGenomeFromFile(file)
        patterns = data.get("patterns")

        for pattern in patterns:
            print(f"{file} : {pattern} executed in:")
            # A bare expression inside a function is never displayed, so the
            # value announced by the header has to be printed explicitly.
            print(SearchOverGenomeWithImprovedDict(genome, pattern, stepSize)[1])

In [18]:
def BenchmarkSearchOverGenomeWithOldSA(stepSize):
    """Run SearchOverGenomeWithOldSA over every file/pattern pair in ``dataSet``.

    Each genome is loaded once with GetWholeGenomeFromFile and reused for all
    patterns of that file. For every pattern a header line is printed,
    followed by element ``[1]`` of the search result (presumably the elapsed
    time in seconds -- confirm against SearchOverGenomeWithOldSA).

    Args:
        stepSize: Passed through unchanged as the third argument of the
            search function.

    Returns:
        None. Results are reported on standard output only.
    """
    for data in dataSet:
        file = data.get("file")
        genome = GetWholeGenomeFromFile(file)
        patterns = data.get("patterns")

        for pattern in patterns:
            print(f"{file} : {pattern} executed in:")
            # A bare expression inside a function is never displayed, so the
            # value announced by the header has to be printed explicitly.
            print(SearchOverGenomeWithOldSA(genome, pattern, stepSize)[1])

In [19]:
def BenchmarkSearchOverGenomeWithBWM(stepSize):
    """Run SearchOverGenomeWithBWM over every file/pattern pair in ``dataSet``.

    Each genome is loaded once with GetWholeGenomeFromFile and reused for all
    patterns of that file. For every pattern a header line is printed,
    followed by element ``[1]`` of the search result (presumably the elapsed
    time in seconds -- confirm against SearchOverGenomeWithBWM).

    Args:
        stepSize: Passed through unchanged as the third argument of the
            search function.

    Returns:
        None. Results are reported on standard output only.
    """
    for data in dataSet:
        file = data.get("file")
        genome = GetWholeGenomeFromFile(file)
        patterns = data.get("patterns")

        for pattern in patterns:
            print(f"{file} : {pattern} executed in:")
            # A bare expression inside a function is never displayed, so the
            # value announced by the header has to be printed explicitly.
            print(SearchOverGenomeWithBWM(genome, pattern, stepSize)[1])

In [20]:
def BenchmarkSearchViaImprovedSortOverWholeFile(file, pattern):
    """Time a single SearchViaImprovedSort call over a whole genome file.

    The genome is loaded with GetWholeGenomeFromFile before the clock starts,
    so only the search itself is measured.

    Args:
        file: Path handed to GetWholeGenomeFromFile.
        pattern: Pattern handed to SearchViaImprovedSort.

    Returns:
        None. The elapsed time in seconds is printed.
    """
    genome = GetWholeGenomeFromFile(file)

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by a system clock adjustment that happens during a long search.
    startTime = time.perf_counter()
    SearchViaImprovedSort(genome, pattern)
    duration = time.perf_counter() - startTime

    print(f"{file} : {pattern} executed in: {duration}")

In [31]:
# Long-running cell: the recorded output shows values between roughly 40 and
# 190 for each of the nine file/pattern pairs (presumably seconds).
BenchmarkSearchOverGenomeWithImprovedSort(stepSize=100_000)

./data/13443_ref_Cara_1.0_chr1c.fa : ATGCATG executed in:
40.697954416275024
./data/13443_ref_Cara_1.0_chr1c.fa : TCTCTCTA executed in:
40.85098099708557
./data/13443_ref_Cara_1.0_chr1c.fa : TTCACTACTCTCA executed in:
40.97299408912659
./data/10093_ref_PAHARI_EIJ_v1.1_chrX.fa : ATGATG executed in:
116.10301899909973
./data/10093_ref_PAHARI_EIJ_v1.1_chrX.fa : CTCTCTA executed in:
116.97501254081726
./data/10093_ref_PAHARI_EIJ_v1.1_chrX.fa : TCACTACTCTCA executed in:
117.80299496650696
./data/144034_ref_Pbar_UMD_V03_chrUn.fa : CGCGAG executed in:
185.68597507476807
./data/144034_ref_Pbar_UMD_V03_chrUn.fa : GTCGAAT executed in:
187.54194235801697
./data/144034_ref_Pbar_UMD_V03_chrUn.fa : GGGCGTCATCGCGCG executed in:
181.7559175491333
