In [1]:
%run SequencingSimulator.ipynb

In [2]:
#test for getStandardDeviation function

assert getStandardDeviation(35) == 1
assert getStandardDeviation(125) == 1
assert getStandardDeviation(45) == 2
assert getStandardDeviation(115) == 2
assert getStandardDeviation(65) == 5
assert getStandardDeviation(95) == 5
assert getStandardDeviation(75) == 10

In [3]:
#test for generateQuality function

def calculateSum(readLength, quality, readQuality):
    sum = 0
    for ch in readQuality:
        sum += ord(ch)
    sum = (int)(sum/readLength)
    return sum
    
def generateQualityTest(quality, readLength):
    readQuality = generateQuality(quality, readLength)
    assert len(readQuality) == readLength
    sum = calculateSum(readLength, quality, readQuality)
    assert sum <= quality + 2
    assert sum >= quality - 2

readLength = 100
quality = 70
generateQualityTest(quality, readLength)

readLength = 150
quality = 50
generateQualityTest(quality, readLength)

In [4]:
#test for generateSingleRead function and getLeftMostPosition

refGenome = ['D','T','C','A','G','T','C','G',0,2]
readSize = 4
refPos = 2
direction = 1
readAndIndex = generateSingleRead(readSize, refGenome, refPos, direction, lambda x: x)
assert len(readAndIndex[0]) == 4
assert readAndIndex[0] == "CAGT"
assert readAndIndex[1] == 5
leftmostPosition = getLeftmostPosition(refGenome, refPos, readAndIndex[1])
assert leftmostPosition == 3

readSize = 4
refPos = 7
direction = -1
readAndIndex = generateSingleRead(readSize, refGenome, refPos, direction, lambda x: complNucleotids[x])
assert len(readAndIndex[0]) == 4
assert readAndIndex[0] == "CGAC"
assert readAndIndex[1] == 4
leftmostPosition = getLeftmostPosition(refGenome, refPos, readAndIndex[1])
assert leftmostPosition == 8

readSize = 5
refPos = 0
direction = 1
readAndIndex = generateSingleRead(readSize, refGenome, refPos, direction, lambda x: x)
assert len(readAndIndex[0]) == 5
assert readAndIndex[0] == "TCAGT"
assert readAndIndex[1] == 5
leftmostPosition = getLeftmostPosition(refGenome, refPos, readAndIndex[1])
assert leftmostPosition == 1

readSize = 4
refPos = 6
direction = 1
readAndIndex = generateSingleRead(readSize, refGenome, refPos, direction, lambda x: x)
assert len(readAndIndex[0]) == 4
assert readAndIndex[0] == "CGAG"
assert readAndIndex[1] == 9
leftmostPosition = getLeftmostPosition(refGenome, refPos, readAndIndex[1])
assert leftmostPosition == 7

refGenome = ['D',7,6,5,4,0]
readSize = 5
refPos = 0
direction = 1
readAndIndex = generateSingleRead(readSize, refGenome, refPos, direction, lambda x: x)
assert len(readAndIndex[0]) == 5
assert readAndIndex[0] == "TGCAA"
assert readAndIndex[1] == 5
leftmostPosition = getLeftmostPosition(refGenome, refPos, readAndIndex[1])
assert leftmostPosition == 1

In [13]:
#test for readGenome function and insertMutations function

fileName = "GenomTest.fa"
refGenomeDict = readGenome(fileName)
assert len(refGenomeDict) == 2
assert ("sequence1" in refGenomeDict) == True
assert len(refGenomeDict["sequence1"]) == 15
assert ("sequence2" in refGenomeDict) == True
assert len(refGenomeDict["sequence2"]) == 17

errorSnv = 0.1
errorInDel = 0.05
originalRefGenome = readGenome(fileName)
insertMutations(refGenomeDict, errorSnv, errorInDel)
expectedNumOfMut = (round)((len(refGenomeDict["sequence1"]) + len(refGenomeDict["sequence2"])) * (errorSnv + errorInDel))
actualNumOfMut = 0
j = 0
k = 0
i = 0
origGenome = list(originalRefGenome.values())
refGenome = list(refGenomeDict.values())
for genome in refGenome:
    ogenome = origGenome[j]
    print(ogenome)
    print(genome)
    while i < len(ogenome):
        if (genome[k] != ogenome[i]):
            if (genome[k] != 0 and genome[k] != 1 and genome[k] != 2 and genome[k] != 3):
                i += 1 
            k += 1
            actualNumOfMut += 1
        else :
            k += 1
            i += 1
    j += 1
    print(actualNumOfMut)
print(actualNumOfMut)
            

['A', 'T', 'C', 'G', 'T', 'T', 'A', 'A', 'T', 'G', 'C', 'C', 'G', 'T', 'A']
['D', 'T', 'C', 'G', 'T', 'T', 'A', 'A', 4, 5, 'C', 'C', 'G', 'T', 'A']
3
['A', 'T', 'G', 'G', 'A', 'C', 'G', 'T', 'T', 'A', 'C', 'G', 'T', 'T', 'C', 'G', 'A']
['A', 'T', 'G', 'G', 'A', 'C', 'G', 'T', 'T', 'A', 'C', 7, 'T', 'T', 'C', 7, 'A']
4
4


In [None]:
#test for validateParameters function

assert validateParameters(55, 5, 9, 10, 0.2, 0.1) == True
assert validateParameters(25, 5, 9, 10, 0.2, 0.1) == False
assert validateParameters(55, -5, 9, 10, 0.2, 0.1) == False
assert validateParameters(55, 5, -9, 10, 0.2, 0.1) == False
assert validateParameters(55, 5, 9, -10, 0.2, 0.1) == False
assert validateParameters(55, 5, 10, 9, 0.2, 0.1) == False
assert validateParameters(55, 5, 9, 10, -0.2, 0.1) == False
assert validateParameters(55, 5, 9, 10, 0.2, -0.1) == False
assert validateParameters(55, 5, 9, 10, 1.2, 0.1) == False
assert validateParameters(55, 5, 9, 10, 0.2, 1.1) == False
assert validateParameters(55, 5, 9, 10, 0.8, 0.7) == False

In [None]:
#test for simulatePairedEndSequencing function

readLength = 7
simulatePairedEndSequencing("GenomTest.fa", 70, 3, readLength, 12)
samFile = open("GenomTest.sam")
i = 0
for line in samFile.readlines():
    if len(line) != 0:
        if i == 0:
            assert line == "@SQ SN:sequence1 LN:15\n"
            i += 1
        elif i == 1:
            assert line == "@SQ SN:sequence2 LN:17\n"
            i += 1
        else:
            words = line.split()
            assert len(words) == 4
            assert len(words[2]) == readLength
            assert len(words[3]) == readLength
            i += 1
assert i == (2 + 6 + 8) #2 - there are 2 sequences, 6  - there are 6 reads from first sequence, 8- there are 8 reads from second sequence
samFile.close()