Testing of BioKotlin Ranges

In [None]:
// Adds the locally built BioKotlin jar (relative path under ../build/libs) as a notebook dependency.
// If the jar does not exist, build it first from the command line: ./gradlew shadowjar
@file:DependsOn("../build/libs/biokotlin-0.03-all.jar")

In [None]:
import biokotlin.genome.*
// Import the seq packages as well, because the Ranges examples use NucSeqRecord
import biokotlin.seq.NUC.*
import biokotlin.seq.*
import java.util.*


In [None]:
// Construct a sequence from the string literal "GCAGAT" via Seq(...).
val seq = Seq("GCAGAT")

In [None]:
// Wrap a NucSeq in a NucSeqRecord.
// NOTE(review): the second argument ("1") is presumably the record id — confirm against the NucSeqRecord signature.
val rec1 = NucSeqRecord(NucSeq("ATAACACAGAGATATATC"),"1")

In [None]:
// Take a subset of the sequence held in rec1: just the positions 1 through 6.
// Positions are 0-based and the Kotlin range 1..6 includes both ends,
// so for "ATAACACAGAGATATATC" this should pull TAACAC.
val subSettedSeq = rec1[1..6]
// Leaving the value as the last expression of the cell lets the notebook display it.
subSettedSeq

In [None]:
// Create a Sequence Range (SRange) object from rec1 using the index range 8..12.
val sRange = rec1.range(8..12)

In [None]:
// Print the range built from rec1 so its string form can be inspected.
println(sRange)

In [None]:
// Derive a new value from sRange via flankBoth(5).
// NOTE(review): judging by the name, this presumably extends the range by 5 on both sides — confirm against the BioKotlin API.
val sRangeFlanked = sRange.flankBoth(5)

In [None]:
// Print the result of flankBoth(5) for comparison with the original sRange.
println(sRangeFlanked)

In [None]:
// Derive a new value from sRange via flankRight(5).
// NOTE(review): judging by the name, this presumably concerns only the right-hand side of the range — confirm against the BioKotlin API.
val sRangeFlankRight = sRange.flankRight(5)

In [None]:
// Print the result of flankRight(5) for comparison with the original sRange.
println(sRangeFlankRight)

In [None]:
import kotlin.collections.*
import biokotlin.genome.SeqRangeSort.leftEdge

// Two example DNA strings; the records and ranges below are built from them.
val dnaString = "ACGTGGTGAATATATATGCGCGCGTGCGTGGATCAGTCAGTCATGCATGCATGTGTGTACACACATGTGATCGTAGCTAGCTAGCTGACTGACTAGCTGAC"
val dnaString2 = "ACGTGGTGAATATATATGCGCGCGTGCGTGGACGTACGTACGTACGTATCAGTCAGCTGAC"

// Wrap each string in a NucSeqRecord carrying a description and a one-entry annotations map.
val record1 = NucSeqRecord(
    NucSeq(dnaString),
    "Sequence 1",
    description = "The first sequence",
    annotations = mapOf("key1" to "value1")
)
val record2 = NucSeqRecord(
    NucSeq(dnaString2),
    "Sequence 2",
    description = "The second sequence",
    annotations = mapOf("key1" to "value1")
)

// Three ways of building a range are shown:
//   - SeqPositionRanges.of(record, intRange)
//   - SeqPositionRanges.of(startPosition, endPosition)
//   - record.range(intRange)
// The ranges are never reassigned in this notebook, so they are declared with val.
val range1 = SeqPositionRanges.of(record1, 8..28)
val range2 = SeqPositionRanges.of(record2, 3..19)
val range3 = SeqPositionRanges.of(SeqPosition(record1, 27), SeqPosition(record1, 40))
val range4 = record2.range(25..40)



In [None]:
// Collect the four ranges into a mutable list, in the order range1, range4, range3, range2.
// The list reference itself is never reassigned, so it is a val; the list stays mutable.
val srangeList = mutableListOf<SRange>(range1, range4, range3, range2)

println("\nRanges in the List are:")
// println calls toString() on its argument, so no explicit conversion is needed.
srangeList.forEach { println(it) }

In [None]:
// Comparator built from SeqRangeSort.numberThenAlphaSort together with leftEdge;
// it is reused by the following cells as well.
val comparator: Comparator<SRange> = SeqRangeSort.by(
    SeqRangeSort.numberThenAlphaSort,
    leftEdge
)

// Build a set from the list without merging any of the ranges.
val nonCoalsedSet = nonCoalescingSetOf(comparator, srangeList)

println("\nThe noncoalsedSet has these values:")
nonCoalsedSet.forEach { println(it) }

In [None]:
// Build a second set from the same list and comparator, this time via coalescingSetOf,
// which (per the original note) merges the ranges.
val coalesedSet = coalescingSetOf(comparator, srangeList)

println("\nthe coalsedSet has these values:")
for (mergedRange in coalesedSet) println(mergedRange)


In [None]:
// Sort the non-coalesced set with the shared comparator
// (per the original note: by seqRecord, then lowerEndpoint site) and print each range.
// The sorted set is never reassigned, so it is declared with val.
val setRangesSorted = nonCoalsedSet.toSortedSet(comparator)
setRangesSorted.forEach { println(it) }


In [None]:
%use krangl
// Build several records, take ranges from them, put the ranges in a non-coalescing set
// and print that set as a krangl DataFrame.
// Note: dnaString, dnaString2, record1 and record2 re-declare names from an earlier cell.
val dnaString = "ACGTGGTGAATATATATGCGCGCGTGCGTGGATCAGTCAGTCATGCATGCATGTGTGTACACACATGTGATCGTAGCTAGCTAGCTGACTGACTAGCTGACCGTACGTACGTATCAGTCAGCTGACACGTGGTGAATATATATGCGCGCGTGCGTGGATCAGTCAGTCATGCATGCATGTGTGTACACA"
val dnaString2 = "ACGTGGTGAATATATATGCGCGCGTGCGTGGACGTACGTACGTACGTATCAGTCAGCTGAC"
val dnaString3 = "TCAGTGATGATGATGCACACACACACACGTAGCTAGCTGCTAGCTAGTGATACGTAGCAAAAAATTTTTT"

val record1 = NucSeqRecord(NucSeq(dnaString), "Seq1")
val record2 = NucSeqRecord(NucSeq(dnaString2), "Seq2")
val record3 = NucSeqRecord(NucSeq(dnaString3), "Seq3")
// NOTE(review): record4 wraps dnaString2 again under a different name and is not used further in this cell.
val record4 = NucSeqRecord(NucSeq(dnaString2), "Seq2-id2")

val sr1 = record1.range(27..44)
val sr2 = record1.range(1..15)
val sr3 = record3.range(18..33)
val sr4 = record2.range(25..35)
val sr5 = record2.range(3..13)
// NOTE(review): sr6 is defined but not passed to nonCoalescingSetOf below — confirm whether that is intentional.
val sr6 = record1.range(20..28)

// The ranges are passed individually here (sr1, sr2, sr3, sr5, sr4) rather than as a list.
val srSet1 = nonCoalescingSetOf(
    SeqRangeSort.by(SeqRangeSort.numberThenAlphaSort, SeqRangeSort.leftEdge),
    sr1, sr2, sr3, sr5, sr4
)

// Convert the set to a DataFrame and print it; df is never reassigned, so it is a val.
val df: DataFrame = srSet1.toDataFrame()
df.print()
