Testing of BioKotlin Ranges

In [1]:
// Load the BioKotlin "-all" jar into this notebook session (relative path, version pinned in the file name).
// If the jar does not exist yet, build it first from the command line: ./gradlew shadowjar
@file:DependsOn("../build/libs/biokotlin-0.03-all.jar")

In [13]:
import biokotlin.genome.*
// Import biokotlin.seq.* because the range examples below use NucSeqRecord
import biokotlin.seq.NUC.*
import biokotlin.seq.*
import java.util.*


In [3]:
// Build a sequence directly from a plain string.
// NOTE(review): `seq` is not referenced by any later cell visible in this notebook.
val seq = Seq("GCAGAT")

In [4]:
// Wrap an 18-base nucleotide sequence in a record; the second argument ("1") is the
// label that appears before the colon when ranges on this record are printed (e.g. 1:8..1:12).
val rec1 = NucSeqRecord(NucSeq("ATAACACAGAGATATATC"),"1")

In [5]:
// Take a subset of the sequence held in rec1: just the bases at positions 1 to 6.
// Positions are 0-based, so this should pull TAACAC (six bases, i.e. both endpoints are included).
val subSettedSeq = rec1[1..6]

In [6]:
// Create a Sequence Range (SRange) object covering positions 8..12 of rec1.
// It is printed in the next cell as 1:8..1:12.
val sRange = rec1.range(8..12)

In [7]:
// Print the range; the recorded output shows the form <record label>:<start>..<record label>:<end>.
println(sRange)

1:8..1:12


In [8]:
// Ask for the flanking regions on both sides of sRange, 5 positions each.
// Per the recorded output this yields two ranges (1:3..1:7 and 1:13..1:17); sRange itself is not included.
val sRangeFlanked = sRange.flankBoth(5)

In [9]:
// Print both flanking ranges; they are shown as a bracketed, comma-separated collection.
println(sRangeFlanked)

[1:3..1:7, 1:13..1:17]


In [10]:
// Ask only for the flanking region to the right of sRange, 5 positions long.
// Per the recorded output this is the single range 1:13..1:17.
val sRangeFlankRight = sRange.flankRight(5)

In [11]:
// Print the right-hand flanking range.
println(sRangeFlankRight)

1:13..1:17


In [32]:
import kotlin.collections.*
import biokotlin.genome.SeqRangeSort.leftEdge

// Two example nucleotide records, each carrying a label, a description and one annotation.
val dnaString = "ACGTGGTGAATATATATGCGCGCGTGCGTGGATCAGTCAGTCATGCATGCATGTGTGTACACACATGTGATCGTAGCTAGCTAGCTGACTGACTAGCTGAC"
val dnaString2 = "ACGTGGTGAATATATATGCGCGCGTGCGTGGACGTACGTACGTACGTATCAGTCAGCTGAC"
val record1 = NucSeqRecord(
    NucSeq(dnaString),
    "Sequence 1",
    description = "The first sequence",
    annotations = mapOf("key1" to "value1")
)
val record2 = NucSeqRecord(
    NucSeq(dnaString2),
    "Sequence 2",
    description = "The second sequence",
    annotations = mapOf("key1" to "value1")
)

// Four ranges built three different ways: from a record plus an IntRange, from two
// explicit SeqPosition endpoints, and via the record's own range() helper.
// range1 and range3 overlap on record1 (8..28 and 27..40).
var range1 = SeqPositionRanges.of(record1, 8..28)
var range2 = SeqPositionRanges.of(record2, 3..19)
var range3 = SeqPositionRanges.of(SeqPosition(record1, 27), SeqPosition(record1, 40))
var range4 = record2.range(25..40)

// Open question: is there a way to default to this setOf without typing the entire class?
// The setOf in genome.Ranges is a Java NavigableSet, which does the sorting.
//var setRanges = overlappingSetOf(SeqPositionRangeComparator.sprComparator,range1, range4, range3, range2)

// Collect the ranges in a deliberately unsorted order (1, 4, 3, 2).
var srangeList = mutableListOf<SRange>(range1, range4, range3, range2)

println("\nRanges in the List are:")
srangeList.forEach { listedRange -> println(listedRange.toString()) }

// Ordering shared by both sets below; per the recorded output, entries come out grouped
// by record and then by their left edge.
val comparator: Comparator<SRange> = SeqRangeSort.by(
    SeqRangeSort.numberThenAlphaSort,
    leftEdge
)

// Sorted set that keeps overlapping ranges as separate entries.
val nonCoalsedSet = nonCoalescingSetOf(comparator, srangeList)

println("\nThe noncoalsedSet has these values:")
nonCoalsedSet.forEach { keptRange -> println(keptRange.toString()) }

// Sorted set that merges overlapping ranges (8..28 and 27..40 on record1 become 8..40).
val coalesedSet = coalescingSetOf(comparator, srangeList)

println("\nthe coalsedSet has these values:")
coalesedSet.forEach { mergedRange -> println(mergedRange.toString()) }



Ranges in the List are:
Sequence 1:8..Sequence 1:28
Sequence 2:25..Sequence 2:40
Sequence 1:27..Sequence 1:40
Sequence 2:3..Sequence 2:19

The noncoalsedSet has these values:
Sequence 1:8..Sequence 1:28
Sequence 1:27..Sequence 1:40
Sequence 2:3..Sequence 2:19
Sequence 2:25..Sequence 2:40

the coalsedSet has these values:
Sequence 1:8..Sequence 1:40
Sequence 2:3..Sequence 2:19
Sequence 2:25..Sequence 2:40


In [34]:
// Had a plain (unsorted) Java/Kotlin set been used above, this call would sort it by
// seqRecord and then by lowerEndpoint site. nonCoalsedSet was already built with the same
// comparator, so the recorded output below lists the ranges in the same order as before.
var setRangesSorted = nonCoalsedSet.toSortedSet(comparator)


In [35]:
// Print every range of the sorted set, each followed by an empty line.
setRangesSorted.forEach { sortedRange ->
    println(sortedRange.toString())
    println()
}

Sequence 1:8..Sequence 1:28

Sequence 1:27..Sequence 1:40

Sequence 2:3..Sequence 2:19

Sequence 2:25..Sequence 2:40



In [36]:
%use krangl
// Example sequences for the DataFrame demo (these redeclare names used in an earlier cell).
val dnaString = "ACGTGGTGAATATATATGCGCGCGTGCGTGGATCAGTCAGTCATGCATGCATGTGTGTACACACATGTGATCGTAGCTAGCTAGCTGACTGACTAGCTGACCGTACGTACGTATCAGTCAGCTGACACGTGGTGAATATATATGCGCGCGTGCGTGGATCAGTCAGTCATGCATGCATGTGTGTACACA"
val dnaString2 = "ACGTGGTGAATATATATGCGCGCGTGCGTGGACGTACGTACGTACGTATCAGTCAGCTGAC"
val dnaString3 = "TCAGTGATGATGATGCACACACACACACGTAGCTAGCTGCTAGCTAGTGATACGTAGCAAAAAATTTTTT"

// One record per label; record4 reuses dnaString2 under a different label.
// NOTE(review): record4 is declared but not used anywhere in this cell.
val record1 = NucSeqRecord(NucSeq(dnaString), "Seq1")
val record2 = NucSeqRecord(NucSeq(dnaString2), "Seq2")
val record3 = NucSeqRecord(NucSeq(dnaString3), "Seq3")
val record4 = NucSeqRecord(NucSeq(dnaString2), "Seq2-id2")

// Ranges spread over the three records.
// NOTE(review): sr6 is declared but is not added to srSet1 below — confirm that is intended.
val sr1 = record1.range(27..44)
val sr2 = record1.range(1..15)
val sr3 = record3.range(18..33)
val sr4 = record2.range(25..35)
val sr5 = record2.range(3..13)
val sr6 = record1.range(20..28)

// Sorted, non-merging set of five of the ranges, passed in unsorted order.
val srSet1 = nonCoalescingSetOf(
    SeqRangeSort.by(SeqRangeSort.numberThenAlphaSort, SeqRangeSort.leftEdge),
    sr1, sr2, sr3, sr5, sr4
)

// Convert the set to a krangl DataFrame and print it; the recorded output has one row
// per range with the columns ID, end, range and start.
var df: DataFrame = srSet1.toDataFrame()
df.print()


A DataFrame: 5 x 4
      ID   end    range   start
1   Seq1    15    1..15       1
2   Seq1    44   27..44      27
3   Seq2    13    3..13       3
4   Seq2    35   25..35      25
5   Seq3    33   18..33      18

