In [1]:
using PureSeq

In [2]:
# Mutable state for streaming SAM-formatted text to an output stream.
type Sam_Writer
    # Destination for all output; deliberately left untyped (Any).
    Outstream::Any
    # Reference contigs; writeHeader and writeRead read its
    # count / names / sizes / offsets fields.
    contigs::ReferenceContigs
    # Index into `contigs` of the contig currently being written;
    # writeRead advances it as positions move forward.
    cur_ref::Int64
end

In [3]:
# Build a Sam_Writer on `output_stream` for the given `contigs`, starting at the
# first contig. The SAM header is written to the stream before the writer is
# returned, so the caller can begin emitting reads immediately.
function SamWriter(output_stream, contigs)
    writer = Sam_Writer(output_stream, contigs, 1)
    writeHeader(writer)
    return writer
end

SamWriter (generic function with 1 method)

In [4]:
# Emit the SAM header to the writer's output stream: the @HD line, then one @SQ
# line (name and length) for each of the `count` contigs, then the @PG line.
# The value of the final `write` call is returned.
function writeHeader(sw::Sam_Writer)
    io = sw.Outstream
    refs = sw.contigs

    write(io, "@HD\tVN:1.0\tSO:coordinate\n")
    for idx in 1:refs.count
        write(io, "@SQ\tSN:$(refs.names[idx])\tLN:$(refs.sizes[idx])\n")
    end
    return write(io, "@PG\tID:PureSeq\tPN:PureSeq\n")
end

writeHeader (generic function with 1 method)

In [5]:
# Write a single alignment record for one read to the writer's output stream.
#
# Arguments:
#   sw     - the writer; `sw.cur_ref` is advanced so that it indexes the contig
#            that contains POS
#   POS    - read position in the concatenated coordinate system described by
#            `sw.contigs.offsets`; the per-contig offset is subtracted before
#            the position is written
#   FLAG   - value written to the FLAG column
#   MAPQ   - value written to the MAPQ column (default 15)
#   LENGTH - when non-zero the CIGAR column is "<LENGTH>M", otherwise "*"
#
# Returns the value of the `write` call for the record. Returns -1, after
# printing a message to STDERR, when POS lies before the current contig (reads
# fed out of order) or past the end of the last contig. Nothing is written to
# the output stream in either error case.
function writeRead(
    sw::Sam_Writer, POS::Int64, FLAG::Int64; MAPQ::Int64=15, LENGTH::Int64=0
)
    contigs = sw.contigs

    # Scan forward from the current contig to the one containing POS. A local
    # index is used, and the scan is bounded by the contig count, so a position
    # beyond the last contig is reported instead of indexing out of bounds and
    # leaving `sw.cur_ref` in an unusable state.
    ref = sw.cur_ref
    while ref <= contigs.count && POS > contigs.offsets[ref] + contigs.sizes[ref]
        ref += 1
    end
    if ref > contigs.count
        write(STDERR, "ERROR: position $(POS) is past the end of the last contig\n")
        return -1
    end
    sw.cur_ref = ref

    # Convert to a position relative to the start of the selected contig.
    local_pos = POS - contigs.offsets[ref]
    if local_pos < 0
        write(STDERR, "ERROR: reads need to be fed in order\n")
        return -1
    end

    # Values for the record's columns, named after the columns they fill.
    QNAME = "PureSeq"
    RNAME = contigs.names[ref]
    CIGAR = LENGTH == 0 ? "*" : "$(LENGTH)M"
    RNEXT = "*"
    PNEXT = 0
    TLEN = 0
    SEQ = "*"
    QUAL = "*"

    output = "$(QNAME)\t$(FLAG)\t$(RNAME)\t$(local_pos)\t$(MAPQ)\t$(CIGAR)\t" *
             "$(RNEXT)\t$(PNEXT)\t$(TLEN)\t$(SEQ)\t$(QUAL)\n"
    return write(sw.Outstream, output)
end

writeRead (generic function with 1 method)

In [6]:
# Demo: build a writer on STDOUT (the constructor writes the header right away),
# then write two reads with FLAG 16 at increasing positions.
sw = SamWriter(STDOUT, ReferenceContigs_hg38)
writeRead(sw, 100, 16)
# NOTE(review): presumably beyond the end of the first contig, so this call
# exercises the contig-advance loop in writeRead — confirm against the offsets.
writeRead(sw, 500000000, 16)

@HD	VN:1.0	SO:coordinate
@SQ	SN:chr1	LN:248956422
@SQ	SN:chr10	LN:133797422
@SQ	SN:chr11	LN:135086622
@SQ	SN:chr11_KI270721v1_random	LN:100316
@SQ	SN:chr12	LN:133275309
@SQ	SN:chr13	LN:114364328
@SQ	SN:chr14	LN:107043718
@SQ	SN:chr14_GL000009v2_random	LN:201709
@SQ	SN:chr14_GL000225v1_random	LN:211173
@SQ	SN:chr14_KI270722v1_random	LN:194050
@SQ	SN:chr14_GL000194v1_random	LN:191469
@SQ	SN:chr14_KI270723v1_random	LN:38115
@SQ	SN:chr14_KI270724v1_random	LN:39555
@SQ	SN:chr14_KI270725v1_random	LN:172810
@SQ	SN:chr14_KI270726v1_random	LN:43739
@SQ	SN:chr15	LN:101991189
@SQ	SN:chr15_KI270727v1_random	LN:448248
@SQ	SN:chr16	LN:90338345
@SQ	SN:chr16_KI270728v1_random	LN:1872759
@SQ	SN:chr17	LN:83257441
@SQ	SN:chr17_GL000205v2_random	LN:185591
@SQ	SN:chr17_KI270729v1_random	LN:280839
@SQ	SN:chr17_KI270730v1_random	LN:112551
@SQ	SN:chr18	LN:80373285
@SQ	SN:chr19	LN:58617616
@SQ	SN:chr1_KI270706v1_random	LN:175055
@SQ	SN:chr1_KI270707v1_random	LN:32032
@SQ	SN:chr1_KI270708v1_random	LN:127682
@SQ

42