# code for using the HTSeq Library

### Python code for importing the HTSeq library (it must be downloaded/installed first)


# In [2]:
# Install the library (notebook shell escape; run "pip install HTSeq" in a shell otherwise)
# !pip install HTSeq

from HTSeq import FastqReader, SAM_Reader, GFF_Reader
from HTSeq import GenomicArray, GenomicInterval 

from matplotlib import pyplot
from numpy import array




### The Python code for reading a FASTQ-format sequence file, including each read's header (name)





# In [4]:
def fastq_import(fastq_file):
    """Print the name and sequence of every read in a FASTQ file.

    For each record yielded by ``FastqReader`` three lines are printed:
    the read name, the read sequence, and the read's reverse complement
    sliced with ``[::-1]``.

    Args:
        fastq_file: FASTQ file, passed straight to ``FastqReader``.
    """
    for record in FastqReader(fastq_file):
        print(record.name)
        print(record.seq)

        # The reverse complement is reversed once more before printing.
        rev_comp = record.get_reverse_complement()
        print(rev_comp[::-1])
        

### The Python code for reading a genome alignment file



# In [5]:
def read_align(align_file, end_point=2000000):
    """Print the aligned reads of a SAM file and plot per-strand hit maps.

    The file is read twice.  The first pass prints every aligned read
    (name, sequence, then chromosome/start/end/strand) and collects the
    chromosome names.  The second pass marks each aligned region in a
    stranded ``GenomicArray``: 1 for '+' strand regions, -1 otherwise.
    Finally the marks on the first chromosome (in sorted name order) are
    plotted for both strands over the window ``[0, end_point)``.

    Args:
        align_file: SAM alignment file, passed to ``SAM_Reader``.
        end_point: End coordinate of the plotted window on the chosen
            chromosome.  Defaults to 2000000.

    Returns:
        None.  If the file holds no aligned reads nothing is plotted.
    """
    chromosomes = set()

    # Pass 1: report aligned reads and gather the chromosomes they hit.
    for alignment in SAM_Reader(align_file):
        # The genomic interval is only read for aligned records, so the
        # whole per-read report lives under this guard.
        if not alignment.aligned:
            continue

        seq_read = alignment.read
        print(seq_read.name)
        print(seq_read.seq)

        region = alignment.iv
        chromosomes.add(region.chrom)
        print(region.chrom, region.start, region.end, region.strand)

    if not chromosomes:
        # No aligned reads: there is no chromosome to index or plot.
        return

    # Sort so that the plotted chromosome is deterministic between runs.
    chromosomes = sorted(chromosomes)
    hit_map = GenomicArray(chromosomes, stranded=True, typecode='i')

    # Pass 2: mark every aligned region on its strand.
    for alignment in SAM_Reader(align_file):
        if alignment.aligned:
            region = alignment.iv
            hit_map[region] = 1 if region.strand == '+' else -1

    chromo = chromosomes[0]
    plus_strand = GenomicInterval(chromo, 0, end_point, '+')
    minus_strand = GenomicInterval(chromo, 0, end_point, '-')

    pyplot.plot(list(hit_map[plus_strand]))
    pyplot.plot(list(hit_map[minus_strand]))
    pyplot.show()


### The Python code for reading genome annotation features from a GFF file

# In [6]:
def annotate_htseq(gff_file):
    """Print the location and description of every feature in a GFF file.

    For each feature yielded by ``GFF_Reader`` three lines are printed:
    its genomic interval (chromosome, start, end, strand), its name, type
    and source, and finally its attribute collection.

    Args:
        gff_file: GFF annotation file, passed straight to ``GFF_Reader``.
    """
    for feature in GFF_Reader(gff_file):
        region = feature.iv

        # Where the feature sits on the genome.
        location = (region.chrom, region.start, region.end, region.strand)
        print('%s %s - %s (%s)' % location)

        # What the feature is and where the annotation came from.
        description = (feature.name, feature.type, feature.source)
        print('%s %s (%s)' % description)

        print(feature.attr)
        