# Anotações, features e qualifiers

ATP10A ATPase phospholipid transporting 10A (putative) [Homo sapiens (human)]

In [1]:
from Bio import Seq
from Bio import SeqIO

In [2]:
G = SeqIO.read("sequence1.gb", "genbank")
print('ID:',G.id)
print('Name:',G.name)
print('Size:',len(G.seq))
print('Description:',G.description)
print('Seq:',G.seq)

ID: NC_000015.10
Name: NC_000015
Size: 192852
Description: Homo sapiens chromosome 15, GRCh38.p14 Primary Assembly
Seq: GCCTCAAGGGACCTCTGCCTGAGGAATCAGCATTTGGCTTTGTAAGCACCAGGAGTACTCCTGGAAGCTCCAGGGTAAGCAAATGGAATGAGGCGGTAATTACTTTGGCAACAGTGTGGACTGGCTGAAGCTGGGAGTCTGGTAGGCGCGGACACCCCTGGAGACTTTGGGCGAAGGGTCCACGGGGATGCCTGTGAAGACAGGAGGCCGAAGTGTAGGGAGGCCGAAGGCCTCGGGGGTCACCAGGTGGGCTGGTAGAGGAAAACACGGAGCCATGGGATGTGAGGCCTCAAGACTGCAGGCGCTGTGGGCCTTCATTTGCAGTTGATGCTCCGTTTGGGTTTCTGCTGGTTGACTGTGAGACCTGACTCCTTTCCCCAACAATGCAGACTTCAGCCAAACCTTTCCCTGATCTGGGCCTCTACATGTCAAAAGCCCACCCCTGTGACTGGGGGATGGGATTATTTTCTTCACACCTGTCACCACTCTACCTTCTAACACTGGGTTCTGGGCAACATTGTGGATGGTGGCAAAACTGGGCAGAGATGTAAGAATGTCATAACTGGAGCCGGGTAAGCCAGGGTGCTGGAGGGGAGGGCCTCCCCAAGCCAGGGTCCTGGCGAAGACGTGATTTGAGTCCATCCAGGGCAGTGCCTTGTTAGCAGGCACCCCCAGGAAGAGCCTGGGGCCACGGCCAACCGCCCAGCCACGGCCTCCTGGGCAAGAAATAACTGCTTCTCATCTCTGTACTTCTTGAGTGGGAGGCAAACAGTCACAGCCATGAAACATCTGTGGGGTCTCACTGCGGACGTGGGGGACTCCCAGCTCCTTTTTCCATGCCGTGCTTATTCTCCCACCCCTTCTCGGATGCCCCATGATG

In [3]:
#Anotações
annotations=G.annotations
print('Acession Code:',G.annotations['accessions'])
print('Moleculer type:',G.annotations['molecule_type'])
print('Topology:',G.annotations['topology'])
print('Organism:',G.annotations['organism'])
print('Taxonomy:',G.annotations['taxonomy'])
print('Date:',G.annotations['date'])
print('Version:',G.annotations['sequence_version'])
print('Keywords:',G.annotations['keywords'])
print('Source:',G.annotations['source'])
print('References:',G.annotations['references'])
print('Comments:',G.annotations['comment'])

Acession Code: ['NC_000015', 'REGION:', 'complement(25672237..25865088)']
Moleculer type: DNA
Topology: linear
Organism: Homo sapiens
Taxonomy: ['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
Date: 07-OCT-2023
Version: 10
Keywords: ['RefSeq']
Source: Homo sapiens (human)
References: [Reference(title='Analysis of the DNA sequence and duplication history of human chromosome 15', ...), Reference(title='Finishing the euchromatic sequence of the human genome', ...), Reference(title='Initial sequencing and analysis of the human genome', ...)]
Comments: REFSEQ INFORMATION: The reference sequence is identical to
CM000677.2.
On Feb 3, 2014 this sequence version replaced NC_000015.9.
Assembly Name: GRCh38.p14 Primary Assembly
The DNA sequence is composed of genomic sequence, primarily
finished clones that were sequenced as part of the Human Genome
Project. PCR 

In [4]:
# Features/Qualifiers

print("O número de features é: ",len(G.features),)

print("Tipo e localização")
for f in G.features: 
    print(f.type, f.location)

O número de features é:  28
Tipo e localização
source [0:192852](+)
gene [0:192852](+)
mRNA join{[0:74](+), [1886:2441](+), [83865:84070](+), [128947:129033](+), [137822:137929](+), [139006:139138](+), [141067:141198](+), [143179:143432](+), [146689:146907](+), [148164:148359](+), [150847:151415](+), [156788:156892](+), [156986:157113](+), [162988:163173](+), [169942:170270](+), [173297:173374](+), [177260:177386](+), [181602:181803](+), [184014:184095](+), [184174:184279](+), [184780:184968](+), [185114:186377](+)}
mRNA join{[1115:1458](+), [1886:2441](+), [83865:84070](+), [128947:129033](+), [137822:137929](+), [139006:139138](+), [141067:141198](+), [143179:143432](+), [146689:146907](+), [148164:148359](+), [150847:151415](+), [156788:156892](+), [156986:157113](+), [162988:163173](+), [169942:170270](+), [173297:173374](+), [177260:177386](+), [181602:181803](+), [184014:184095](+), [184174:184279](+), [184780:184968](+), [185114:186377](+)}
mRNA join{[1761:2441](+), [83865:84070

In [5]:
print("Índice das sequências codificantes")
feat_cds = []
for f in range(len(G.features)):
    if G.features[f].type == "CDS":
        feat_cds.append(f)
print(feat_cds)

Índice das sequências codificantes
[7, 8, 9, 10, 23, 24, 25, 26, 27]


In [6]:
#Gene ARHGAP24
print ("Qualifiers de CDS")
for f in range (len(G.features)):
    if G.features[f].type=="CDS":
        print (G.features[f].qualifiers)

Qualifiers de CDS
OrderedDict([('gene', ['ATP10A']), ('gene_synonym', ['ATP10C; ATPVA; ATPVC']), ('note', ['Derived by automated computational analysis using gene prediction method: Gnomon.']), ('codon_start', ['1']), ('product', ['phospholipid-transporting ATPase VA isoform X2']), ('protein_id', ['XP_011520130.1']), ('db_xref', ['GeneID:57194', 'HGNC:HGNC:13542', 'MIM:605855']), ('translation', ['MEREPAGTEEPGPPGRRRRREGRTRTVRSNLLPPPGAEDPAAGAAKGERRRRRGCAQHLADNRLKTTKYTLLSFLPKNLFEQFHRPANVYFVFIALLNFVPAVNAFQPGLALAPVLFILAITAFRDLWEDYSRHRSDHKINHLGCLVFSREEKKYVNRFWKEIHVGDFVRLRCNEIFPADILLLSSSDPDGLCHIETANLDGETNLKRRQVVRGFSELVSEFNPLTFTSVIECEKPNNDLSRFRGCIIHDNGKKAGLYKENLLLRGCTLRNTDAVVGIVIYAGHETKALLNNSGPRYKRSKLERQMNCDVLWCVLLLVCMSLFSAVGHGLWIWRYQEKKSLFYVPKSDGSSLSPVTAAVYSFLTMIIVLQVLIPISLYVSIEIVKACQVYFINQDMQLYDEETDSQLQCRALNITEDLGQIQYIFSDKTGTLTENKMVFRRCTVSGVEYSHDANAQRLARYQEADSEEEEVVPRGGSVSQRGSIGSHQSVRVVHRTQSTKSHRRTGSRAEAKRASMLSKHTAFSSPMEKDITPDPKLLEKVSECDKSLAVARHQEHLLAHLSPELSDVFDFFIALTICNTVVVTSPDQPRTKVRVRFEL

In [10]:
feat=G.features

In [12]:
for i in range(len(feat)):
    if feat[i].type == "CDS":
        feat_cds.append(i)
for k in feat_cds:
    print ('Location:',feat[k].location)

Location: join{[1992:2441](+), [83865:84070](+), [128947:129033](+), [137822:137929](+), [139006:139138](+), [141067:141198](+), [143179:143432](+), [146689:146907](+), [148164:148359](+), [150847:151415](+), [156788:156892](+), [156986:157113](+), [162988:163173](+), [169942:170270](+), [173297:173374](+), [177260:177386](+), [181602:181803](+), [184014:184095](+), [184174:184279](+), [184780:184968](+), [185114:185490](+), [191676:191685](+)}
Location: join{[1992:2441](+), [83865:84070](+), [128947:129033](+), [137822:137929](+), [139006:139138](+), [141067:141198](+), [143179:143432](+), [146689:146907](+), [148164:148359](+), [150847:151415](+), [156788:156892](+), [156986:157113](+), [162988:163173](+), [169942:170270](+), [173297:173374](+), [177260:177386](+), [181602:181803](+), [184014:184095](+), [184174:184279](+), [184780:184968](+), [185114:185748](+)}
Location: join{[1992:2441](+), [83865:84070](+), [128947:129033](+), [137822:137929](+), [139006:139138](+), [141067:14119

In [14]:
for k in feat_cds:
    print('Start codons:',feat[k].qualifiers['codon_start'])
    print('Stop codons:', feat[k].qualifiers['note'])
    print('Protein ID:',feat[k].qualifiers['protein_id'])
    print('Product:',feat[k].qualifiers['product'])
    print('Translation:',feat[k].qualifiers['translation'])

Start codons: ['1']
Stop codons: ['Derived by automated computational analysis using gene prediction method: Gnomon.']
Protein ID: ['XP_011520130.1']
Product: ['phospholipid-transporting ATPase VA isoform X2']
Translation: ['MEREPAGTEEPGPPGRRRRREGRTRTVRSNLLPPPGAEDPAAGAAKGERRRRRGCAQHLADNRLKTTKYTLLSFLPKNLFEQFHRPANVYFVFIALLNFVPAVNAFQPGLALAPVLFILAITAFRDLWEDYSRHRSDHKINHLGCLVFSREEKKYVNRFWKEIHVGDFVRLRCNEIFPADILLLSSSDPDGLCHIETANLDGETNLKRRQVVRGFSELVSEFNPLTFTSVIECEKPNNDLSRFRGCIIHDNGKKAGLYKENLLLRGCTLRNTDAVVGIVIYAGHETKALLNNSGPRYKRSKLERQMNCDVLWCVLLLVCMSLFSAVGHGLWIWRYQEKKSLFYVPKSDGSSLSPVTAAVYSFLTMIIVLQVLIPISLYVSIEIVKACQVYFINQDMQLYDEETDSQLQCRALNITEDLGQIQYIFSDKTGTLTENKMVFRRCTVSGVEYSHDANAQRLARYQEADSEEEEVVPRGGSVSQRGSIGSHQSVRVVHRTQSTKSHRRTGSRAEAKRASMLSKHTAFSSPMEKDITPDPKLLEKVSECDKSLAVARHQEHLLAHLSPELSDVFDFFIALTICNTVVVTSPDQPRTKVRVRFELKSPVKTIEDFLRRFTPSCLTSGCSSIGSLAANKSSHKLGSSFPSTPSSDGMLLRLEERLGQPTSAIASNGYSSQADNWASELAQEQESERELRYEAESPDEAALVYAARAYNCVLVERLHDQVSVELPHLGRLTFELLHTLGFDSVRKRMSVVIRHPLTDEINVYTKGADSVVMDL