<a href="https://colab.research.google.com/github/mhayeri1994/hello-jupyter/blob/main/biopython_nes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install biopython

In [1]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.84


# DNA Sequence Operations

## Create DNA Sequence

In [4]:
from Bio.Seq import Seq

# Create a DNA sequence.
# `dna_sequence` is module-level notebook state: the transcription,
# complement and subsequence-search cells all read it.
dna_sequence = Seq("ACGTACGTAGCTAGCTAGCTA")
print("DNA Sequence:", dna_sequence)

DNA Sequence: ACGTACGTAGCTAGCTAGCTA


## Transcription (DNA to RNA)

In [5]:
# Transcribe DNA to RNA.
# transcribe() returns a new sequence in which every T of `dna_sequence`
# is replaced by U (compare the printed DNA and RNA sequences).
rna_sequence = dna_sequence.transcribe()
print("RNA Sequence:", rna_sequence)

RNA Sequence: ACGUACGUAGCUAGCUAGCUA


## Translation (RNA to Protein)

In [6]:
# Translate RNA to Protein.
# translate() reads the RNA one codon (three bases) at a time. With the
# default arguments it does not stop at a stop codon; the stop codon is
# rendered as "*" in the resulting protein string.
protein_sequence = rna_sequence.translate()
print("Protein Sequence:", protein_sequence)

Protein Sequence: TYVAS*L


## Complement and Reverse Complement

In [None]:
# Get the complement of the DNA sequence:
# each base is swapped for its pairing partner, in the same order.
complement_sequence = dna_sequence.complement()
print("Complement Sequence:", complement_sequence)

# Get the reverse complement of the DNA sequence:
# the complement read in the opposite direction.
reverse_complement_sequence = dna_sequence.reverse_complement()
print("Reverse Complement Sequence:", reverse_complement_sequence)


## Finding a Subsequence

In [None]:
# Find a subsequence in the DNA sequence
subsequence = "ACGT"

# find() returns the 0-based index of the first match, or -1 when the
# subsequence does not occur at all.
start_index = dna_sequence.find(subsequence)

# Report the "no match" sentinel explicitly instead of printing it as if it
# were a real position.
if start_index == -1:
    print("Subsequence not found:", subsequence)
else:
    print("Subsequence found at index:", start_index)


# Reading DNA Sequences from a File

In [None]:
from Bio import SeqIO

# SeqIO.parse yields the records of the FASTA file lazily, one at a time.
fasta_records = SeqIO.parse("example.fasta", "fasta")

# Show the identifier and the sequence of every record in the file.
for record in fasta_records:
    print("ID:", record.id)
    print("Sequence:", record.seq)

# Writing DNA Sequences to a File

In [1]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Create a new sequence record.
# Seq and SeqRecord are imported from their public modules so that this cell
# also runs on a fresh kernel and does not depend on names that Bio.SeqIO
# happens to import internally.
new_record = SeqRecord(
    Seq("ACGTACGTAGCTAGCTAGCTA"),
    id="example",
    description="Example DNA sequence",
)

# Write to a FASTA file; the `with` block closes the file even on error.
with open("output.fasta", "w") as output_file:
    SeqIO.write(new_record, output_file, "fasta")


# Fetching Sequences from NCBI with Entrez

In [None]:
from Bio import Entrez, SeqIO

# Always use your email when using Entrez
Entrez.email = "user3435@testhost23.com"

# Fetch a sequence by its accession number.
# The handle is used as a context manager so the network connection is
# closed even if parsing the GenBank record raises.
accession_number = "NM_001301717"
with Entrez.efetch(db="nucleotide", id=accession_number, rettype="gb", retmode="text") as handle:
    # SeqIO.read expects exactly one record in the handle.
    record = SeqIO.read(handle, "genbank")

# Print the sequence
print(record.seq)


CTCTAGATGAGTCAGTGGAGGGCGGGTGGAGCGTTGAACCGTGAAGAGTGTGGTTGGGCGTAAACGTGGACTTAAACTCAGGAGCTAAGGGGGAAACCAATGAAAAGCGTGCTGGTGGTGGCTCTCCTTGTCATTTTCCAGGTATGCCTGTGTCAAGATGAGGTCACGGACGATTACATCGGAGACAACACCACAGTGGACTACACTTTGTTCGAGTCTTTGTGCTCCAAGAAGGACGTGCGGAACTTTAAAGCCTGGTTCCTCCCTATCATGTACTCCATCATTTGTTTCGTGGGCCTACTGGGCAATGGGCTGGTCGTGTTGACCTATATCTATTTCAAGAGGCTCAAGACCATGACCGATACCTACCTGCTCAACCTGGCGGTGGCAGACATCCTCTTCCTCCTGACCCTTCCCTTCTGGGCCTACAGCGCGGCCAAGTCCTGGGTCTTCGGTGTCCACTTTTGCAAGCTCATCTTTGCCATCTACAAGATGAGCTTCTTCAGTGGCATGCTCCTACTTCTTTGCATCAGCATTGACCGCTACGTGGCCATCGTCCAGGCTGTCTCAGCTCACCGCCACCGTGCCCGCGTCCTTCTCATCAGCAAGCTGTCCTGTGTGGGCATCTGGATACTAGCCACAGTGCTCTCCATCCCAGAGCTCCTGTACAGTGACCTCCAGAGGAGCAGCAGTGAGCAAGCGATGCGATGCTCTCTCATCACAGAGCATGTGGAGGCCTTTATCACCATCCAGGTGGCCCAGATGGTGATCGGCTTTCTGGTCCCCCTGCTGGCCATGAGCTTCTGTTACCTTGTCATCATCCGCACCCTGCTCCAGGCACGCAACTTTGAGCGCAACAAGGCCATCAAGGTGATCATCGCTGTGGTCGTGGTCTTCATAGTCTTCCAGCTGCCCTACAATGGGGTGGTCCTGGCCCAGACGGTGGCCAACTTCAACATCACCAGTAGCACCTGTGAGCTCAGTAAGCAACTCAACATCG

In [None]:
# Number of nucleotides in `record.seq`; as the last expression of the cell
# the value is displayed as the cell's output.
len(record.seq)

2191

In [2]:
from io import StringIO

from Bio import Entrez, SeqIO

# Set your email (NCBI requires an email address for identification)
Entrez.email = "user3435@testhost23.com"

# Search for the HBB gene associated with sickle cell anemia.
# Handles are used as context managers so each connection is closed even if
# reading or parsing fails.
search_term = "HBB[Gene] AND Homo sapiens[Organism]"
with Entrez.esearch(db="nucleotide", term=search_term) as handle:
    record = Entrez.read(handle)

# Fail with a descriptive error instead of a bare IndexError when the search
# returns no hits.
if not record["IdList"]:
    raise ValueError(f"No nucleotide records found for search term: {search_term!r}")

# Get the first ID from the search results
hbb_id = record["IdList"][0]

# Fetch the GenBank record for that ID as plain text
with Entrez.efetch(db="nucleotide", id=hbb_id, rettype="gb", retmode="text") as handle:
    sequence_record = handle.read()

# Print the sequence record
print(sequence_record)

# Optionally, parse the fetched text and extract just the nucleotide sequence.
# StringIO lets SeqIO read the already-downloaded text without a second
# network request.
sequence_data = StringIO(sequence_record)
for seq_record in SeqIO.parse(sequence_data, "genbank"):
    print("Gene ID:", seq_record.id)
    print("Description:", seq_record.description)
    print("Sequence:", seq_record.seq)

LOCUS       NG_059281              10106 bp    DNA     linear   PRI 10-OCT-2024
DEFINITION  Homo sapiens hemoglobin subunit beta (HBB), RefSeqGene (LRG_1232)
            on chromosome 11.
ACCESSION   NG_059281
VERSION     NG_059281.1
KEYWORDS    RefSeq; RefSeqGene.
SOURCE      Homo sapiens (human)
  ORGANISM  Homo sapiens
            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
            Catarrhini; Hominidae; Homo.
REFERENCE   1  (bases 1 to 10106)
  AUTHORS   Calvo,S.E., Pagliarini,D.J. and Mootha,V.K.
  TITLE     Upstream open reading frames cause widespread reduction of protein
            expression and are polymorphic among humans
  JOURNAL   Proc Natl Acad Sci U S A 106 (18), 7507-7512 (2009)
   PUBMED   19372376
REFERENCE   2  (bases 1 to 10106)
  AUTHORS   Langer,A.L.
  TITLE     Beta-Thalassemia
  JOURNAL   (in) Adam MP, Feldman J, Mirzaa GM, Pagon RA, Wallace SE and
            Am