# Reading and Writing Sequences

In [3]:
from Bio import SeqIO

# Open "sequence.fasta" as a stream of FASTA records; the records are
# produced one at a time as the loop below consumes them.
sequences = SeqIO.parse("sequence.fasta", "fasta")

# For every record, report its identifier followed by its length
# (len() of a record is the length of its sequence).
for sequence in sequences:
    for label, value in (("ID:", sequence.id), ("Length:", len(sequence))):
        print(label, value)


ID: NP_197847.3
Length: 451


# Calculating Sequence Properties

In [4]:
# Bio.SeqUtils.GC was deprecated in Biopython 1.80 and later removed; its
# replacement, gc_fraction, returns a fraction in [0, 1] instead of a percentage.
try:
    from Bio.SeqUtils import gc_fraction
except ImportError:  # older Biopython that only ships the percentage-based GC()
    from Bio.SeqUtils import GC

    def gc_fraction(seq):
        """Return the GC content of ``seq`` as a fraction in [0, 1]."""
        return GC(seq) / 100

sequence = "ATGCTGACTAGCTAGCTAGC"

# Scale the fraction to a percentage so the printed value keeps its meaning
# (10 of the 20 bases above are G or C -> 50.0).
gc_content = gc_fraction(sequence) * 100
print(f"GC content: {gc_content}%")

GC content: 50.0%


# Sequence Alignment

In [5]:
from Bio import pairwise2

# Two toy sequences: seq2 is seq1 with the "C" removed.
seq1 = "ACGT"
seq2 = "AGT"

# globalxx = global alignment where, per the pairwise2 documentation, a match
# scores 1 and mismatches/gaps carry no penalty.
# NOTE(review): Bio.pairwise2 is deprecated upstream in favour of
# Bio.Align.PairwiseAligner; consider migrating when outputs can be refreshed.
alignments = pairwise2.align.globalxx(seq1, seq2)

# Print both gapped sequences and the score of every returned alignment,
# separated by a blank line.
for alignment in alignments:
    report = (
        ("Aligned Sequence 1:", alignment.seqA),
        ("Aligned Sequence 2:", alignment.seqB),
        ("Score:", alignment.score),
    )
    for label, value in report:
        print(label, value)
    print()

Aligned Sequence 1: ACGT
Aligned Sequence 2: A-GT
Score: 3.0



# Local Alignment

In [1]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Scoring scheme: reward for a match, penalty for a mismatch, and the
# penalties for opening and for extending a gap.
match_score, mismatch_score = 2, -1
gap_open_score, gap_extension_score = -5, -0.5

# Sample sequences (replace these with your own sequences)
seq1 = "ATCGAGCTAGC"
seq2 = "ATGGGCTAGC"

# Local alignment: the scores are passed positionally in the order
# match, mismatch, gap open, gap extension.
# NOTE(review): Bio.pairwise2 is deprecated upstream in favour of
# Bio.Align.PairwiseAligner.
scoring = (match_score, mismatch_score, gap_open_score, gap_extension_score)
alignments = pairwise2.align.localms(seq1, seq2, *scoring)

# Render each alignment with Biopython's text formatter and print it.
for alignment in alignments:
    formatted = format_alignment(*alignment)
    print(formatted)


4 GAGCTAGC
  |.||||||
3 GGGCTAGC
  Score=13



# Global Alignment

In [2]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Scoring scheme: reward for a match, penalty for a mismatch, and the
# penalties for opening and for extending a gap.
match_score, mismatch_score = 2, -1
gap_open_score, gap_extension_score = -5, -0.5

# Sample sequences (replace these with your own sequences)
seq1 = "ATCGAGCTAGC"
seq2 = "ATGGGCTAGC"

# Global (end-to-end) alignment: the scores are passed positionally in the
# order match, mismatch, gap open, gap extension.
# NOTE(review): Bio.pairwise2 is deprecated upstream in favour of
# Bio.Align.PairwiseAligner.
scoring = (match_score, mismatch_score, gap_open_score, gap_extension_score)
alignments = pairwise2.align.globalms(seq1, seq2, *scoring)

# Render each equally-scoring alignment with Biopython's text formatter.
for alignment in alignments:
    formatted = format_alignment(*alignment)
    print(formatted)


ATCGAGCTAGC
|| |.||||||
AT-GGGCTAGC
  Score=12

ATCGAGCTAGC
||.| ||||||
ATGG-GCTAGC
  Score=12



## Performing Multiple Sequence Alignment (MSA)

In [8]:
from Bio.Align import MultipleSeqAlignment
from Bio import AlignIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Build one SeqRecord per (id, bases) pair. The three sequences have the same
# length, so they can be stacked row by row without inserting gaps.
sequences = [
    SeqRecord(Seq(bases), id=name)
    for name, bases in (
        ("Seq1", "ATGCTGACTAGCTAGCTAGC"),
        ("Seq2", "ATGCTGACTAGCTAGCTGGC"),
        ("Seq3", "ATGCTGACTAGCTAGCTTGC"),
    )
]

# Wrap the records in a MultipleSeqAlignment container. No alignment
# algorithm is run here: the records are used exactly as given.
alignment = MultipleSeqAlignment(sequences)

# Persist the alignment as FASTA in the working directory.
AlignIO.write(alignment, "alignment.fasta", "fasta")

# Show the text summary of the alignment.
print(alignment)

Alignment with 3 rows and 20 columns
ATGCTGACTAGCTAGCTAGC Seq1
ATGCTGACTAGCTAGCTGGC Seq2
ATGCTGACTAGCTAGCTTGC Seq3


# Searching for Motifs

In [7]:
import re
from Bio.Seq import Seq

sequence = Seq("ATGCTGACTAGCTAGCTAGC")
motif = "CTG"

# Collect the 0-based start offset of every hit. re.finditer interprets
# `motif` as a regular expression and reports non-overlapping matches only.
matches = []
for match in re.finditer(motif, str(sequence)):
    matches.append(match.start())

print("Motif Matches:", ", ".join(str(position) for position in matches))

Motif Matches: 3


# Transcription and Translation

In [8]:
from Bio.Seq import Seq

# DNA -> RNA -> protein.
# The DNA string is 20 bases long, so the last two bases do not form a full
# codon; the captured output shows six residues, including a stop ("*").
dna_sequence = Seq("ATGCTGACTAGCTAGCTAGC")
rna_sequence = dna_sequence.transcribe()
protein_sequence = rna_sequence.translate()

# Print the three sequences, each with its label.
for label, value in (
    ("DNA Sequence:", dna_sequence),
    ("RNA Sequence:", rna_sequence),
    ("Protein Sequence:", protein_sequence),
):
    print(label, value)

DNA Sequence: ATGCTGACTAGCTAGCTAGC
RNA Sequence: AUGCUGACUAGCUAGCUAGC
Protein Sequence: MLTS*L




# Calculating Molecular Weight

In [9]:
# Importing necessary functions and classes from Biopython
from Bio.SeqUtils import molecular_weight
from Bio.Seq import Seq

# Sequence whose weight is to be computed.
sequence = Seq("ATGCTGACTAGCTAGCTAGC")

# No sequence type is passed, so molecular_weight falls back to its default
# (documented by Biopython as DNA -- confirm for the installed version).
weight = molecular_weight(sequence)

# Report the result.
print("Molecular Weight:", weight)

Molecular Weight: 6196.952300000001


# BLAST for Organism Identification

In [3]:
import re
from io import StringIO

from Bio import Entrez  # NOTE(review): not used in this cell
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML

# Set the BLAST parameters
program = "blastp"  # BLAST program (e.g., blastp, blastn, etc.)
database = "nr"  # BLAST database (e.g., nr, nt, etc.)
query_sequence = "JAO23553.1"  # Query sequence accession or identifier

# Perform BLAST search (remote call to NCBI; may take a while).
result_handle = NCBIWWW.qblast(program, database, query_sequence)

# Read the XML exactly once and keep it in memory. A handle can only be
# consumed once, so parsing it first and calling .read() afterwards would
# write an empty file. The handle is closed even if reading fails.
try:
    blast_xml = result_handle.read()
finally:
    result_handle.close()

# Save the BLAST results to a file
with open("blast_results.xml", "w") as out_handle:
    out_handle.write(blast_xml)

# Parse the BLAST results from the in-memory copy
blast_record = NCBIXML.read(StringIO(blast_xml))

# Get the description of the top hit; a search with no hits yields an empty
# alignment list, which must not raise IndexError.
if blast_record.alignments:
    top_hit_description = blast_record.alignments[0].title
else:
    top_hit_description = ""

# Extract the organism information from the description: the first
# "[...]" group in the title is taken as the organism name.
organism_match = re.search(r'\[.*?\]', top_hit_description)
if organism_match:
    organism = organism_match.group(0).strip("[]")
else:
    organism = "Organism information not available"

# Print the organism information
print("Organism:", organism)


Organism: Poeciliopsis prolifica
