In [3]:
from Bio import SeqIO

# Path to the input FASTA file; change this single value to analyse another file.
FASTA_PATH = "sequences.fasta"

# Parse the file once and keep the records in memory, so the per-record report,
# the total below, and every later cell that uses `sequences` share one parse.
sequences = list(SeqIO.parse(FASTA_PATH, "fasta"))

# Report the identifier, description, sequence and length of every record.
for record in sequences:
    print("ID:", record.id)
    print("Description:", record.description)
    print("Sequence:", record.seq)
    print("Length:", len(record.seq))

print("Total Sequences:", len(sequences))

ID: VDOA01000010.1
Description: VDOA01000010.1 Micrococcus luteus strain DE0571 NODE_10_length_103078_cov_57.143542, whole genome shotgun sequence
Sequence: CTGATGGCCACCAGCGGGAAGAAACACTGGCCGCCTATGGGCAGAACTCAATGGCCCTTGACAGCCGTACTCCGCTGCACCGATGCACCGATGCACCGATGCACCGATGGTGATGATGAGGCCCATCGCCGCAACCACCGGCCAGCCCTTTCTTCCTGACCGAGGAGCTGTCGACAGTGCGTGGCAGGTGCCCGTGCGGACGCGGCCACGGGCTCTCCGGAGGCCGGGCCGGCCCCGCCCCCTCGGACGGTGCAGAGGGGGCGGAGGGGGCCTCGTCCGGCGCCCACCGAAAGTGTCACCCTCACCTCGCCACGTGGACCGTCACAAGCGACGGCCCTGCTGGCCTGCACCCCGTGCGACTCCCCTGCCCGGTCTCTACGTGACGAGGCTCGTTCCGGCCGCTGAGAGCGGCGTAGGAGTCGCGGGCACGGGCATCAACGGGACGCGCGACGGGACGGCCCATTCAAGTGCCGCCGCGGCACGCAGACGTCGTCATGGGCAGCGCGGTGAGGCGCTCCTGGCGCGCCGCTGACTCTGCGCGGCGGTCGATGCCCGGGCCTACGCAGACCACGAGACAGCGGGACGGGCCACCAGGCGAACCCTTCACCCGGGCACCACGCCTGGCCCCCGAGCGGCGAGTAGGCCGGTCGCCGTGACCGGCCCCGCGGCGCAGGAGCTGGCTGCCCGCGACGGCGCCGTGTCGCTCCGCAGCACACGCGGCATGAAAACGCCGCAGGCCGGACCCCAAGGGGTCCGGCCTGCGGCGTTCGACGACGGTCGGGTCAGTGGGCGTACTTGCCCCGAGAGAACTCGTAGACCCACCCGAAGGTGAAGTAGGCGCCC

In [4]:
# calculating GC content
def gc_content(sequence):
    """Return the GC content of ``sequence`` as a percentage between 0 and 100.

    Args:
        sequence: A nucleotide sequence supporting ``len()``, ``.upper()`` and
            ``.count()`` (a plain ``str`` works, as does ``record.seq`` as used
            in this notebook).

    Returns:
        float: ``100 * (G + C) / len(sequence)``; ``0.0`` for an empty sequence.
    """
    length = len(sequence)
    if length == 0:
        # An empty record has no bases; report 0% rather than dividing by zero.
        return 0.0
    # Count case-insensitively so lowercase (soft-masked) bases are included.
    normalized = sequence.upper()
    gc_count = normalized.count("G") + normalized.count("C")
    return 100 * float(gc_count) / length

# Print the GC percentage of every loaded record, rounded to two decimals.
for record in sequences:
    gc_percent = gc_content(record.seq)
    print(f"GC content of {record.id}: {gc_percent:.2f}%")

GC content of VDOA01000010.1: 67.45%
GC content of VDOA01000011.1: 76.85%


In [5]:
# DNA -> RNA: transcribe each loaded record and print the result
for record in sequences:
    dna_seq = record.seq
    rna_seq = dna_seq.transcribe()
    print(f"RNA sequence for {record.id} is {rna_seq}")

RNA sequence for VDOA01000010.1 is CUGAUGGCCACCAGCGGGAAGAAACACUGGCCGCCUAUGGGCAGAACUCAAUGGCCCUUGACAGCCGUACUCCGCUGCACCGAUGCACCGAUGCACCGAUGCACCGAUGGUGAUGAUGAGGCCCAUCGCCGCAACCACCGGCCAGCCCUUUCUUCCUGACCGAGGAGCUGUCGACAGUGCGUGGCAGGUGCCCGUGCGGACGCGGCCACGGGCUCUCCGGAGGCCGGGCCGGCCCCGCCCCCUCGGACGGUGCAGAGGGGGCGGAGGGGGCCUCGUCCGGCGCCCACCGAAAGUGUCACCCUCACCUCGCCACGUGGACCGUCACAAGCGACGGCCCUGCUGGCCUGCACCCCGUGCGACUCCCCUGCCCGGUCUCUACGUGACGAGGCUCGUUCCGGCCGCUGAGAGCGGCGUAGGAGUCGCGGGCACGGGCAUCAACGGGACGCGCGACGGGACGGCCCAUUCAAGUGCCGCCGCGGCACGCAGACGUCGUCAUGGGCAGCGCGGUGAGGCGCUCCUGGCGCGCCGCUGACUCUGCGCGGCGGUCGAUGCCCGGGCCUACGCAGACCACGAGACAGCGGGACGGGCCACCAGGCGAACCCUUCACCCGGGCACCACGCCUGGCCCCCGAGCGGCGAGUAGGCCGGUCGCCGUGACCGGCCCCGCGGCGCAGGAGCUGGCUGCCCGCGACGGCGCCGUGUCGCUCCGCAGCACACGCGGCAUGAAAACGCCGCAGGCCGGACCCCAAGGGGUCCGGCCUGCGGCGUUCGACGACGGUCGGGUCAGUGGGCGUACUUGCCCCGAGAGAACUCGUAGACCCACCCGAAGGUGAAGUAGGCGCCCGCGAUGACGCCGAUGAUGAAGAUCCACCAGCCGACAGCCAGACCGGUCACGAUGAUCGCGCAGGCCAGGCCGAGGCCCAGCGGCCACCAGCUCCAGGGAGCGAAGGUGCCGUAGGUGCCGGC

In [7]:
# translating RNA to protein
# NOTE: each record is translated in reading frame 1 from its first base, using
# translate()'s default codon table; stop codons show up as "*" in the output.
for record in sequences:
    rna_seq = record.seq.transcribe()
    # Translate whole codons only. Biopython warns about a trailing partial
    # codon (and says it may become an error), so drop the 1-2 leftover bases
    # explicitly instead of relying on them being ignored.
    whole_codon_length = len(rna_seq) - len(rna_seq) % 3
    protein_seq = rna_seq[:whole_codon_length].translate()
    print(f"Protein sequence for {record.id} is {protein_seq}")

Protein sequence for VDOA01000010.1 is LMATSGKKHWPPMGRTQWPLTAVLRCTDAPMHRCTDGDDEAHRRNHRPALSS*PRSCRQCVAGARADAATGSPEAGPAPPPRTVQRGRRGPRPAPTESVTLTSPRGPSQATALLACTPCDSPARSLRDEARSGR*ERRRSRGHGHQRDARRDGPFKCRRGTQTSSWAAR*GAPGAPLTLRGGRCPGLRRPRDSGTGHQANPSPGHHAWPPSGE*AGRRDRPRGAGAGCPRRRRVAPQHTRHENAAGRTPRGPACGVRRRSGQWAYLPRENS*THPKVK*APAMTPMMKIHQPTARPVTMIAQARPRPSGHQLQGAKVP*VPA*SAIWPSASSAGPTPYFRSVTFRYHAIIQDIPPTA*MAGMPAHSVHSTQNP*RVAAAMKNRPMIQKMLAATFTGRPSLRRPQTCSEASAGRAPGGADPAPDARSGCAAGSGSCAAAAGTKWSTRARRRSPTGRPR*PSCHSAR*CRRSRRTGSATRRERRRTPRRTRSSPGSCPPPCSPRSDGACR*HRASAGSGTCRGSPRTASRTGSCRDARSASCR*TSATRRRSRRTSRTPRCRRRRSGSAPRRSTCRTPGSPGAATPG*RRSGHRTGR*P*SRASTASGSRR*SNRATWCRSSSRTSRRSGPR*AWSSRRRTA
Protein sequence for VDOA01000011.1 is VGEPLQLPSASYLLRERPAGLVGTGPSQQRHALLQLDSRP*PRSPPPPPRSTSPWSTPCASSARARSTPSTRPAAASACTARTSEPSRT*WPARRPVSPSRPRTSAPTCTSPGPRPPPSWTAWPPRATSAATGTRVTGGACWWSTRTRPAATDSARSNRCPSG*APPSRGSPPRSCGPRCGCSRRPPPH*RTPPRRTRDPTGAHRPGRPLGVGQDRPRRRARGPDRAVRARRGDRDRVHVPGLGRAGRRGGRRRPLPGRRARAAGAGHGDLDHVGLAPRRP



In [8]:
# Reverse complement: print the opposite-strand sequence of every loaded record
for record in sequences:
    forward_strand = record.seq
    rev_comp = forward_strand.reverse_complement()
    print(f"Reverse complement sequence for {record.id} is {rev_comp}")

Reverse complement sequence for VDOA01000010.1 is TGCCGTTCTTCTCCGTGATGACCATGCTCATCGCGGTCCCGACCGGCGTGAAGTTCGTGAACTGGATCGGCACCATGTGGCGCGGTTCGATCACCTTCGAGACCCCGATGCTGTGGACGCTCGGCTTCATGGTCACCTTCCTGTTCGGTGGCCTGACCGGCGTCATCCTGGCGTCGCCGCCCCTGGACTTCCAGGTGTCCGACACGTACTTCGTCGTGGCGCACTTCCACTACGTCGTCTTCGGCACCGTGGTGTTCGCGATGTTCGCCGGCTTCTTCTTCTGGTGGCCGAAGTTCACCGGCAAGATGCTGAACGAGCGTCTCGGCAAGATCCAGTTCTGGATGCTGTTCGTGGGCTTCCACGGCACGTTCCTGATCCAGCACTGGCTCGGTGTCATCGGCATGCCCCGTCGCTACGCGGACTACATGGTGGAGGACAGCTTCCAGGCGATGAACGCGTTCTCCTCGGTGTTCTCCTTCGTTCTCGGCGCGTCGCTGATCCCGTTCTTCTGGAACGTCTACATCACCTCGCGCTATGGCAAGAAGGTCACCGTGGACGACCCGTGGGGCTTCGGCGCCTCGCTCGAGTGGACCACTTCGTGCCCGCCGCCGCGGCACAACTTCCACTCCCTGCCGCGCATCCGCTCCGAGCGTCCGGCGCTGGATCTGCACCACCCGGAGCTCTTCCCGCACTCGCCTCAGAACACGTCTGAGGCCTTCGGAGAGAAGGTCGCCCAGTGAAGGTCGCAGCCAACATCTTCTGGATCATCGGCCTGTTCTTCATCGCAGCCGCCACCCTCTACGGGTTCTGGGTCGAGTGGACCGAGTGGGCAGGCATCCCGGCCATCTACGCCGTGGGCGGGATGTCCTGGATGATCGCGTGGTACCTGAACGTCACCGAGCGGAAGTACGGCGTCGGGCCCGCCGATGATGCGGACGGCCAGATCGCCG