In [1]:
from Bio import SeqIO

def get_sequence_lengths(fasta_filepath):
    """
    Map every record in a FASTA file to the length of its sequence.

    Args:
        fasta_filepath (str): Location of the FASTA file to read.

    Returns:
        dict: Record IDs mapped to the length of the matching sequence,
            inserted in the order the parser yields the records. When an ID
            occurs more than once, the length of its last occurrence is kept.
    """
    # Records are consumed one at a time; only the ID and length are retained.
    records = SeqIO.parse(fasta_filepath, "fasta")
    return {entry.id: len(entry.seq) for entry in records}

In [12]:
# Report the length of each sequence in the rbcL FASTA file, then list the IDs.
filepath = "data/barcodes/rbcL_fasta.fasta"
lengths = get_sequence_lengths(filepath)

# One line per record, in the order the IDs were inserted into the dict.
for seq_id in lengths:
    length = lengths[seq_id]
    print(f"Sequence {seq_id}: Length {length}")

# Show the collected IDs as a dict_keys view.
print(lengths.keys())

Sequence PBRU00041911: Length 443
Sequence PBRU00077056: Length 444
Sequence PBRU00021776: Length 484
Sequence PBRU00047050: Length 484
Sequence PBRU00050298: Length 469
Sequence PBRU00078736: Length 494
Sequence PBRU00055711: Length 471
Sequence PBRU00064307: Length 468
Sequence PBRU00071780: Length 468
dict_keys(['PBRU00041911', 'PBRU00077056', 'PBRU00021776', 'PBRU00047050', 'PBRU00050298', 'PBRU00078736', 'PBRU00055711', 'PBRU00064307', 'PBRU00071780'])


In [11]:
# Report the length of each sequence in the trnL FASTA file, then list the IDs.
filepath = "data/barcodes/trnL_fasta.fasta"
lengths = get_sequence_lengths(filepath)

# One line per record, in the order the IDs were inserted into the dict.
for seq_id in lengths:
    length = lengths[seq_id]
    print(f"Sequence {seq_id}: Length {length}")

# Show the collected IDs as a dict_keys view.
print(lengths.keys())

Sequence PBRU00064538: Length 400
Sequence PBRU00025308: Length 400
Sequence PBRU00077056: Length 408
Sequence PBRU00073743: Length 457
Sequence PBRU00064307: Length 422
Sequence PBRU00071780: Length 465
Sequence PBRU00050298: Length 474
Sequence PBRU00078736: Length 480
Sequence PBRU00055711: Length 260
Sequence PBRU00047866: Length 290
dict_keys(['PBRU00064538', 'PBRU00025308', 'PBRU00077056', 'PBRU00073743', 'PBRU00064307', 'PBRU00071780', 'PBRU00050298', 'PBRU00078736', 'PBRU00055711', 'PBRU00047866'])
