In [3]:
! pip install biopython

Collecting biopython
  Downloading biopython-1.83-cp38-cp38-win_amd64.whl (2.7 MB)
Installing collected packages: biopython
Successfully installed biopython-1.83


In [12]:
# Importing SeqIO module from Biopython to parse FASTA files
from Bio import SeqIO

def process_fasta_file(input_file, output_file, position=30):
    """Substitute one nucleotide in every FASTA record and write the result.

    Each record of ``input_file`` is parsed with ``SeqIO.parse`` (imported at
    the top of this cell). The nucleotide at ``position`` is replaced by the
    variant nucleotide, which is taken to be the last character of the record
    ID. The record is then written to ``output_file`` as a two-line FASTA
    entry: a header made of the ID without its last character, an underscore,
    the original nucleotide and the variant nucleotide, followed by the whole
    modified sequence on a single line.

    Records whose sequence is shorter than ``position`` are skipped and do not
    appear in the output file.

    Args:
        input_file: Path of the FASTA file to read.
        output_file: Path of the FASTA file to write; it is opened in "w"
            mode, so existing content is replaced.
        position: 1-based position of the nucleotide to replace. Defaults to
            30, the position that was previously hard-coded.

    Raises:
        ValueError: If ``position`` is smaller than 1.
        IndexError: If a record ID is an empty string, because there is then
            no last character to use as the variant.
    """
    if position < 1:
        raise ValueError(f"position must be >= 1 (1-based), got {position!r}")

    # Sequences are indexed from 0, positions are counted from 1.
    index = position - 1

    with open(input_file, "r") as fasta_in, open(output_file, "w") as fasta_out:
        for record in SeqIO.parse(fasta_in, "fasta"):
            sequence = str(record.seq)

            # Too short to contain the requested position: leave it out.
            if len(sequence) < position:
                continue

            original_nucleotide = sequence[index]
            # Only record.id is inspected; the variant is assumed to be its
            # last character.
            variant_nucleotide = record.id[-1]

            modified_sequence = (
                sequence[:index] + variant_nucleotide + sequence[index + 1:]
            )

            # Header: ID without the variant character, then "_<orig><variant>".
            new_id = f"{record.id[:-1]}_{original_nucleotide}{variant_nucleotide}"

            fasta_out.write(f">{new_id}\n{modified_sequence}\n")

if __name__ == "__main__":
    # Run the substitution on the input FASTA file and write the variant
    # sequences to a new FASTA file.
    process_fasta_file(
        input_file='ngs.fa/ngs.fa',
        output_file='ngs_variants.fa',
    )