In [60]:
# q1
from Bio import Entrez, SeqIO 
def download_save(accession_number, output_file):
    Entrez.email = "sargun22450@iiitd.ac.in" 
    handle = Entrez.efetch(db="nucleotide", id=accession_number, rettype="fasta", retmode="text")
    sequence = handle.read()
    
    with open(output_file, "w") as f:
        f.write(sequence)

accession_number =  "MK868028.1" 
 
output_file = "pdc.fasta"
 
download_save(accession_number, output_file)

In [61]:
def extract_snps(blast_file):
    with open(blast_file, 'r') as file:
        lines = file.readlines()

    snps = []
    subject_sequences = {}  # Dictionary to store subject sequences

    for line in lines:
        if line.startswith("Query"):
            query_sequence = line.split()[-2]
        elif line.startswith("Sbjct"):
            subject_sequence = line.split()[-2]
            subject_id = line.split()[1]  # Assuming subject ID is in the line
            for q, s in zip(query_sequence, subject_sequence):
                if q != s or q == '-':  # Check for differences and gaps in query sequence
                    position = int(line.split()[1])
                    snps.append({"position": position, "query_base": q, "subject_base": s})
            # Store subject sequence based on subject ID
            if subject_id in subject_sequences:
                subject_sequences[subject_id] += subject_sequence
            else:
                subject_sequences[subject_id] = subject_sequence

    # Convert subject sequences into a list
    subject_sequences_list = [{"subject_id": sid, "sequence": seq} for sid, seq in subject_sequences.items()]

    return snps, subject_sequences_list

# Example usage
blast_file = "blast_results.txt"
identified_snps, subject_sequences = extract_snps(blast_file)

# Print identified SNPs
for snp in identified_snps:
    print(f"SNP at position {snp['position']} - Query base: {snp['query_base']}, Subject base: {snp['subject_base']}")

# Print subject sequences
for subject_seq in subject_sequences:
    print(f"Subject ID: {subject_seq['subject_id']}, Sequence: {subject_seq['sequence']}")


SNP at position 233637 - Query base: G, Subject base: A
SNP at position 233517 - Query base: A, Subject base: C
SNP at position 233517 - Query base: A, Subject base: T
SNP at position 233457 - Query base: G, Subject base: A
SNP at position 233397 - Query base: A, Subject base: G
SNP at position 233277 - Query base: A, Subject base: T
SNP at position 233217 - Query base: T, Subject base: C
SNP at position 233217 - Query base: T, Subject base: C
SNP at position 233217 - Query base: G, Subject base: A
SNP at position 233217 - Query base: A, Subject base: G
SNP at position 233217 - Query base: T, Subject base: C
SNP at position 233217 - Query base: A, Subject base: T
SNP at position 233217 - Query base: T, Subject base: C
SNP at position 233157 - Query base: A, Subject base: T
SNP at position 233157 - Query base: A, Subject base: G
SNP at position 233097 - Query base: C, Subject base: A
SNP at position 233097 - Query base: A, Subject base: C
SNP at position 233097 - Query base: G, Subject 

In [62]:
import requests
from Bio.SeqUtils.ProtParam import ProteinAnalysis
 
def translate_dna_to_protein(dna_sequence):
    url = "https://web.expasy.org/cgi-bin/translate/dna2aa.cgi"
    params = {
        'dna_sequence': dna_sequence,
        'output_format': 'fasta'
    }
    response = requests.post(url, data=params)
    return response.text.strip()

 
print(subject_sequences)


def perform_expasy_analysis(subject_sequences):
    expasy_results = []
    protein_sequences = []  # Initialize a list to store protein sequences
    for sequence in subject_sequences:
        dna_sequence = sequence["sequence"]
        protein_sequence = translate_dna_to_protein(dna_sequence)
        print(protein_sequence)
        # Append the translated protein sequence to the list
        protein_sequences.append(protein_sequence) 
        if not protein_sequence:
            print(f"Skipping sequence {sequence['subject_id']} due to translation error")
            continue
        
        # Check if the protein sequence contains invalid characters
        invalid_chars = set(protein_sequence) - set("ACDEFGHIKLMNPQRSTVWY")
        if invalid_chars:
            print(f"Skipping sequence {sequence['subject_id']} due to invalid protein sequence: {invalid_chars}")
            continue
        
        subject_id = sequence["subject_id"]
        protein_analysis = ProteinAnalysis(protein_sequence)
        molecular_weight = protein_analysis.molecular_weight()
        if molecular_weight is None:
            print(f"Skipping sequence {sequence['subject_id']} due to invalid protein sequence")
            continue
        expasy_result = {
            "subject_id": subject_id,
            "molecular_weight": molecular_weight,
            "aromaticity": protein_analysis.aromaticity(),
            "instability_index": protein_analysis.instability_index(),
            # Add more properties as needed
        }
        expasy_results.append(expasy_result)
    return expasy_results

expasy_results = perform_expasy_analysis(subject_sequences)

# Print ExPASy results
for result in expasy_results:
    print(result)

[{'subject_id': '1', 'sequence': 'ATGTCTGAAATTACTTTGGGTAAATATTTGTTCGAAAGATTAAAGCAAGTCAACGTTAAC'}, {'subject_id': '61', 'sequence': 'ACCGTTTTCGGTTTGCCAGGTGACTTCAACTTGTCCTTGTTGGACAAGATCTACGAAGTT'}, {'subject_id': '121', 'sequence': 'GAAGGTATGAGATGGGCTGGTAACGCCAACGAATTGAACGCTGCTTACGCCGCTGATGGT'}, {'subject_id': '181', 'sequence': 'TACGCTCGTATCAAGGGTATGTCTTGTATCATCACCACCTTCGGTGTCGGTGAATTGTCT'}, {'subject_id': '241', 'sequence': 'GCTTTGAACGGTATTGCCGGTTCTTACGCTGAACACGTCGGTGTTTTGCACGTTGTTGGT'}, {'subject_id': '301', 'sequence': 'GTCCCATCCATCTCTGCTCAAGCTAAGCAATTGTTGTTGCACCACACCTTGGGTAACGGT'}, {'subject_id': '361', 'sequence': 'GACTTCACTGTTTTCCACAGAATGTCTGCCAACGTTTCTGAAACCACTGCTATGATCACT'}, {'subject_id': '421', 'sequence': 'GACATTGCTACCGCCCCAGCTGAAATTGACAGATGTATCAGAACCACTTACGTCACCCAA'}, {'subject_id': '481', 'sequence': 'AGACAAGTCAACTTAGGTTTGCCAGCTAACTTGGTCGACTTGAACGTCCCAGCTAAGTTG'}, {'subject_id': '541', 'sequence': 'TTGCAAACTCCAATTGACATGTCTTTGAAGCCGAACGATGCTGAATCCGAAAAGGAAGTC'}, {'subject_id

KeyboardInterrupt: 