In [1]:
import xml.etree.ElementTree as ET

import pandas as pd
import requests

In [2]:
# Read the spreadsheet of chromosome/position records into a DataFrame
file_path = 'Input.xlsx'
df = pd.read_excel(io=file_path)


In [3]:
# UCSC DAS endpoint used to retrieve DNA sequence for the mm10 assembly
url = (
    "https://genome.ucsc.edu"
    "/cgi-bin/das/mm10/dna"
)

In [4]:
# Accumulator for the FASTA records (one string per record)
fasta_output = list()


In [7]:
# Fetch the sequence flanking every position listed in `df` from the UCSC DAS
# endpoint in `url`, collect one FASTA record per row, and write them to disk.

FLANK_BP = 150        # bases requested on each side of the position
FASTA_WIDTH = 60      # characters per wrapped sequence line
REQUEST_TIMEOUT = 30  # seconds before a stalled request is abandoned


def _extract_dna(das_xml):
    """Return the nucleotide string contained in a DAS ``dna`` XML reply.

    The DAS reply is an XML document whose sequence is the text of its
    ``<DNA>`` element, so it is parsed as XML rather than sliced by line.

    Args:
        das_xml: Raw response body (``bytes`` or ``str``).

    Returns:
        The sequence with all whitespace and line breaks removed.

    Raises:
        xml.etree.ElementTree.ParseError: If the body is not well-formed XML.
        ValueError: If no non-empty ``<DNA>`` element is present.
    """
    root = ET.fromstring(das_xml)
    dna_node = next(root.iter("DNA"), None)
    if dna_node is None or not dna_node.text or not dna_node.text.strip():
        raise ValueError("DAS response contains no <DNA> sequence")
    return "".join(dna_node.text.split())


# Start from an empty list so re-running this cell does not append duplicates
fasta_output = []

for index, row in df.iterrows():
    # Fallback header: used when the row's own coordinates cannot be read, so
    # the error record never borrows the previous row's chromosome/range.
    fasta_header = f">row{index}"
    try:
        # Extract chromosome and position
        chromosome = str(row['chr'])
        position = int(row['position'])

        # Window of FLANK_BP on each side; DAS coordinates are 1-based, so the
        # start is clamped for positions close to the chromosome start.
        start = max(1, position - FLANK_BP)
        end = position + FLANK_BP
        fasta_header = f">chr{chromosome}:{start}-{end}"

        # Query the UCSC DAS server; the timeout keeps one stalled request
        # from hanging the whole run.
        params = {"segment": f"chr{chromosome}:{start},{end}"}
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)

        if response.status_code == 200:
            sequence = _extract_dna(response.content)

            # Wrap the sequence at FASTA_WIDTH characters per line
            fasta_sequence = "\n".join(
                sequence[i:i + FASTA_WIDTH]
                for i in range(0, len(sequence), FASTA_WIDTH)
            )
            fasta_output.append(f"{fasta_header}\n{fasta_sequence}")
        else:
            # Keep a placeholder record so the failed row stays visible
            fasta_output.append(f"{fasta_header}\nError: HTTP {response.status_code}")

    except Exception as e:
        # Best effort per row: record the failure and continue with the next
        fasta_output.append(f"{fasta_header}\nError: {e}")

# Save the FASTA output to a file (newline-terminated when non-empty)
output_path = "DNA_Sequences_API_Output.fasta"
with open(output_path, "w") as fasta_file:
    fasta_file.write("\n".join(fasta_output))
    if fasta_output:
        fasta_file.write("\n")

In [None]:
# Tell the user where the FASTA file was written
saved_message = f"DNA sequences saved in FASTA format to {output_path}"
print(saved_message)