In [2]:
# 1. Download the genome sequence of Baker’s Yeast (Saccharomyces cerevisiae)
# Set up an email address for NCBI access.
# Use the Biopython library to search for the genome sequence of Saccharomyces cerevisiae in the NCBI nucleotide database.
# Download the genome record and save it to a GenBank file (yeast_genome.gb).

In [3]:
from Bio import Entrez, SeqIO

def download_genome_sequence(species, output_file, email="sargun22450@iiitd.ac.in"):
    """Search NCBI's nucleotide database for ``species`` and save the first hit as GenBank text.

    Args:
        species: Search term passed to ``Entrez.esearch`` (e.g. a species name).
        output_file: Path of the GenBank file to write.
        email: Contact address assigned to ``Entrez.email`` before any request
            is made. Defaults to the address this notebook has always used;
            pass your own when reusing the function.

    Returns:
        None. A status message is printed in both the found and not-found cases.

    Notes:
        Errors raised by the Entrez calls or by writing the file propagate to
        the caller; both Entrez handles are closed even when that happens.
        The output file is only opened once the record has been downloaded, so
        a failed fetch never truncates an existing file.

        NOTE(review): the first accession returned by the search is used as-is.
        A bare species-name query is not guaranteed to return a complete genome
        record -- confirm the accession if that matters downstream.
    """
    Entrez.email = email

    # Search for the genome using the species name
    search_handle = Entrez.esearch(db="nucleotide", term=species, idtype="acc")
    try:
        record = Entrez.read(search_handle)
    finally:
        # Close the handle even if parsing the search result fails.
        search_handle.close()

    if not record['IdList']:
        print(f"No genome sequence found for {species}")
        return

    # Fetch the genome record for the first accession in the result list
    genome_id = record['IdList'][0]
    genbank_handle = Entrez.efetch(db="nucleotide", id=genome_id, rettype="gb", retmode="text")
    try:
        genbank_text = genbank_handle.read()
    finally:
        # Close the handle even if the download is interrupted.
        genbank_handle.close()

    # Save the genome sequence to a file (only after the download succeeded)
    with open(output_file, "w") as f:
        f.write(genbank_text)
    print(f"Genome sequence downloaded and saved to {output_file}")

# Example usage: search NCBI for "Saccharomyces cerevisiae" and save the first
# matching nucleotide record to yeast_genome.gb.
download_genome_sequence("Saccharomyces cerevisiae", "yeast_genome.gb")


Genome sequence downloaded and saved to yeast_genome.gb


In [None]:
# 2. Find the Origin of Replication in the Yeast Genome
 
# a. Read the yeast genome sequence from the downloaded GenBank file.
# b. Define an Autonomously Replicating Sequence (ARS) consensus sequence.
# c. Use Biopython's nucleotide search function (nt_search) to find the positions of the ARS consensus sequence in the genome.
# d. Check for the presence of additional features associated with ARS near the identified positions.
# e. Output the positions of the ARS consensus sequence and any additional features found.

# Read the Genome Sequence:
# Read the yeast genome sequence from a GenBank file.

# Define ARS Consensus Sequence:
# Define an ARS consensus sequence. This is a short DNA sequence that is commonly found in ARS.

# Define Additional Features:
# Define additional features or motifs associated with ARS. These could be specific sequences or motifs known to be present near ARS.

# Search for ARS Consensus Sequence:
# Use Biopython's nucleotide search function (nt_search, from Bio.SeqUtils) to find the positions of the ARS consensus sequence in the yeast genome.

# Check for Additional Features:
# Check for the presence of additional features near the identified positions of the ARS consensus sequence. This step helps
# confirm that the identified region has characteristics commonly associated with ARS.

# Output Results:
# Print the positions of the ARS consensus sequence and any additional features found. If no ARS consensus sequence is found,
#  print a message indicating that.
 
# This logic combines the search for the ARS consensus sequence with the verification of additional features to
#     increase the likelihood of identifying an Autonomously Replicating Sequence in the yeast genome.

In [5]:
from Bio import SeqIO
from Bio.SeqUtils import nt_search
def find_ars(genome_file):
    """Print ARS consensus matches in a GenBank record and any ARS-associated motifs nearby.

    The record in ``genome_file`` is read with ``SeqIO.read``, its sequence is
    searched for the ARS consensus ``WTTTAYRTTTW`` using ``nt_search``, and the
    match positions (as returned by ``nt_search``) are printed. Each additional
    motif is then reported once if at least one of its occurrences starts
    within the window around *any* ARS match, i.e. from 20 bases before the
    match start up to 19 bases after it.

    Args:
        genome_file: Path to the GenBank file to read.

    Returns:
        None. Results are printed; if the consensus does not occur, a single
        "No ARS consensus sequence found" message is printed instead.

    Notes:
        Errors from opening or parsing the file propagate to the caller.
    """
    # Read the genome sequence from the GenBank file
    with open(genome_file, "r") as f:
        record = SeqIO.read(f, "genbank")

    # Extract the DNA sequence and convert it to a string
    dna_sequence = str(record.seq)

    # ARS consensus sequence in S. cerevisiae
    ars_consensus = "WTTTAYRTTTW"

    # Additional features associated with ARS
    additional_features = ["AATAAA", "TTTTT", "GCGC"]

    # Half-width of the neighbourhood searched around each ARS match start.
    window = 20

    # nt_search returns [pattern, pos1, pos2, ...]. The leading pattern string
    # is always present, so the list itself is never empty: decide "found or
    # not" from the integer positions only.
    positions = [pos for pos in nt_search(dna_sequence, ars_consensus) if isinstance(pos, int)]

    if not positions:
        # Print a message if no ARS consensus sequence was found in the given sequence
        print("No ARS consensus sequence found in the given sequence.")
        return

    # Print the ARS consensus sequence and its positions
    print(f"ARS consensus sequence ({ars_consensus}) found at positions: {positions}")

    # Every sequence offset that lies inside the window of at least one ARS
    # match; built once so each feature lookup below is a cheap set test.
    near_ars = {
        offset
        for start in positions
        for offset in range(start - window, start + window)
    }

    # Check if additional features are present near any ARS consensus match
    for feature in additional_features:
        feature_matches = nt_search(dna_sequence, feature)
        if any(isinstance(pos, int) and pos in near_ars for pos in feature_matches):
            print(f"Additional feature ({feature}) found near the ARS consensus sequence.")

# Example usage: scan the GenBank record saved in yeast_genome.gb for ARS
# consensus matches and print what was found.
find_ars("yeast_genome.gb")


ARS consensus sequence (WTTTAYRTTTW) found at positions: [56946, 65503, 68265]
Additional feature (TTTTT) found near the ARS consensus sequence.
