<a href="https://colab.research.google.com/github/louismathewbt23/Bioinformatics/blob/main/Expt_5_1RV23BT034.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import re
from google.colab import files

print("Enter your Genome sequence (e.g., in FASTA format)")

# files.upload() is interactive: it prompts for a file in the notebook.
# The first name yielded by iterating its result is used as the input file.
# If anything in the upload step raises (including an empty result, where
# next() has nothing to return), fall back to a small built-in sequence so
# the rest of the notebook can still run.
try:
    uploaded = files.upload()
    seqfile = next(iter(uploaded))
    print(f"The Genome is uploaded to your Colab workspace.\nName of the file is: {seqfile}")
except Exception as upload_error:
    print(f"File upload failed or was skipped: {upload_error}")
    print("Using a dummy sequence for demonstration.")
    # Write a one-record FASTA file to disk so the fallback goes through the
    # same file-reading path as a real upload.
    seqfile = 'dummy.fasta'
    with open(seqfile, 'w') as dummy_file:
        dummy_file.write('>Dummy Genome\nAGATGCGCGAGATTCCGAATTCGTAGGTACCAAGCTTATAGGCGCGAGCATAGATAGCTAGATGCGCGAGATTCCGAATTCGTAGGTACCAAGCTTATAGGCGCGAGC')


def read_fasta(filename):
    """Read the first record of a FASTA file.

    Blank lines before the header are skipped. The first non-blank line is
    taken as the descriptor (returned stripped, with any leading '>' kept).
    The following lines are stripped and concatenated into the sequence until
    the end of the file or the next line starting with '>', so the headers
    and residues of later records never leak into the returned sequence.

    Args:
        filename (str): Path of the FASTA file to read.

    Returns:
        tuple: (descriptor, sequence) as two strings. Both are empty for an
        empty file.
    """
    with open(filename, 'r') as f:
        # Locate the header: the first line that is not blank.
        descriptor = ''
        for line in f:
            descriptor = line.strip()
            if descriptor:
                break

        # Continue on the same file iterator, collecting sequence lines.
        chunks = []
        for line in f:
            line = line.strip()
            if line.startswith('>'):
                # Start of the next record: only the first one is returned.
                break
            chunks.append(line)

    # A single join avoids repeated string concatenation on large genomes.
    return descriptor, ''.join(chunks)


# Parse the chosen file, then echo the header and a short preview so the
# user can confirm the right sequence was loaded.
fasta_record = read_fasta(seqfile)
descriptor, seq = fasta_record
print(f"\nDescriptor: {descriptor}")
# Only the first 100 characters are shown; the trailing "..." is always printed.
preview = seq[:100]
print(f"Sequence (first 100 chars): {preview}...")

Enter your Genome sequence (e.g., in FASTA format)


Saving sequence (1).fasta to sequence (1).fasta
The Genome is uploaded to your Colab workspace.
Name of the file is: sequence (1).fasta

Descriptor: >NC_000019.10:1040107-1065572 Homo sapiens chromosome 19, GRCh38.p14 Primary Assembly
Sequence (first 100 chars): AAGCTCAGCGCACTTGGCTTAAGGGGCGGCGCGCTCCCTGCCTGCTGCTGGGCGGAGGGAAGGCGGCAAGAGCTGCGGAGCCCCTGGAAGGTGAGAAGGA...


In [12]:
def restriction_sites_with_re(seq, recog_seq):
    """Find restriction sites using regular expressions.

    The recognition sequence is interpreted as a regular expression, so
    degenerate sites can be written with character classes, for example
    'GT[CT][AG]AC'.

    The pattern is wrapped in a zero-width lookahead before being handed to
    re.finditer. A plain finditer resumes scanning after the end of each
    match and therefore skips occurrences that overlap the previous one;
    the lookahead consumes no characters, so every start position is tested
    and overlapping sites are reported too.

    Args:
        seq (str): The DNA sequence to search.
        recog_seq (str): Recognition sequence, as a regular expression.

    Returns:
        list: 0-based start indices of all matches, in ascending order.
    """
    # The non-capturing group keeps a top-level alternation ('A|B') inside
    # the lookahead as a single unit.
    overlapping_pattern = '(?=(?:' + recog_seq + '))'
    return [match.start() for match in re.finditer(overlapping_pattern, seq)]

def restriction_sites(seq, recog_seq):
    """Locate a recognition sequence by literal comparison at every offset.

    Every start position at which the full recognition sequence still fits
    is tested, so overlapping occurrences are all reported.

    Args:
        seq (str): The DNA sequence to search.
        recog_seq (str): The exact recognition sequence to look for.

    Returns:
        list: 0-based start indices of every occurrence, in ascending order.
    """
    last_start = len(seq) - len(recog_seq)
    return [
        offset
        for offset in range(last_start + 1)
        if seq.startswith(recog_seq, offset)
    ]


# Run both search implementations over the loaded sequence and print their
# results under separate headings so the two outputs can be compared.
for heading, find_sites in (
    ('\n--- Restriction Sites using re.finditer ---', restriction_sites_with_re),
    ('\n--- Restriction Sites using direct comparison ---', restriction_sites),
):
    print(heading)
    print('HindIII:', find_sites(seq, 'AAGCTT'))
    print('EcoRI:  ', find_sites(seq, 'GAATTC'))
    print('KpnI:   ', find_sites(seq, 'GGTACC'))


--- Restriction Sites using re.finditer ---
HindIII: []
EcoRI:   []
KpnI:    [7524, 20870]

--- Restriction Sites using direct comparison ---
HindIII: []
EcoRI:   []
KpnI:    [7524, 20870]


In [13]:
def design_simple_primer(sequence, restriction_site_index, primer_length=20):
    """Designs a simple forward primer upstream of a given restriction site.

    The primer is the `primer_length` bases of `sequence` that end
    immediately before `restriction_site_index`. No reverse complement is
    taken and no melting-temperature or GC checks are made.

    A warning is printed and None is returned when the request cannot be
    satisfied: the index is None, there are fewer than `primer_length` bases
    upstream of the site, the length is not positive, or the index lies
    beyond the end of the sequence (which would otherwise yield a silently
    truncated or empty primer).

    Args:
        sequence (str): The DNA sequence.
        restriction_site_index (int): The starting index of the restriction site.
        primer_length (int): The desired length of the primer.

    Returns:
        str: The designed primer sequence, or None if no valid primer of the
        requested length can be taken.
    """
    # Ensure the restriction site index is valid and there's enough space for the primer
    if restriction_site_index is None or restriction_site_index < primer_length:
        print(f"Warning: Not enough sequence upstream for a {primer_length} bp primer at index {restriction_site_index}.")
        return None

    # A zero or negative length would produce an empty slice, not a primer.
    if primer_length <= 0:
        print(f"Warning: Primer length must be a positive integer (got {primer_length}).")
        return None

    # An index past the end of the sequence would truncate the slice below.
    if restriction_site_index > len(sequence):
        print(f"Warning: Restriction site index {restriction_site_index} is beyond the end of the sequence (length {len(sequence)}).")
        return None

    # Extract the sequence upstream of the restriction site
    start_index = restriction_site_index - primer_length
    return sequence[start_index:restriction_site_index]


# --- User Selection for Primer Design ---
#
# Interactive flow: pick an enzyme, pick one of its sites found in `seq`,
# pick a primer length, then print the primer taken from just upstream of
# that site.

# Define available restriction enzymes and their recognition sequences
restriction_enzymes = {
    'HindIII': 'AAGCTT',
    'EcoRI': 'GAATTC',
    'KpnI': 'GGTACC'
}

print("\n--- Primer Design based on Restriction Sites ---")
print("Available restriction enzymes:")
for number, enzyme in enumerate(restriction_enzymes, start=1):
    print(f"{number}. {enzyme}")

# Menu order follows the dict's insertion order; names are also accepted
# case-insensitively so that e.g. "kpni" selects KpnI.
enzyme_names = list(restriction_enzymes)
names_by_lowercase = {name.lower(): name for name in enzyme_names}

# Get user's choice of enzyme
selected_enzyme = None
while selected_enzyme is None:
    choice = input("Enter the number or name of the restriction enzyme you want to use for primer design: ").strip()
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() rejects with a ValueError.
    if choice.isdecimal() and 1 <= int(choice) <= len(enzyme_names):
        selected_enzyme = enzyme_names[int(choice) - 1]
    elif choice.lower() in names_by_lowercase:
        selected_enzyme = names_by_lowercase[choice.lower()]
    else:
        print("Invalid choice. Please try again.")

recog_seq = restriction_enzymes[selected_enzyme]
print(f"You selected {selected_enzyme} (recognition sequence: {recog_seq}).")

# Find all sites for the chosen enzyme
chosen_sites = restriction_sites(seq, recog_seq)

if not chosen_sites:
    print(f"No {selected_enzyme} sites found in the sequence. Cannot design primer.")
else:
    print(f"\n{selected_enzyme} sites found at indices: {chosen_sites}")

    # Keep asking until the user enters one of the indices listed above.
    selected_site_index = None
    while selected_site_index is None:
        try:
            idx_choice = int(input(f"Enter the index of the {selected_enzyme} site you want to design the primer for: "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if idx_choice in chosen_sites:
            selected_site_index = idx_choice
        else:
            print("Invalid index. Please choose one from the list above.")

    # Keep asking until the user enters a positive integer length.
    primer_length_input = None
    while primer_length_input is None:
        try:
            length_choice = int(input("Enter desired primer length (e.g., 20): "))
        except ValueError:
            print("Invalid input. Please enter a number.")
            continue
        if length_choice > 0:
            primer_length_input = length_choice
        else:
            print("Primer length must be a positive integer.")

    # Design the primer
    designed_primer = design_simple_primer(seq, selected_site_index, primer_length=primer_length_input)

    if designed_primer:
        print(f"\nSuccessfully designed {primer_length_input} bp primer upstream of {selected_enzyme} site at index {selected_site_index}:")
        print(f"Primer: {designed_primer}")
    else:
        print("\nCould not design primer based on the selected site and length. Please ensure there is enough sequence upstream.")



--- Primer Design based on Restriction Sites ---
Available restriction enzymes:
1. HindIII
2. EcoRI
3. KpnI
Enter the number or name of the restriction enzyme you want to use for primer design: 3
You selected KpnI (recognition sequence: GGTACC).

KpnI sites found at indices: [7524, 20870]
Enter the index of the KpnI site you want to design the primer for: 7524
Enter desired primer length (e.g., 20): 20

Successfully designed 20 bp primer upstream of KpnI site at index 7524:
Primer: GCTCTACGGCCTCGCCACCT
