In [31]:
# Attach Google Drive to the Colab VM; every later cell reads and writes
# under /content/drive/MyDrive.
from google.colab import drive

drive.mount("/content/drive")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [32]:
%%bash
# Scaffold the assignment repository on Google Drive: folder layout,
# requirements.txt, README.md and the AI-usage disclosure.
#
# Fail fast: if Drive is not mounted the `cd` below fails, and without this
# guard every following command would write into the wrong directory.
set -euo pipefail

cd /content/drive/MyDrive

# make repo folder
mkdir -p ros
cd ros

# create subfolders
mkdir -p src data/sample docs outputs

# requirements.txt
echo "biopython" > requirements.txt

# README.md
# Quoted heredoc delimiter ('EOF') => the text is written verbatim, with no
# variable, backtick or history expansion applied by the shell.
cat > README.md <<'EOF'
# Rosalind Assignment (Problems 1–9)
Sushma Palli + 1002232673 + BIOL5340-001
This repo contains solutions in Python for Rosalind bioinformatics problems.
EOF

# AI usage log (top-level heading is written exactly once)
cat > docs/ai_usage.md <<'EOF'
## AI Usage Disclosure

### Purpose of AI Collaboration
For this assignment, I collaborated with AI tools, specifically **OpenAI ChatGPT (GPT-5 mini)** and **Google Gemini**, to assist in understanding bioinformatics concepts, generating Python code templates, and explaining errors in code. The AI was used as a learning and productivity aid, not as a substitute for my own work.

### Specific Uses
- **Code generation:** Assisted in creating Python scripts for Problems 1–9, including reading input files, processing DNA/RNA sequences, and generating outputs.
- **Error debugging:** Provided explanations and suggested fixes for errors encountered while running Python scripts in Google Colab.
- **Workflow guidance:** Suggested folder structures, input file organization, and version-controlled workflow practices suitable for GitHub submission.
- **Documentation drafting:** Helped structure README.md content and AI usage logs to comply with assignment requirements.

### Verification and Testing
All AI-generated code and suggestions were:
1. **Reviewed manually** for correctness and understanding.
2. **Tested in Google Colab** to verify that outputs matched the expected Rosalind problem results.
3. **Refactored or modified** as necessary to ensure proper functionality and learning outcomes.

### Summary
AI tools were used **responsibly** to support my learning and to improve efficiency. I verified every solution, ensuring that the final code and documentation reflect my understanding and effort.

**AI Tools Used:**
- OpenAI ChatGPT (GPT-5 mini)
- Google Gemini 2.5
EOF


In [33]:
!pip install biopython




In [34]:
%%bash
# Write a single newline-terminated sample DNA string to dna_input.txt.
printf '%s\n' "AGCTTAGCTA" > /content/drive/MyDrive/ros/data/sample/dna_input.txt


# Creating sample data


In [35]:
# Create sample input files for Rosalind Problems 1-9
import os

# One short DNA string per problem; each key becomes the prefix of the
# file name "<key>_input.txt".
sample_inputs = {
    "problem1": "AGCTTAGCTA",  # Counting DNA nucleotides
    "problem2": "GATTACA",      # Transcribing DNA to RNA
    "problem3": "GATTACA",      # Complementing a DNA strand
    "problem4": "ATGCGT",       # GC content
    "problem5": "ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG",  # Counting motifs
    "problem6": "ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG",  # Translating DNA to protein
    "problem7": "ATGCGATCGATCG",  # Finding reverse complement
    "problem8": "ATGCATGC",      # Counting sub-sequences
    "problem9": "ATGCGTATGCGT"   # Finding pattern occurrences
}

folder = "/content/drive/MyDrive/ros/data/sample/"
os.makedirs(folder, exist_ok=True)

for key, seq in sample_inputs.items():
    # `folder` already ends with "/", so let os.path.join place the separator
    # instead of concatenating another slash by hand.
    input_path = os.path.join(folder, f"{key}_input.txt")
    # Explicit encoding keeps the files identical regardless of locale settings.
    with open(input_path, "w", encoding="utf-8") as f:
        f.write(seq)


In [36]:
# List the contents of the sample-data folder on Drive.
!ls /content/drive/MyDrive/ros/data/sample/


dna_input.txt	    problem3_input.txt	problem6_input.txt  problem9_input.txt
problem1_input.txt  problem4_input.txt	problem7_input.txt
problem2_input.txt  problem5_input.txt	problem8_input.txt


In [37]:
import os

# Destination folder for the generated solution scripts.
src_folder = "/content/drive/MyDrive/ros/src/"
os.makedirs(src_folder, exist_ok=True)

# Maps each script file name to its full Python source. Every script reads its
# own sample input file from data/sample/ and prints the answer to stdout.
scripts = {
    "problem1.py": """
# Problem 1 - Counting DNA nucleotides
input_file = "/content/drive/MyDrive/ros/data/sample/problem1_input.txt"
with open(input_file, 'r') as f:
    dna = f.read().strip()
A = dna.count('A')
C = dna.count('C')
G = dna.count('G')
T = dna.count('T')
print(A, C, G, T)
""",
    "problem2.py": """
# Problem 2 - Transcribe DNA to RNA
input_file = "/content/drive/MyDrive/ros/data/sample/problem2_input.txt"
with open(input_file, 'r') as f:
    dna = f.read().strip()
rna = dna.replace('T', 'U')
print(rna)
""",
    "problem3.py": """
# Problem 3 - Complement DNA Strand
input_file = "/content/drive/MyDrive/ros/data/sample/problem3_input.txt"
with open(input_file, 'r') as f:
    dna = f.read().strip()
# Normalise case first, then swap all four bases in a single pass. A chain of
# replace() calls with lowercase placeholders gives wrong answers as soon as
# the input itself contains lowercase letters.
complement = dna.upper().translate(str.maketrans('ACGT', 'TGCA'))
print(complement)
""",
    "problem4.py": """
# Problem 4 - GC Content
input_file = "/content/drive/MyDrive/ros/data/sample/problem4_input.txt"
with open(input_file, 'r') as f:
    dna = f.read().strip()
# An empty file would otherwise end in a ZeroDivisionError traceback.
if not dna:
    raise SystemExit("Input sequence is empty; GC content is undefined.")
gc_content = (dna.count('G') + dna.count('C')) / len(dna) * 100
print(round(gc_content,2))
""",
    "problem5.py": """
# Problem 5 - Count Motifs (ATG)
input_file = "/content/drive/MyDrive/ros/data/sample/problem5_input.txt"
with open(input_file, 'r') as f:
    dna = f.read().strip()
motif = "ATG"
count = 0
for i in range(len(dna)-len(motif)+1):
    if dna[i:i+len(motif)] == motif:
        count += 1
print(count)
""",
    "problem6.py": """
# Problem 6 - Translate DNA to Protein
from Bio.Seq import Seq
input_file = "/content/drive/MyDrive/ros/data/sample/problem6_input.txt"
with open(input_file, 'r') as f:
    dna_seq = f.read().strip()
dna = Seq(dna_seq)
protein = dna.translate()
print(protein)
""",
    "problem7.py": """
# Problem 7 - Reverse Complement
from Bio.Seq import Seq
input_file = "/content/drive/MyDrive/ros/data/sample/problem7_input.txt"
with open(input_file, 'r') as f:
    dna_seq = f.read().strip()
dna = Seq(dna_seq)
rev_comp = dna.reverse_complement()
print(rev_comp)
""",
    "problem8.py": """
# Problem 8 - Count Sub-sequences
input_file = "/content/drive/MyDrive/ros/data/sample/problem8_input.txt"
with open(input_file, 'r') as f:
    dna = f.read().strip()
subseq = "ATG"
count = 0
for i in range(len(dna)-len(subseq)+1):
    if dna[i:i+len(subseq)] == subseq:
        count += 1
print(count)
""",
    "problem9.py": """
# Problem 9 - Pattern Occurrences
input_file = "/content/drive/MyDrive/ros/data/sample/problem9_input.txt"
with open(input_file, 'r') as f:
    dna = f.read().strip()
pattern = "ATGC"
positions = [i+1 for i in range(len(dna)-len(pattern)+1) if dna[i:i+len(pattern)] == pattern]
print(positions)
"""
}

# Create all Python script files
for filename, code in scripts.items():
    path = os.path.join(src_folder, filename)
    with open(path, "w", encoding="utf-8") as f:
        # strip() drops the blank line that follows each opening triple quote;
        # the single trailing newline makes every script a well-formed text file.
        f.write(code.strip() + "\n")

print(f"All {len(scripts)} Python scripts created in src/ folder!")


All 9 Python scripts created in src/ folder!


# Verifying the Rosalind problem files in Drive


In [38]:
# List the contents of the src folder on Drive.
!ls /content/drive/MyDrive/ros/src/


problem1.ipynb	problem2.py  problem4.py  problem6.py  problem8.py
problem1.py	problem3.py  problem5.py  problem7.py  problem9.py


In [39]:
import os
import subprocess
import sys

# Folder where scripts are located
src_folder = "/content/drive/MyDrive/ros/src/"

# List of all script filenames: problem1.py ... problem9.py
scripts = [f"problem{i}.py" for i in range(1, 10)]

# Run each script and print output
for script in scripts:
    script_path = os.path.join(src_folder, script)
    print(f"\n--- Running {script} ---")
    try:
        # sys.executable is the interpreter running this kernel, so the child
        # process sees the same installed packages (e.g. Biopython). A bare
        # "python" is resolved through PATH and may be a different install.
        result = subprocess.run(
            [sys.executable, script_path], capture_output=True, text=True
        )
    except Exception as e:
        # Deliberate best effort: one script that cannot be launched must not
        # stop the remaining scripts from running.
        print(f"Exception running {script}: {e}")
        continue

    print(result.stdout.strip())  # print normal output
    if result.returncode != 0:
        # A non-zero exit status is the reliable failure signal, even when the
        # script wrote nothing to stderr.
        print(f"Error (exit code {result.returncode}):", result.stderr)
    elif result.stderr:
        # Text on stderr with exit status 0 is a warning, not a failure.
        print("Warning:", result.stderr)



--- Running problem1.py ---
3 2 2 3

--- Running problem2.py ---
GAUUACA

--- Running problem3.py ---
CTAATGT

--- Running problem4.py ---
50.0

--- Running problem5.py ---
2

--- Running problem6.py ---
MAIVMGR*KGAR*

--- Running problem7.py ---
CGATCGATCGCAT

--- Running problem8.py ---
2

--- Running problem9.py ---
[1, 7]


# Saving outputs to a file



In [40]:
import os
import subprocess
import sys

src_folder = "/content/drive/MyDrive/ros/src/"
output_file = "/content/drive/MyDrive/ros/outputs/test_outputs.txt"
# Derive the directory from output_file so the two paths cannot drift apart.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

scripts = [f"problem{i}.py" for i in range(1, 10)]

# Run every solution script and collect its output in one report file.
with open(output_file, "w", encoding="utf-8") as f_out:
    for script in scripts:
        script_path = os.path.join(src_folder, script)
        f_out.write(f"--- {script} ---\n")
        # sys.executable is the interpreter running this kernel, so the child
        # process sees the same installed packages (e.g. Biopython). A bare
        # "python" is resolved through PATH and may be a different install.
        result = subprocess.run(
            [sys.executable, script_path], capture_output=True, text=True
        )
        f_out.write(result.stdout + "\n")
        if result.returncode != 0:
            # A non-zero exit status is the reliable failure signal, even when
            # the script wrote nothing to stderr.
            f_out.write(f"Error (exit code {result.returncode}): " + result.stderr + "\n")
        elif result.stderr:
            # Text on stderr with exit status 0 is a warning, not a failure.
            f_out.write("Warning: " + result.stderr + "\n")

print(f"All outputs saved to {output_file}")


All outputs saved to /content/drive/MyDrive/ros/outputs/test_outputs.txt


# GitHub links

#https://github.com/sushma23github/bioinformatics-rosalind/releases/tag/week3-submission

#https://github.com/sushma23github/bioinformatics-rosalind