In [None]:
# Refresh the apt package index so the installs below resolve current packages
!apt-get update
# Install FastQC (quality-control reports for the raw reads)
!apt-get install -y fastqc
# Install minimap2 (a fast read aligner) and samtools (SAM/BAM conversion, sorting, indexing)
!apt-get install -y minimap2 samtools

In [None]:
# Install the SRA Toolkit (provides the `prefetch` and `fasterq-dump` commands
# used in the next cells) for downloading raw sequencing data from the SRA archive
!apt-get update && apt-get install -y sra-toolkit

In [None]:
# Download the raw sequencing run SRR15595111 from the SRA archive.
# The download is converted to FASTQ format in the next cell.
!prefetch SRR15595111

In [None]:
# Convert the downloaded SRA run into FASTQ format.
# --split-files produces the separate SRR15595111_1.fastq / SRR15595111_2.fastq
# files that the FastQC and alignment cells below read.
!fasterq-dump SRR15595111 --split-files

In [None]:
# List the FASTQ files in the working directory (with human-readable sizes)
# to confirm the conversion produced output
!ls -lh *.fastq

In [None]:
# Run FastQC on both FASTQ files (one invocation per file)
!fastqc SRR15595111_1.fastq
!fastqc SRR15595111_2.fastq

In [None]:
# Verify that the FastQC analysis actually produced its reports.
import os

# FastQC names its outputs after the input file:
# <input stem>_fastqc.html (report) and <input stem>_fastqc.zip (raw data).
expected_reports = [
    f"SRR15595111_{mate}_fastqc.{extension}"
    for mate in (1, 2)
    for extension in ("html", "zip")
]
missing_reports = [name for name in expected_reports if not os.path.exists(name)]

if missing_reports:
    # Do not claim success when a report is absent; say exactly what is missing.
    print(f"FastQC analysis incomplete. Missing files: {missing_reports}")
else:
    print("FastQC analysis complete. Generated files:")
    print(expected_reports)

In [None]:
# Align the two read files to the reference genome with minimap2.
import os
import subprocess

reference_genome = "/content/sequence.fasta"
reads_1 = "SRR15595111_1.fastq"
reads_2 = "SRR15595111_2.fastq"
output_sam = "alignment_output.sam"

# Argument list instead of a shell string: paths are passed verbatim, so spaces
# or shell metacharacters in a file name cannot break (or alter) the command.
# -a: emit SAM output; -x sr: short-read preset.
alignment_command = ["minimap2", "-ax", "sr", reference_genome, reads_1, reads_2]

print(f"Starting alignment of {reads_1} {reads_2} to {reference_genome}...")
try:
    # minimap2 writes the SAM records to stdout; send them straight to the file.
    with open(output_sam, "w") as sam_handle:
        subprocess.run(alignment_command, stdout=sam_handle, check=True)
except (subprocess.CalledProcessError, FileNotFoundError) as e:
    # CalledProcessError: minimap2 exited non-zero.
    # FileNotFoundError: the minimap2 executable is not installed / not on PATH.
    print(f"Alignment failed: {e}")
    # Remove the truncated SAM so later cells cannot process a partial alignment.
    if os.path.exists(output_sam):
        os.remove(output_sam)
    # Re-raise so "Run All" stops here instead of continuing with missing data.
    raise
else:
    print(f"Alignment successful. Output saved to {output_sam}")

In [None]:
# Convert SAM to sorted BAM using samtools commands
input_sam = "alignment_output.sam"
sorted_bam = "alignment_sorted.bam"
# Convert SAM to BAM
!samtools view -Sb {input_sam} > temp_unsorted.bam
# Sort the BAM file
!samtools sort temp_unsorted.bam -o {sorted_bam}
# Create an index (required for visualization tools later)
!samtools index {sorted_bam}
# Cleanup temporary files
!rm temp_unsorted.bam
!rm {input_sam}
print(f"Data processed into binary format: {sorted_bam} and {sorted_bam}.bai (index)")

In [None]:
# Install and inport pysam library
!pip install pysam
import pysam
bamfile = pysam.AlignmentFile("alignment_sorted.bam", "rb")
mapped_reads = bamfile.mapped
unmapped_reads = bamfile.unmapped
total_reads = mapped_reads + unmapped_reads
alignment_rate = (mapped_reads / total_reads) * 100
print("-" * 40)
print("Genomics Pipeline Summary Report")
print("-" * 40)
print(f"Total Reads Processed: {total_reads}")
print(f"Mapped Reads: {mapped_reads}")
print(f"Unmapped Reads: {unmapped_reads}")
print(f"Overall Alignment Rate: {alignment_rate:.2f}%")
print("-" * 40)
bamfile.close()

In [None]:
# Compute per-base coverage depth of the first reference sequence and plot it.
import matplotlib.pyplot as plt
import numpy as np
import pysam  # imported here as well so the cell does not depend on an earlier cell

# Use the sorted, indexed BAM file created earlier; the context manager closes
# it even if the pileup raises.
with pysam.AlignmentFile("alignment_sorted.bam", "rb") as bamfile:
    if not bamfile.references:
        raise ValueError("alignment_sorted.bam has no reference sequences in its header")

    # Name and length of the first reference sequence
    reference_name = bamfile.references[0]
    reference_length = bamfile.lengths[0]

    # One depth value per reference position; positions with no reads stay 0.
    coverage_depths = np.zeros(reference_length, dtype=int)

    # Restrict the pileup to the first reference: the array is sized for that
    # sequence only, so columns from any other reference would overwrite its
    # positions or index past the end. truncate=True needs a region to act on.
    # NOTE(review): max_depth=1000 limits how many reads are piled up per
    # position, so reported depth presumably saturates near 1000 - raise it if
    # the true depth of high-coverage regions matters.
    for pileupcolumn in bamfile.pileup(contig=reference_name, truncate=True, max_depth=1000):
        # nsegments is the current name of the deprecated `.n` attribute
        coverage_depths[pileupcolumn.reference_pos] = pileupcolumn.nsegments

# Plot the results
plt.figure(figsize=(12, 6))
plt.plot(coverage_depths)
plt.title("SARS-CoV-2 Genome Coverage Map")
plt.xlabel("Genome Position (Base Pairs)")
plt.ylabel("Read Depth (X)")
plt.savefig("genome_coverage_plot.png") # Save a picture for GitHub README
plt.show()

# Calculate average coverage (uncovered positions count as depth 0)
average_coverage = np.mean(coverage_depths)
print(f"Average coverage depth across the genome: {average_coverage:.2f}X")

In [None]:
# Generate a CSV summary report using pandas.
import pandas as pd
import os

# Build the report from the values computed by the earlier cells
# (total_reads, mapped_reads, unmapped_reads, alignment_rate from the pysam
# summary cell; average_coverage from the coverage cell) instead of pasting
# numbers by hand, so the CSV always matches the run that produced it.
summary_data = {
    'Metric': ['Total Reads Processed', 'Mapped Reads', 'Unmapped Reads', 'Overall Alignment Rate', 'Average Coverage Depth'],
    'Value': [
        total_reads,
        mapped_reads,
        unmapped_reads,
        f"{alignment_rate:.2f}%",
        f"{average_coverage:.2f}X",
    ],
}
report_df = pd.DataFrame(summary_data)
report_df.to_csv("project_summary_report.csv", index=False)
print("Generated final report:")
print(report_df)
print("\nSaved report to project_summary_report.csv")