# Phred scores


In [3]:
from Bio import SeqIO
import numpy as np


def phred_score(char, offset=33):
    """Convert a single FASTQ quality character to its Phred score.

    Args:
        char: One-character string taken from a FASTQ quality line.
        offset: ASCII offset used by the quality encoding. The default of
            33 keeps the original Phred+33 behavior; pass 64 for data
            encoded as Phred+64.

    Returns:
        The integer score ``ord(char) - offset``.

    Raises:
        TypeError: If ``char`` is not a string of length one (raised by
            ``ord``).
    """
    return ord(char) - offset


def compute_phred_scores(seq):
    """Gather the Phred qualities of every record into a single array.

    Args:
        seq: Iterable of records, each exposing
            ``letter_annotations["phred_quality"]`` as an iterable of
            scores (for example the records yielded by
            ``SeqIO.parse(..., "fastq")``).

    Returns:
        A ``numpy`` array holding the scores of all records, concatenated
        in iteration order.
    """
    flattened = [
        score
        for record in seq
        for score in record.letter_annotations["phred_quality"]
    ]
    return np.array(flattened)

In [8]:
import io
from urllib.request import urlopen


def get_seq(url):
    """Download a FASTQ file and return a parser over its records.

    Args:
        url: Location of the FASTQ file, in any form ``urlopen`` accepts.

    Returns:
        The iterator produced by ``SeqIO.parse(..., "fastq")`` over the
        downloaded, UTF-8-decoded text.
    """
    # One ``with`` is enough to close the response. The whole payload is
    # read into memory first, so parsing can safely happen after the
    # connection has been released.
    with urlopen(url) as response:
        fastq_text = response.read().decode("utf-8")
    return SeqIO.parse(io.StringIO(fastq_text), "fastq")

In [9]:
# URL of the sample FASTQ file used in this notebook (despite the variable
# name, this is a remote URL rather than a local filesystem path).
fastq_file_path = "https://gitlab.com/oasci/courses/pitt/biosc1540-2024s/-/raw/main/biosc1540/files/fastq/SRR25437009-sample.fastq"

# Download the file and obtain the parsed FASTQ records.
seq = get_seq(fastq_file_path)

# Flatten the per-record quality scores into one NumPy array.
phred_scores = compute_phred_scores(seq)

# Print the first 10 Phred scores as an example
print("First 10 Phred scores:", phred_scores[:10])

First 10 Phred scores: [12 23 23 32 34 32 36 36 36 34]
