In [1]:
import pandas as pd
# Load the tab-separated sequence report, then keep only its first 16 rows
# (presumably the 16 nuclear chromosomes - confirm against the report).
chromosomes = pd.read_csv("./data/sequence_report.tsv", sep="\t")
filtered = chromosomes.iloc[:16]

In [2]:
#This code creates a basic view of yeast chromosomes

from reportlab.lib.units import cm
from Bio.Graphics import BasicChromosome

# (name, length) pair for every chromosome, in row order.
# zip() walks the two columns positionally, so this no longer relies on the
# frame's index labels being exactly 0..n-1 the way filtered[col][i] did.
entries = list(zip(filtered["UCSC style name"], filtered["Seq length"]))

chr_diagram = BasicChromosome.Organism()
chr_diagram.page_size = (40 * cm, 20 * cm)

# The longest chromosome fixes the scale shared by every drawn chromosome.
# Computed once after collecting the lengths; default=0 keeps an empty table
# from raising, matching the original's behaviour of simply drawing nothing.
lengths = [length for _, length in entries]
max_len = max(lengths, default=0)

# Scale units given to each telomere cap; identical for every chromosome,
# so it is set once rather than on each loop pass.
tel_len = 100000

for name, length in entries:
    # Label the chromosome without the leading "chr" of the UCSC-style name.
    name = name.removeprefix("chr")
    cur_chromosome = BasicChromosome.Chromosome(name)

    # Same scale_num for all chromosomes: longest body plus both telomeres.
    cur_chromosome.scale_num = max_len + 2 * tel_len
    cur_chromosome.label_size = 2

    # Segments are added in drawing order: telomere, body, inverted telomere.
    start = BasicChromosome.TelomereSegment()
    start.scale = tel_len
    cur_chromosome.add(start)

    body = BasicChromosome.ChromosomeSegment()
    body.scale = length
    cur_chromosome.add(body)

    end = BasicChromosome.TelomereSegment(inverted=True)
    end.scale = tel_len
    cur_chromosome.add(end)

    chr_diagram.add(cur_chromosome)

# Writes the finished diagram to a PDF in the working directory.
chr_diagram.draw(title="S_cerevisiae", output_file="Chromosomes.pdf")

In [3]:
# Pair each chromosome's name with the GenBank accession of its sequence,
# keeping the row order of `filtered`.
names = filtered["Chromosome name"].to_list()
filenames = filtered["GenBank seq accession"].to_list()
named_files = [(chrom_name, accession) for chrom_name, accession in zip(names, filenames)]

In [4]:
#This code creates an annotated view of yeast chromosomes with labelled CDS

from reportlab.lib.units import cm
from Bio.Graphics import BasicChromosome
from Bio import Entrez, SeqIO, GenBank

# (name, length) pair for every chromosome, in row order; in this cell only
# the lengths are used, to size the common drawing scale.
# zip() walks the columns positionally, so this no longer relies on the
# frame's index labels being exactly 0..n-1 the way filtered[col][i] did.
entries = list(zip(filtered["UCSC style name"], filtered["Seq length"]))

chr_diagram = BasicChromosome.Organism()
chr_diagram.page_size = (40 * cm, 20 * cm)

# The longest chromosome fixes the scale shared by every drawn chromosome.
# Computed once; default=0 keeps an empty table from raising.
lengths = [length for _, length in entries]
max_len = max(lengths, default=0)

# Scale units given to each telomere cap; identical for every chromosome.
tel_len = 100000

# Contact address attached to the Entrez requests below. It is the same for
# every request, so it is set once instead of on each loop pass.
# NOTE(review): consider moving this address into a configuration cell.
Entrez.email = "k.zoltowska@oxfordalumni.org"

for index, (name, filename) in enumerate(named_files):
    # Fetch the GenBank record for this accession over the network. The
    # with-block closes the response handle as soon as the record is parsed;
    # previously the handle was left open for every chromosome.
    with Entrez.efetch(db="nucleotide", id=filename, rettype="gb", retmode="text") as handle:
        record = SeqIO.read(handle, format="gb")
    length = len(record)

    # Keep only coding sequences and tag them with a per-chromosome colour
    # number (index + 2), stored in the feature's "color" qualifier.
    features = [f for f in record.features if f.type == "CDS"]
    for f in features:
        f.qualifiers["color"] = [index + 2]

    cur_chromosome = BasicChromosome.Chromosome(name)

    # Same scale_num for all chromosomes: longest body plus both telomeres.
    cur_chromosome.scale_num = max_len + 2 * tel_len
    cur_chromosome.label_size = 2

    # Segments are added in drawing order: telomere, body, inverted telomere.
    start = BasicChromosome.TelomereSegment()
    start.scale = tel_len
    cur_chromosome.add(start)

    # The body carries the CDS features so they are drawn along its length.
    body = BasicChromosome.AnnotatedChromosomeSegment(length, features)
    body.scale = length
    cur_chromosome.add(body)

    end = BasicChromosome.TelomereSegment(inverted=True)
    end.scale = tel_len
    cur_chromosome.add(end)

    chr_diagram.add(cur_chromosome)

# Writes the finished diagram to a PDF in the working directory.
chr_diagram.draw(title="S.Cerevisiae", output_file="Chromosomes_annotated.pdf")


