In [None]:
# Import modules
import pandas as pd # v0.23.4
from matplotlib import pyplot as plt # v3.0.2
import matplotlib.gridspec as gridspec # v3.0.2
from scipy import stats # v1.1.0
import numpy as np # v1.15.4
import itertools
import seaborn as sns # v0.9.0
from Bio import SeqIO # v1.73

In [None]:
# Load the PCR marker calls and sporulation phenotypes, then derive the genotype columns
with open("mito_markers.csv","r") as file_in:
    data=pd.read_csv(file_in, header=0)

# First character of the strain identifier -> nuclear genotype label
NC={"A":"L1","B":"M1","C":"H1","D":"L2","E":"M2","F":"H2","H":"VL2","I":"VL1"}
# Raw time-point code -> LaTeX-formatted label used in figures
Time={"Tinit":r"$T_{ini}$","Tfin":r"$T_{end}$"}

# Mitochondrial completeness is "+" only when both the rnl and atp6 markers are "+";
# any other combination is scored "-"
mito_genotype=[
    "+" if (rnl_call=="+" and atp6_call=="+") else "-"
    for rnl_call, atp6_call in zip(data["rnl"], data["atp6"])
]
# An unknown leading character raises KeyError rather than being silently dropped
nuc_genotype=[NC[strain_id[0]] for strain_id in data["strain"]]
data["mtg"]=mito_genotype
data["ncg"]=nuc_genotype

# DATA is a display copy: same rows as `data`, with the time codes swapped for formatted labels
DATA=data.copy()
DATA["time"]=[Time[time_code] for time_code in DATA["time"]]

# Count lines per (sporulation, mtDNA completeness) combination, separately for each time point:
# contingency1 holds the "Tfin" rows, contingency2 the "Tinit" rows
contingency1, contingency2 = (
    pd.DataFrame(np.zeros([2,2]), index=["spo+","spo-"], columns=["mito+", "mito-"])
    for _ in range(2)
)
tables_by_time={"Tfin":contingency1, "Tinit":contingency2}
for mito_call, spo_call, time_code in zip(data["mtg"], data["sporulation"], data["time"]):
    # Rows whose calls are not exactly "+" or "-" contribute to neither table
    if mito_call not in ("+","-") or spo_call not in ("+","-"):
        continue
    # Rows with any other time code are ignored
    if time_code in tables_by_time:
        tables_by_time[time_code].loc["spo"+spo_call, "mito"+mito_call]+=1

In [None]:
# Export the mito markers data as a table figure (nuclear genotype, strain, time, sporulation)

fig, axes = plt.subplots(ncols=3, nrows=4, figsize=[12,12])
# The rows of DATA are split into three chunks, one per figure column. Each chunk is drawn
# as a table placed on top of the bottom-row axes of that column.
row_chunks=[range(32), range(32,64), range(64,80)]
for ax_idx, i in enumerate(row_chunks):

    plot_table=np.array(DATA.loc[i, ["ncg","strain","time","sporulation"]])
    ax=axes[3,ax_idx]
    table=ax.table(cellText=plot_table, loc="top",
             cellLoc = 'center', rowLoc = 'center')

    # get_children() returns the table's cells. The former properties()["child_artists"]
    # lookup depended on a deprecated alias that newer matplotlib releases no longer provide.
    for cell in table.get_children():
        cell.set_height(0.09)
        cell.set_width(0.20)

# Hide every axes frame so that only the tables remain visible
for i,j in itertools.product(range(4), range(3)):
    axes[i,j].axis("off")


plt.tight_layout()
plt.savefig("mito_table.pdf")
plt.show()
plt.close()

In [None]:
# Draw both contingency tables side by side as annotated heatmaps sharing one colour scale
fig = plt.figure(figsize=[6.5,3.2])

# The second column gets extra width; it is the panel drawn with a colourbar
GS=gridspec.GridSpec(nrows=1, ncols=2, width_ratios=[1,1.25])

# One entry per panel, left to right: counts to show, panel title, colourbar options
panels=[
    (contingency2, r"$T_{ini}$", dict(cbar=False)),
    (contingency1, r"$T_{end}$", dict(cbar=True, cbar_kws={"label":"Number of lines"})),
]

for column, (counts, panel_title, cbar_options) in enumerate(panels):
    ax = fig.add_subplot(GS[0,column])
    HM=sns.heatmap(counts, annot=True, cmap="Blues", ax=ax,
                   vmin=0, vmax=40, linecolor="black", linewidths=1,
                   **cbar_options)
    ax.set_xlabel("mtDNA completeness", size=14)
    ax.set_xticklabels(["+","-"], size=14)
    ax.set_ylabel("Sporulation ability", size=14)
    ax.set_yticklabels(["+","-"], rotation=0, size=14)
    ax.set_title(panel_title, size=18)
    # Keep the tick labels but drop the tick marks themselves
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')


plt.tight_layout()
plt.savefig("mtDNAcomplete_sporulation.pdf")
plt.show()
plt.close()

In [None]:
# Make Fisher's exact test on the data at Tend. A zero count for [spo+, mito-] is replaced by one
# to yield a finite boundary to the odds ratio.
test=contingency1.copy()
# Only substitute when the cell really is zero, so that a genuine non-zero count is never overwritten
if test.loc["spo+","mito-"]==0:
    test.loc["spo+","mito-"]=1
stats.fisher_exact(test)

In [None]:
# Plot a map of the mitochondrial genomes and annotations with positions of the PCR markers
# import mito genome and annotations from S288C and YPS138, from https://yjx1217.github.io/Yeast_PacBio_2016/data/
# Each entry pairs a file prefix with the (start, end) coordinates of the PCR markers to draw.
# Stored as a list (not a bare zip iterator) so it can be iterated more than once.
strains=list(zip(["S288c.mt",
        "YPS138.mt"],
           [[(192,477), (31379,32168)], [(192,477), (25969,26687)]]))

fig, axes = plt.subplots(ncols=2, figsize=[10,5], subplot_kw={"projection":"polar"})
lwd=1

for idx, (s, loc) in enumerate(strains):
    # Drop any directory part and the trailing ".mt" to obtain the displayed strain name
    strain_name=s.split("/")[-1][:-3]
    with open(s+".genome.fa","r") as mito_in:
        mtgenome=list(SeqIO.parse(mito_in, "fasta"))
    with open(s+".all_feature.gff","r") as gff_in:
        # Keep feature rows only: blank lines and "#" directive/comment lines are skipped
        mtgff=pd.DataFrame([i.split("\t") for i in gff_in.read().splitlines()
                            if i.strip() and not i.startswith("#")])
    # Columns 3 and 4 are the feature start and end coordinates
    mtgff[3]=mtgff[3].astype(int)
    mtgff[4]=mtgff[4].astype(int)
    NAME=[]
    ID=[]
    for i in mtgff.index:
        # Column 8 holds ";"-separated "key=value" attributes. Split on the first "=" only and
        # ignore fields without "=" (e.g. the empty field left by a trailing ";").
        attributes=dict(j.split("=",1) for j in mtgff.loc[i,8].split(";") if "=" in j)
        NAME.append(attributes["Name"])
        ID.append(attributes["ID"])
    mtgff["ID"]=ID
    mtgff["Name"]=NAME

    # plot a schematic map of mitochondrial genome
    # Only the first sequence record of the FASTA file is used
    len_genome=len(mtgenome[0].seq)
    def circ_coord(x):
        """Convert a genome position to an angle in radians (full genome length = 2*pi)."""
        c=2*np.pi*x/len_genome
        return(c)

    genome_backbone=np.arange(0,circ_coord(len_genome),0.01)
    genes_mito=mtgff.loc[mtgff[2]!="mRNA"]

    ax=axes[idx]

    # Position 0 at the top of the circle, coordinates increasing clockwise
    ax.set_theta_zero_location("N")
    ax.set_theta_direction("clockwise")

    ax.plot(genome_backbone, np.repeat(4,len(genome_backbone)), linewidth=1, color="black")

    for i in genes_mito.index:

        genespan=np.arange(circ_coord(mtgff.loc[i,3]-1),circ_coord(mtgff.loc[i,4]),0.001)
        midpos=0.5*sum([circ_coord(mtgff.loc[i,3]), circ_coord(mtgff.loc[i,4])])

        # NOTE(review): col and name are only assigned for the feature types listed below.
        # Any other type would reuse the values left over from the previous feature --
        # confirm the GFF files contain no other feature types.
        if mtgff.loc[i,2]=="gene":
            col="cyan"
            name=mtgff.loc[i,"Name"]

        if mtgff.loc[i,2]=="tRNA":
            col="lime"
            name=None
        if mtgff.loc[i,2]=="CDS":
            col="blue"
            name=None
        if mtgff.loc[i,2] in ['rnl', 'rnpB', 'rns']:
            col="red"
            name=mtgff.loc[i,2]

        # Stack thin arcs at closely spaced radii to render the feature as a solid band
        for y in np.arange(3.8,4.21,0.03):
            ax.plot(genespan, np.repeat(y,len(genespan)), linewidth=lwd, color=col)

        # if a protein or RNA-coding gene, print tag
        if mtgff.loc[i,2] in ["gene", 'rnl', 'rnpB', 'rns']:


            ax.plot([midpos, midpos], [4.35,4.55], linewidth=0.5, color="black")

            # The label rotation follows the angular position; the two branches use rotations
            # that differ by 180 degrees
            if 0<=midpos<=np.pi or 2*np.pi-0.2<=midpos<2*np.pi:
                ax.text(midpos, 5.3, s=name, ha="center", va="center", 
                        rotation=90-180*(midpos/np.pi),
                       size=11, style="italic") 

            if np.pi<=midpos<2*np.pi-0.2:
                ax.text(midpos, 5.3, s=name, ha="center", va="center", 
                       rotation=-90-180*(midpos)/np.pi,
                       size=11, style="italic")

    # PCR marker positions, drawn as an orange band inside the annotation ring
    for i in loc:

        genespan_pcr=np.arange(circ_coord(i[0]),circ_coord(i[1]),0.001)
        for y in np.arange(3.3,3.71,0.03):
            ax.plot(genespan_pcr, np.repeat(y,len(genespan_pcr)), linewidth=lwd, color="orange")

    # Scale ticks every 10 kb, labelled in kb; only the origin label carries the unit
    for i in np.arange(0,len_genome,10000):
        ax.scatter(circ_coord(i),3, marker=".", color="black", s=5)
        label="%.0f" % (i/1000)
        ha="center"
        if i==0:
            label="%.0f kb" % (i/1000)
            if strain_name=="YPS138":
                ha="left"
        elif i==70000 and strain_name=="YPS138":
            # For YPS138 the 0 and 70 kb labels are aligned away from each other
            ha="right"

        ax.text(circ_coord(i), 2.5, s=label,
               ha=ha, va="center", size=10)
    ax.text(0,0, s=strain_name, size=18, ha="center", va="center")

    ax.grid(False)
    ax.axis("off")


    ax.set_ylim([0,5.5])
plt.tight_layout()
plt.savefig("mito_map_5x10.pdf")
plt.show()
plt.close()