In [163]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import pickle as pk
import os,glob,re
import tifffile
from matplotlib.gridspec import GridSpec
from matplotlib import cm
import fpdf
from math import isnan

In [164]:
#create_msr_infodict
# Build MSR_dict: plate name -> {well -> "MSMEG_####" locus string} from the
# MSR reference spreadsheet. Plates are registered even when none of their
# wells carries a locus.
msr_info = pd.read_excel("/Users/jz-rolling/Desktop/AutoP/MSR_reference_new.xls")
MSR_dict = {}
# iterrows() replaces `for i in msr_info.index` + `.iloc[i]`, which used index
# labels as positions and is only correct for a default 0..n-1 index.
for row_label, row in msr_info.iterrows():
    plate = row["MSR_INDEX"]
    # The sheet lists this plate as "PP1"; it is stored under "MSR_PP1".
    if plate == "PP1":
        plate = "MSR_PP1"
    plate_wells = MSR_dict.setdefault(plate, {})
    locus = row["MSMEG_####"]
    # pd.isna accepts any cell type; math.isnan raised TypeError on
    # non-float cells (e.g. strings or None).
    if not pd.isna(locus):
        # Zero-pad the numeric locus to four digits, e.g. 1 -> "MSMEG_0001".
        plate_wells[row["WELL_NO"]] = "MSMEG_{}".format(str(int(locus)).zfill(4))

In [294]:
def read_gff(locus,gff):
    """Return the annotation fields of `locus` from the `gff` table.

    Parameters
    ----------
    locus : value looked up in the "Locus" column of `gff`.
    gff : DataFrame with a "Locus" column plus the seven columns listed below.

    Returns
    -------
    numpy array holding, for the first matching row, the values of
    Start, Stop, Strand, Name, Product, UniProt_AC and
    'Orthologues M. tuberculosis' (in that order).

    Raises
    ------
    ValueError
        If `locus` does not occur in the "Locus" column.
    """
    if locus not in gff["Locus"].values:
        raise ValueError("Locus not found!")
    fields = ["Start","Stop","Strand","Name","Product","UniProt_AC",'Orthologues M. tuberculosis']
    matches = gff.loc[gff["Locus"]==locus, fields]
    # Only the first matching row is reported.
    return matches.values[0]

def read_essentiality(locus,info_table):
    """Look up essentiality calls for `locus` and its Mtb ortholog.

    Reads the module-level tables `msm_tnseq` and `mtb_tnseq` and the
    `es_conversion` mapping.

    Parameters
    ----------
    locus : value looked up in the "Locus" column of `msm_tnseq`.
    info_table : sequence whose last element is used as the Mtb ortholog ID
        (the array returned by `read_gff` has that layout).

    Returns
    -------
    tuple (es_call_msm, es_call_mtb, mtb_gene_name, mtb_gene_description).
    Every field defaults to the string 'nan'. The Mtb fields are only filled
    when `locus` is present in `msm_tnseq` AND the ortholog is present in the
    "ORF ID" column of `mtb_tnseq`.
    """
    msm_call = mtb_call = mtb_name = mtb_description = 'nan'

    # Without an Msm record nothing is looked up, not even the Mtb ortholog.
    if locus not in msm_tnseq["Locus"].values:
        return msm_call, mtb_call, mtb_name, mtb_description

    msm_rows = msm_tnseq[msm_tnseq["Locus"]==locus]
    # The numeric essentiality code of the first match is translated to a label.
    msm_call = es_conversion[int(msm_rows["Essentiality"].values[0])]

    ortholog = info_table[-1]
    if ortholog in mtb_tnseq["ORF ID"].values:
        mtb_rows = mtb_tnseq[mtb_tnseq["ORF ID"]==ortholog]
        mtb_call = mtb_rows["Final Call"].values[0]
        mtb_name = mtb_rows["Name"].values[0]
        mtb_description = mtb_rows["Description"].values[0]

    return msm_call, mtb_call, mtb_name, mtb_description
    
def reconstruct_operons(operon_df):
    """Map every gene of every operon to its operon string.

    The first column of each row of `operon_df` is read as an operon label;
    its first six characters are dropped and the rest is split on "-" into
    gene numbers. Each gene number becomes the key "MSMEG_<number>" and maps
    to the prefix-stripped operon string. If a gene appears in several rows,
    the last row wins.
    """
    stripped_operons = (row[0][6:] for row in operon_df.values)
    return {
        "MSMEG_"+gene_no: members
        for members in stripped_operons
        for gene_no in members.split("-")
    }

def reconstruct_go(go_df):
    """Collapse the GO table into {locus: "desc1|desc2|..."}.

    Parameters
    ----------
    go_df : DataFrame with "Locus" and "Description" columns, one row per
        (locus, GO description) pair.

    Returns
    -------
    dict mapping each locus to its descriptions joined with "|", in row order.

    Notes
    -----
    A single groupby pass replaces the previous per-locus boolean filter of
    the whole frame (quadratic in the number of rows). Rows whose locus is
    missing are dropped by groupby; the old code produced a NaN key with an
    empty string for them, which no lookup could ever hit.
    """
    # sort=False keeps first-appearance order and skips a needless sort.
    grouped = go_df.groupby("Locus", sort=False)["Description"]
    return {locus: "|".join(descriptions) for locus, descriptions in grouped}

In [162]:
# Lookup tables used by read_essentiality() and frame_page(); all are read
# from /Volumes/Sam/info.

# Numeric essentiality code in msm_tnseq -> short label.
# NOTE(review): presumably NE/ES/DE abbreviate essentiality classes; confirm.
es_conversion = {
    1: "NE",
    2: "ES",
    3: "DE",
}

# Gene annotation and the two Tn-seq essentiality tables.
gff = pd.read_excel("/Volumes/Sam/info/msm_gff.xls")
msm_tnseq = pd.read_excel("/Volumes/Sam/info/msm_tnseq.xlsx")
mtb_tnseq = pd.read_excel("/Volumes/Sam/info/mtb_tnseq.xlsx")

# Operon membership, flattened to one entry per gene.
operon_df = pd.read_excel("/Volumes/Sam/info/msm_operon.xlsx")
operon_dict = reconstruct_operons(operon_df)

# GO descriptions, joined per locus.
go_df = pd.read_excel("/Volumes/Sam/info/smegmatis_GO_terms_08_28_2017.xlsx")
go_dict = reconstruct_go(go_df)

In [165]:
#create_msr_pathdict
def _collect_well_folders(raw_folder, well_paths):
    """Add every `output/Well*/` folder below `raw_folder` to `well_paths`.

    The well id is the last three characters of the folder name
    (e.g. "Well_A01" -> "A01"); folders of the same well are appended to
    one list.
    """
    for well_folder in glob.glob(raw_folder+"output/Well*/"):
        well = well_folder.split("/")[-2][-3:]
        well_paths.setdefault(well, []).append(well_folder)


def build_path_dict(root="/Volumes/Sam"):
    """Index the per-well output folders of every `MSR*_IDW` plate under `root`.

    Expected layout (either form):
        <root>/<plate>_IDW/<x>raw_files/output/Well_<id>/
        <root>/<plate>_IDW/<x>raw_files/<batch>raw_files/output/Well_<id>/

    Returns
    -------
    dict: plate name (folder name minus its last four characters) ->
    {well id -> list of well folder paths, each with a trailing "/"}.

    A plate without any `*raw_files/` folder is reported and kept with an
    empty well dict; previously it raised IndexError and aborted the scan.
    As before, only the first `*raw_files/` folder of a plate is scanned.
    """
    found = {}
    for plate_folder in glob.glob(root.rstrip("/")+"/MSR*_IDW/"):
        plate_name = plate_folder.split("/")[-2][:-4]
        well_paths = found.setdefault(plate_name, {})
        raw_folders = glob.glob(plate_folder+"*raw_files/")
        if not raw_folders:
            print("Plate {} has no *raw_files folder; no wells recorded.".format(plate_name))
            continue
        subfolder = raw_folders[0]
        # Per-batch sub-folders take precedence; without them the wells sit
        # directly under the plate's raw_files folder.
        batch_folders = glob.glob(subfolder+"*raw_files/")
        for raw_folder in (batch_folders or [subfolder]):
            _collect_well_folders(raw_folder, well_paths)
    return found


path_dict = build_path_dict()

In [264]:
from fpdf import FPDF
def add_image(pdf,img,x,y,w):
    """Place `img` on the current page of `pdf`.

    Thin wrapper that forwards to `pdf.image`, passing `img` positionally and
    `x`, `y`, `w` as keywords. Returns None.
    """
    placement = {"x": x, "y": y, "w": w}
    pdf.image(img, **placement)

In [330]:
def frame_page(pdf,locus,img,plate,well,batch):
    """Append one annotated summary page for `locus` to `pdf`.

    The page holds the image `img`, two horizontal rules, five header rows
    (plate/well/batch, locus and UniProt IDs, gene names, essentiality calls,
    operon) and the product and GO texts broken into 64-character rows.

    Reads the module-level `gff`, `operon_dict` and `go_dict`, and calls
    `read_gff`, `read_essentiality` and `add_image`. A ValueError from
    `read_gff` (unknown locus) propagates before any page is added.

    Parameters
    ----------
    pdf : PDF object providing add_page, set_line_width, line, cell and image.
    locus : locus identifier to annotate.
    img : path of the image placed at the top of the page.
    plate, well, batch : labels printed in the first header row.
    """
    info_table = read_gff(locus,gff)
    # Fields 3..6 of the gff record: name, product, UniProt ID, Mtb ortholog.
    name, product, up_id, mtb_ortholog = (str(field) for field in info_table[3:7])
    # The fourth return value (Mtb description) is not printed.
    es_call_msm, es_call_mtb, mtb_gene_name, _unused_description = read_essentiality(locus,info_table)
    operon = operon_dict.get(locus, "nan")
    go = go_dict.get(locus, "nan")

    def write_wrapped(label, text):
        # One row per 64-character slice: the first row carries `label`,
        # continuation rows are indented by 12 spaces.
        for start in range(0, len(text), 64):
            lead = label if start == 0 else " "*12
            pdf.cell(10,8)
            pdf.cell(80,8,lead+text[start:start+64],border = 0,ln=1)

    pdf.add_page()
    pdf.set_line_width(0.8)
    pdf.line(20,172,185,172)
    pdf.line(20,210,185,210)
    add_image(pdf,img,20,3,170)
    # Tall empty cell moves the cursor below the image.
    pdf.cell(5,163,ln=1)

    header_rows = [
        "Plate: {}, well: {}, batch: {}.".format(plate,well,batch),
        "Locus: Msm | Mtb: {} | {}.  Uniprot: {}.".format(locus,mtb_ortholog,up_id),
        "Name: Msm | Mtb: {} | {}.".format(name,mtb_gene_name),
        "Essentiality: Msm | Mtb: {} | {}.".format(es_call_msm,es_call_mtb),
        "Operon: {}.".format(operon),
    ]
    for row_text in header_rows:
        # A 10-wide blank cell serves as the left indent of every row.
        pdf.cell(10,7)
        pdf.cell(80,7,row_text,border = 0,ln=1)

    # Short spacer between the header rows and the wrapped texts.
    pdf.cell(10,4,ln=1)
    write_wrapped("Product: ", product)
    write_wrapped("GO     : ", go)

In [359]:
# Smoke test for frame_page(): four pages (MSMEG_0001 .. MSMEG_0004) with
# placeholder plate/well/batch labels and the placeholder image.
img = "/Users/jz-rolling/Desktop/not_enough_cells.png"
plate = well = batch = "test"

pdf = FPDF(orientation='P', unit='mm', format='A4')
pdf.set_font('Arial', size=13)

for i in range(1,5):
    locus = "MSMEG_"+str(i).zfill(4)
    frame_page(pdf,locus,img,plate,well,batch)

In [280]:
# Spot check: element 4 of the record returned by read_gff is the "Product"
# field (fifth entry of its column list).
read_gff("MSMEG_0001",gff)[4]

'DNA polymerase III, beta subunit'

In [366]:
# Write one summary PDF per plate: for every (well, folder) pair an annotated
# page from frame_page(), followed by a page with up to four randomly chosen
# "*extensive_plot.png" images in a 2x2 grid.
coords = [[5,5],[100,5],[5,130],[100,130]]   # x,y of the four grid slots
for plate,wells in path_dict.items():
    # A plate folder without an entry in the reference sheet used to raise
    # KeyError on MSR_dict[plate] and abort all remaining plates.
    if plate not in MSR_dict:
        print("Plate {} is not in the MSR reference table; skipped.".format(plate))
        continue
    pdf = FPDF(orientation='P', unit='mm', format='A4')
    pdf.set_font('Arial', size=13)
    for well,well_folders in wells.items():
        if well not in MSR_dict[plate]:
            print("Plate {}, well {} is supposed to be empty!".format(plate,well))
            continue
        locus = MSR_dict[plate][well]
        for folder in well_folders:
            # Batch label: the folder three levels above the well folder,
            # minus its last ten characters.
            batch = folder.split("/")[-4][:-10]
            summary_plot = glob.glob(folder+"summary_plot.png")
            if len(summary_plot) == 1:
                img = summary_plot[0]
            else:
                # Placeholder when the well has no summary plot.
                img = "/Users/jz-rolling/Desktop/not_enough_cells.png"
            frame_page(pdf,locus,img,plate,well,batch)
            cell_plots = glob.glob(folder+"*extensive_plot.png")
            if len(cell_plots) > 0:
                pdf.add_page()
                # Unseeded shuffle: the selected plots differ between runs.
                _idx = np.arange(len(cell_plots))
                np.random.shuffle(_idx)
                # zip stops at the shorter input, i.e. at most four plots.
                for (x,y),plot_no in zip(coords,_idx):
                    add_image(pdf,cell_plots[plot_no],x,y,95)
    pdf.output("/Volumes/Sam/summary/{}.pdf".format(plate))

Plate MSR11, well H12 is supposed to be empty!
Plate MSR1, well B07 is supposed to be empty!
Plate MSR1, well C10 is supposed to be empty!
Plate MSR1, well E08 is supposed to be empty!
Plate MSR1, well G10 is supposed to be empty!
Plate MSR3, well C07 is supposed to be empty!
Plate MSR6, well A05 is supposed to be empty!
Plate MSR6, well G06 is supposed to be empty!
Plate MSR7, well D05 is supposed to be empty!
Plate MSR9, well E02 is supposed to be empty!


In [365]:
# Write whatever document `pdf` currently holds to a scratch file on the
# Desktop. NOTE(review): depends on which cell last assigned `pdf`.
pdf.output("/Users/jz-rolling/Desktop/img.pdf")

''

In [316]:
# Scratch: rendered width of a sample string in the font currently set on
# `pdf` (presumably in the document unit, mm; confirm against fpdf docs).
pdf.get_string_width("/Users/jz-rolling/Desktop/img.pdf")

66.0033111111111

In [340]:
# Scratch: a random ordering of 0..4, the same shuffle idiom used to pick
# cell plots. Unseeded, so the order differs between runs.
a = np.random.permutation(5)

In [360]:
# Manual layout check: the same cell plot placed in all four slots of the
# 2x2 grid (width 95) on a fresh page of the current `pdf`.
pdf.add_page()
_demo_plot = "/Volumes/Sam/MSR2_IDW/plate2_raw_files/08232019_raw_files/output/Well_A01/MSR2_A01_0_0_45_0_extensive_plot.png"
for _slot_x, _slot_y in ((5,5),(100,5),(5,130),(100,130)):
    add_image(pdf,_demo_plot,_slot_x,_slot_y,95)