In [None]:
import math
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import PatchCollection
from matplotlib.patches import Circle, Polygon, Wedge

In [None]:
import Rust_alnPairMat

In [None]:

def get_seq_one_by_one(open_file, position=None):
    """
    Iterate over the records of an opened fasta file.

    Lines are read with ``readline`` so that ``tell`` stays usable on the
    file object while iterating.

    A header that is immediately followed by another header (no sequence
    line in between) is dropped, because a record is only emitted once
    sequence data has been collected for it. The last header of the file is
    always emitted, even with an empty sequence. A file without any header
    or sequence data yields nothing.

    :param open_file: an open fasta file (needs ``readline``; ``tell`` is
                      only required when ``position`` is truthy)
    :param position: when truthy, each yielded tuple carries a third item:
                     the value of ``open_file.tell()`` taken right after the
                     last non-header line of the record was read
    :return: generator of tuple(prompt, sequence) or, when ``position`` is
             truthy, tuple(prompt, sequence, pos)
    """
    header = ""
    chunks = []          # sequence pieces of the current record, joined on yield
    pos = 0
    seen_header = False
    line = open_file.readline()
    while line:
        if line.startswith('>'):
            # A new header closes the previous record, if it has any sequence.
            if chunks:
                seq = "".join(chunks)
                yield (header, seq, pos) if position else (header, seq)
                chunks = []
            header = line[1:].strip()
            seen_header = True
        else:
            residues = line.strip()
            if residues:
                chunks.append(residues)
            if position:
                # Only query the stream offset when the caller asked for it.
                pos = open_file.tell()
        line = open_file.readline()

    # Emit the trailing record, but never a phantom ("", "") for empty input.
    if seen_header or chunks:
        seq = "".join(chunks)
        yield (header, seq, pos) if position else (header, seq)

In [None]:
def reverse_complement(x):
    """
    Return the reverse complement of a DNA sequence.

    Only the upper-case symbols A, C, G, T and N are known; any other
    symbol raises ``KeyError``.

    :param x: sequence of upper-case nucleotide symbols
    :return: the complemented sequence, read from the last symbol to the first
    """
    pairing = dict(zip("NATCG", "NTAGC"))
    return "".join(pairing[base] for base in reversed(x))

def get_intronseq(header, dico_seq):
    """
    Extract the intron sequences of a transcript described by its header.

    The header is split on "__": the second field is a ";"-separated list
    of "start...end" exon coordinates and the last field is the strand.
    The contig name is everything before the first "_FB" in the header.

    Each intron is the upper-cased slice of the contig between the end of
    one exon and the start of the next one. For the "-" strand every intron
    is reverse-complemented and the list order is reversed.

    :param header: fasta header carrying contig, exon coordinates and strand
    :param dico_seq: dict mapping contig name -> contig sequence
    :return: list of intron sequences (empty when there is a single exon)
    """
    strand = header.split("__")[-1]
    contig = header.split("_FB")[0]

    exons = []
    for span in header.split("__")[1].split(";"):
        bounds = span.split('...')
        exons.append((int(bounds[0]), int(bounds[1])))

    on_minus = strand == "-"
    intron_res = []
    for upstream, downstream in zip(exons, exons[1:]):
        gap = dico_seq[contig][upstream[1]:downstream[0]].upper()
        intron_res.append(reverse_complement(gap) if on_minus else gap)

    if on_minus:
        intron_res.reverse()
    return intron_res

def clamp(v, minimum, maximum):
    """
    Restrict ``v`` to the range [minimum, maximum].

    The lower bound is checked first; a value inside the range is returned
    unchanged.
    """
    if v < minimum:
        return minimum
    if v > maximum:
        return maximum
    return v
def get_windows_size(seq, chuncks=300):
    """
    Pick a window size so that ``seq`` is cut into about ``chuncks`` windows.

    The raw size ``len(seq) / chuncks`` is clamped to [5, 10000] and then
    truncated to an integer.
    """
    raw_size = len(seq) / chuncks
    bounded = clamp(raw_size, 5, 10000)
    return int(bounded)

def rotate(p, theta):
    """
    Rotate a 2D point around the origin with the standard rotation matrix.

    :param p: indexable holding the x coordinate at [0] and y at [1]
    :param theta: rotation angle in degrees
    :return: the rotated point as a new list [x, y]
    """
    angle = math.radians(theta)
    x0, y0 = p[0], p[1]
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    return [x0 * cos_a - y0 * sin_a, y0 * cos_a + x0 * sin_a]

In [None]:
# Load both genome assemblies into memory as {record name: sequence} dicts.
# The fasta reader is the get_seq_one_by_one function defined in this
# notebook; no `fasta_reader` module is imported anywhere, so it is called
# directly.
with open("<dmau_scaffold2_V2.fasta>") as fi:
    dico_mau = dict(get_seq_one_by_one(fi))

with open("<dsim_scaffold2_V2.fasta>") as fi:
    dico_sim = dict(get_seq_one_by_one(fi))

In [None]:
# Directory that receives every figure saved by this notebook.
out_dir = Path("<Figure_Dotplot_dir>")
# savefig does not create missing folders, so make sure the target exists.
out_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# You need the CDS (exons) of the two sequences to compare,
# one sequence per file.
seq1_file = "<dmau_cds_FBgn0267432.fna>"
seq2_file = "<dsim_cds_FBgn0267432.fna>"

# dico_s maps species tag -> CDS sequence; mau_h / sim_h keep the fasta
# headers, which carry the exon coordinates parsed further down.
# The reader is the get_seq_one_by_one function defined in this notebook
# (no `fasta_reader` module is imported). If a file holds several records,
# the last one wins.
dico_s = {}
with open(seq1_file) as fi:
    for p, s in get_seq_one_by_one(fi):
        dico_s["mau"] = s
        mau_h = p
with open(seq2_file) as fi:
    for p, s in get_seq_one_by_one(fi):
        dico_s["sim"] = s
        sim_h = p


In [None]:
# Then we need to know the size of every exon.
# With the way the CDS were extracted, that information is in the sequence
# name (example: Y_scaffold1_FBgn0267432_FBtr0346771_kl-3__5910520...5911634;5913233...5913336;5913394...5914735;5970309...5970477;5970542...5970947;6178731...6178942;6178995...6179511;6203120...6203249;6203303...6205515;6205573...6206957;6270719...6271053;6298912...6299720;6299994...6301454;6343117...6345992;6346048...6346226;6486977...6487645__+)
# In the end you need a list with the size in bp of every exon, in the same
# order as the exons appear in the header.

def exon_sizes_from_header(header):
    """Return [end - start, ...] for each "start...end" pair of the header.

    The coordinate list is the second "__"-separated field of the header
    and its entries are separated by ";".
    """
    sizes = []
    for span in header.split("__")[1].split(";"):
        bounds = span.split('...')
        sizes.append(int(bounds[1]) - int(bounds[0]))
    return sizes


mau_exon_size = exon_sizes_from_header(mau_h)
sim_exon_size = exon_sizes_from_header(sim_h)

In [None]:
# Window size handed to Rust_alnPairMat.get_matrix; exon sizes are also
# divided by it to place the exon markers on the dot plot.
# NOTE(review): presumably expressed in bases — confirm against Rust_alnPairMat.
windows_size = 50

In [None]:
# Exon lengths rescaled by the window size. Despite their names,
# mau_intron / sim_intron hold one value per exon; their running sums are
# used later to place the exon-boundary markers.
mau_intron, sim_intron = (
    np.array(sizes) / windows_size
    for sizes in (mau_exon_size, sim_exon_size)
)

# Dot plot matrix for the coding sequences (mau against sim).
# NOTE(review): the trailing arguments (-10., -0.5, 5, True) are defined by
# Rust_alnPairMat.get_matrix — check its documentation before changing them.
x = Rust_alnPairMat.get_matrix(
    dico_s["mau"],
    dico_s["sim"],
    windows_size,
    -10.,
    -0.5,
    5,
    True,
)


In [None]:
# Gene label inserted in the CDS plot title and in the saved figure file names.
gene_name = "kl-3"

In [None]:
# --- CDS dot plot with exon-boundary markers along the diagonal ---

def _window_ticks(n_windows, n_ticks=10):
    """Return (tick positions, tick labels) for an axis of `n_windows` cells.

    Positions are matrix indices; labels are the index multiplied by
    windows_size.
    NOTE(review): this assumes one matrix cell per `windows_size` bases, the
    same convention used below to place the exon markers — confirm against
    Rust_alnPairMat.
    """
    step = max(1, math.ceil(n_windows / n_ticks))
    positions = np.arange(0, n_windows, step)
    return positions, [str(int(pos * windows_size)) for pos in positions]


def _diagonal_triangle(pos, angle, size):
    """Build a triangle whose tip sits on the diagonal at (pos, pos).

    The triangle is defined tip-at-origin, rotated by `angle` degrees with
    `rotate`, then translated onto the diagonal.
    """
    corners = ((0, 0), (0.5 * size, -1 * size), (-0.5 * size, -1 * size))
    vertices = []
    for corner in corners:
        vx, vy = rotate(corner, angle)
        vertices.append((vx + pos, vy + pos))
    return Polygon(vertices, closed=True)


figure = plt.figure(figsize=(10, 10), dpi=80)
ax = figure.add_axes((0, 0, 1, 1))
ax.imshow(x, vmin=0.5, cmap="Greys", origin="lower")

# The tick variables used by the original cell were never defined in this
# notebook, so they are rebuilt here from the matrix shape.
# Rows are drawn on the y axis (labelled "mau"), columns on the x axis ("sim").
n_rows, n_cols = np.shape(x)[:2]
mau_ticks, mau_labels = _window_ticks(n_rows)
sim_ticks, sim_labels = _window_ticks(n_cols)
plt.yticks(mau_ticks, mau_labels)
plt.xticks(sim_ticks, sim_labels)

s = 6  # triangle size, in matrix cells

# mau_intron / sim_intron hold exon lengths in window units, so their running
# sums are the exon boundaries inside the CDS. The last exon has no boundary
# after it, hence the [:-1].
mau_marks = [_diagonal_triangle(pos, -135, s) for pos in np.cumsum(mau_intron[:-1])]
ax.add_collection(PatchCollection(mau_marks, color="orange"))

sim_marks = [_diagonal_triangle(pos, 45, s) for pos in np.cumsum(sim_intron[:-1])]
ax.add_collection(PatchCollection(sim_marks, color="steelblue"))

plt.ylabel("mau")
plt.xlabel("sim")
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.title("cds {} dot plot".format(gene_name))
plt.savefig(out_dir / "{}_cds.pdf".format(gene_name), bbox_inches='tight')
plt.savefig(out_dir / "{}_cds.png".format(gene_name), bbox_inches='tight')
plt.show()

In [None]:
# Intron dot plot: this has to be done manually for every intron you want
# to look at, by slicing the genome sequences with explicit positions.
# Each window starts 500 positions before the anchor coordinate and ends
# 5000 positions after it.
# NOTE(review): 5970947 matches the end of the fifth exon in the example
# kl-3 header quoted earlier in this notebook; the sim anchor is presumably
# the homologous position — confirm.
windows_size = 50
mau_i5 = dico_mau["Y_scaffold1"][5970947 - 500:5970947 + 5000]
sim_i5 = dico_sim["Y_scaffold2"][220622 - 500:220622 + 5000]

x = Rust_alnPairMat.get_matrix(
    mau_i5, sim_i5, windows_size, -10., -0.5, 5, True
)

figure = plt.figure(figsize=(10, 10), dpi=80)
ax = figure.add_axes((0, 0, 1, 1))
ax.imshow(x, vmin=0.5, cmap="Greys", origin="lower")
ax.set_ylabel("mau")
ax.set_xlabel("sim")
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
figure.savefig(out_dir / "gene_intronN_start.pdf", bbox_inches='tight')
plt.show()
plt.close()