In [None]:
import os
import json
import datetime

import tqdm
import glob
from imp import reload

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from sklearn.preprocessing import normalize
from scipy.optimize import minimize

import deltascope as ds
from statsmodels.stats import multitest

In [None]:
%matplotlib inline

In [None]:
# --------------------------------
# -------- User input ------------
# --------------------------------

# Search the working directory for exported landmark data and its bin file
lm_candidates = glob.glob('*landmarks.csv')
bin_candidates = glob.glob('*landmarks_bins.json')
print(lm_candidates,bin_candidates)

# Change the index below if the first match is not the file you want
lmpath, binpath = lm_candidates[0], bin_candidates[0]

In [None]:
# Load the exported landmark table found at lmpath into a DataFrame.
# Later cells group its rows by the `stype` column.
oldlm = pd.read_csv(lmpath)

In [None]:
# Read the landmark bin definitions from the JSON file at binpath
with open(binpath,'r') as binfile:
    bins = json.load(binfile)

# Pull out the two bin arrays used by the rest of the notebook
acbins, tbins = bins['acbins'], bins['tbins']

In [None]:
# One line colour per sample type / channel combination (assigned in the graph data cell)
colors = ['#41ab5d','#ef3b2c','#00441b','#67000d']

# Bin values rounded to two decimals
xarr = np.round(acbins,2)
tarr = np.round(tbins,2)

# Each of the four plot columns pairs theta bin k with theta bin k+4
tpairs = [[tarr[k],tarr[k+4]] for k in range(4)]

# Restructure Data

We will sort the landmark data by `stype` and organize it in a two-tiered dictionary, keyed first by sample type (`s`) and then by channel (`c`).

In [None]:
# Two-level lookup: Dlm[s][c] holds the rows of oldlm belonging to one stype
Dlm = {}
for stype in tqdm.tqdm(oldlm.stype.unique()):
    # Adjust this parsing if your stype labels are structured differently:
    # the text before the first '-' is the sample type, the text after the last '-' is the channel
    fields = stype.split('-')
    s, c = fields[0], fields[-1]

    # Create the sample type entry on first use, then store this stype's landmark rows
    Dlm.setdefault(s, {})[c] = oldlm[oldlm.stype==stype]

# Set up graph data

In [None]:
# --------------------------------
# -------- User input ------------
# --------------------------------

# put the sample type names here
stype1 = '' #ex: 'wt'
stype2 = '' #ex: 'yot'
# put the channel names here
channel1 = '' #ex: 'AT'
channel2 = '' #ex: 'ZRF'

# gdata[sample type][channel] -> ds.graphData built from the matching Dlm entry
gdata = {stype1: {}, stype2: {}}

# (sample type, channel, line colour) for every combination that gets plotted
combos = [
    (stype2, channel2, colors[3]),
    (stype1, channel2, colors[1]),
    (stype2, channel1, colors[2]),
    (stype1, channel1, colors[0]),
]
for sample, chan, colour in combos:
    gdata[sample][chan] = ds.graphData(Dlm[sample][chan],colour)

# Graphs

In [None]:
# Settings read by the graph cells below
crop = 40 # only -crop < alpha < crop is plotted (presumably microns) so that we keep just the middle of the commissure; None disables cropping
legend = False # NOTE(review): the graph cells below draw a legend without consulting this flag — confirm intent
save = True # when True, each graph cell writes its figure to a dated PDF
a = 0.3 # transparency of plot
pthresh = 0.01 # statistics significance level

In [None]:
# Compare stype1 and stype2 for one channel: mean +/- SEM profiles for every
# theta-bin pair, with per-bin significance markers on the stype2 traces.
channel = channel2

# One row per data type ('r', 'pts'), one column per entry of tpairs
fig,axr = plt.subplots(2,4,figsize=(12,5),sharey='row',sharex=True)

# Restrict the alpha axis to -crop < alpha < crop
if crop is not None:
    mask = np.where((xarr>-crop)&(xarr<crop))[0]
    xarrcr = xarr[mask.min():mask.max()+1]
else:
    xarrcr = xarr

for I,dtype in enumerate(['r','pts']):
    go1 = gdata[stype1][channel]
    go1.prepare_data(xarrcr,tarr,dtype)
    go2 = gdata[stype2][channel]
    go2.prepare_data(xarrcr,tarr,dtype)

    # Kruskal-Wallis p-value for every (alpha bin, theta bin) position.
    # The last theta bin is skipped, exactly as before.
    n_alpha, n_theta = go1.arr_masked.shape[:2]
    praw = np.full((n_alpha, n_theta-1), np.nan)
    for ai in range(n_alpha):
        for ti in range(n_theta-1):
            try:
                praw[ai,ti] = stats.kruskal(go1.arr_masked[ai,ti,:], go2.arr_masked[ai,ti,:], nan_policy = 'omit')[1]
            except Exception:
                # Best effort: a bin that cannot be tested stays NaN.
                # (Unlike a bare except, this lets KeyboardInterrupt through.)
                praw[ai,ti] = np.nan

    # Correct for multiple comparisons using only the bins that produced a
    # p-value: a single NaN passed to multipletests can turn every corrected
    # value into NaN. The result keeps the shape of the test grid instead of
    # assuming a fixed 11 x 8 layout.
    # parr: 0 = significant, 1 = not significant, NaN = could not be tested
    parr = np.full(praw.shape, np.nan)
    testable = np.isfinite(praw)
    if testable.any():
        padj = multitest.multipletests(praw[testable], alpha=pthresh, method='fdr_tsbh')[1]
        parr[testable] = np.where(padj < pthresh, 0.0, 1.0)

    for n,p in enumerate(tpairs):
        ax = axr[I,n]

        # Positions of the two paired theta bins in tarr
        ti1 = np.where(tarr==p[0])[0][0]
        ti2 = np.where(tarr==p[1])[0][0]

        # First bin of the pair is drawn upwards, second bin mirrored downwards
        for go,name in ((go1,stype1),(go2,stype2)):
            ax.fill_between(xarrcr,go.avg[:,ti1]+go.sem[:,ti1],go.avg[:,ti1]-go.sem[:,ti1],alpha=a,color=go.c,zorder=1)
            ax.fill_between(xarrcr,-go.avg[:,ti2]+go.sem[:,ti2],-go.avg[:,ti2]-go.sem[:,ti2],alpha=a,color=go.c,zorder=1)

            ax.plot(xarrcr,go.avg[:,ti1],c=go.c,zorder=2,label='{} {}'.format(go.arr.shape[-1],name))
            ax.plot(xarrcr,-go.avg[:,ti2],c=go.c,zorder=2)

        # Significance markers on the stype2 traces (black = significant, white = not)
        ax.scatter(xarrcr,go2.avg[:,ti1],c=parr[:,ti1],cmap='Greys_r',zorder=3,vmin=0,vmax=1,edgecolor='k')
        ax.scatter(xarrcr,-go2.avg[:,ti2],c=parr[:,ti2],cmap='Greys_r',zorder=3,vmin=0,vmax=1,edgecolor='k')

        # NOTE(review): the `legend` setting is not consulted; the legend is always drawn on the top row
        axr[0,n].legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,ncol=2, mode="expand", borderaxespad=0.)

plt.tight_layout()

tstamp = datetime.datetime.now().strftime('%Y-%m-%d')

if save:
    fig.savefig(tstamp+'_'+stype2+'-'+stype1+'-{}.pdf'.format(channel))

In [None]:
# Compare stype1 and stype2 for one channel: mean +/- SEM profiles for every
# theta-bin pair, with per-bin significance markers on the stype2 traces.
channel = channel1

# One row per data type ('r', 'pts'), one column per entry of tpairs
fig,axr = plt.subplots(2,4,figsize=(12,5),sharey='row',sharex=True)

# Restrict the alpha axis to -crop < alpha < crop
if crop is not None:
    mask = np.where((xarr>-crop)&(xarr<crop))[0]
    xarrcr = xarr[mask.min():mask.max()+1]
else:
    xarrcr = xarr

for I,dtype in enumerate(['r','pts']):
    go1 = gdata[stype1][channel]
    go1.prepare_data(xarrcr,tarr,dtype)
    go2 = gdata[stype2][channel]
    go2.prepare_data(xarrcr,tarr,dtype)

    # Kruskal-Wallis p-value for every (alpha bin, theta bin) position.
    # The last theta bin is skipped, exactly as before.
    n_alpha, n_theta = go1.arr_masked.shape[:2]
    praw = np.full((n_alpha, n_theta-1), np.nan)
    for ai in range(n_alpha):
        for ti in range(n_theta-1):
            try:
                praw[ai,ti] = stats.kruskal(go1.arr_masked[ai,ti,:], go2.arr_masked[ai,ti,:], nan_policy = 'omit')[1]
            except Exception:
                # Best effort: a bin that cannot be tested stays NaN.
                # (Unlike a bare except, this lets KeyboardInterrupt through.)
                praw[ai,ti] = np.nan

    # Correct for multiple comparisons using only the bins that produced a
    # p-value: a single NaN passed to multipletests can turn every corrected
    # value into NaN. The result keeps the shape of the test grid instead of
    # assuming a fixed 11 x 8 layout.
    # parr: 0 = significant, 1 = not significant, NaN = could not be tested
    parr = np.full(praw.shape, np.nan)
    testable = np.isfinite(praw)
    if testable.any():
        padj = multitest.multipletests(praw[testable], alpha=pthresh, method='fdr_tsbh')[1]
        parr[testable] = np.where(padj < pthresh, 0.0, 1.0)

    for n,p in enumerate(tpairs):
        ax = axr[I,n]

        # Positions of the two paired theta bins in tarr
        ti1 = np.where(tarr==p[0])[0][0]
        ti2 = np.where(tarr==p[1])[0][0]

        # First bin of the pair is drawn upwards, second bin mirrored downwards
        for go,name in ((go1,stype1),(go2,stype2)):
            ax.fill_between(xarrcr,go.avg[:,ti1]+go.sem[:,ti1],go.avg[:,ti1]-go.sem[:,ti1],alpha=a,color=go.c,zorder=1)
            ax.fill_between(xarrcr,-go.avg[:,ti2]+go.sem[:,ti2],-go.avg[:,ti2]-go.sem[:,ti2],alpha=a,color=go.c,zorder=1)

            ax.plot(xarrcr,go.avg[:,ti1],c=go.c,zorder=2,label='{} {}'.format(go.arr.shape[-1],name))
            ax.plot(xarrcr,-go.avg[:,ti2],c=go.c,zorder=2)

        # Significance markers on the stype2 traces (black = significant, white = not)
        ax.scatter(xarrcr,go2.avg[:,ti1],c=parr[:,ti1],cmap='Greys_r',zorder=3,vmin=0,vmax=1,edgecolor='k')
        ax.scatter(xarrcr,-go2.avg[:,ti2],c=parr[:,ti2],cmap='Greys_r',zorder=3,vmin=0,vmax=1,edgecolor='k')

        # NOTE(review): the `legend` setting is not consulted; the legend is always drawn on the top row
        axr[0,n].legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,ncol=2, mode="expand", borderaxespad=0.)

plt.tight_layout()

tstamp = datetime.datetime.now().strftime('%Y-%m-%d')

if save:
    fig.savefig(tstamp+'_'+stype2+'-'+stype1+'-{}.pdf'.format(channel))