In [146]:
import numpy as np
import pandas as pd
import seaborn as sns; sns.set()
from matplotlib import pyplot as plt, cm, colors
import matplotlib.gridspec as gridspec
from mpl_toolkits.mplot3d import Axes3D

sns.set(style="darkgrid")
sns.set(style="darkgrid")
from matplotlib import pyplot as plt, cm, colors
from tqdm.notebook import tqdm

import glob
import os.path as ospath
import os
from sys import executable
from subprocess import check_output
from PyQt5.QtWidgets import QFileDialog, QApplication
from IPython.display import HTML
import re

from scipy import optimize
from scipy.spatial import distance
from scipy import linalg
from scipy import signal
from sklearn.cluster import MeanShift, estimate_bandwidth

from picasso.picasso import io
from picasso.picasso.postprocess import link, compute_dark_times
from picasso.picasso.render import render
from picasso.picasso.gui.render import estimate_kinetic_rate, fit_cum_exp


In [147]:
# Shared hex colour strings used by the plotting helpers defined below.
blue = "#4C72B0"    # ring with id 0 (labelled "spore" in the plots)
orange = "#DD8452"  # the other ring (labelled "mother" in the plots)
red = "#C44E52"     # fitted curves and fitted circles
gray = "#90A8CE"    # default histogram bar colour


def OpenFileDialog():
    """Run this script in a child Python process and return its stripped output.

    The child is started with the current interpreter (``sys.executable``) on
    ``__file__``; whatever ``check_output`` captures is returned with leading
    and trailing whitespace removed.

    NOTE(review): ``__file__`` is normally not defined inside a notebook
    kernel, so this helper presumably only works when the code is executed as
    a plain script -- confirm before relying on it.
    """
    command = [executable, __file__]
    captured = check_output(command)
    return captured.strip()


def gui_fname(dir=None):
    """
    Select a folder via a Qt dialog and return its path.

    Parameters
    ----------
    dir : str, optional
        Directory in which the dialog opens. Defaults to "./".

    Returns
    -------
    str
        The selected folder (an empty string if the dialog was cancelled).
    """
    if dir is None:
        dir = "./"

    # Qt allows only one QApplication per process. Reuse the running instance
    # (e.g. created by an earlier call or by a GUI event loop) instead of
    # failing while trying to construct a second one.
    app = QApplication.instance()
    if app is None:
        app = QApplication([dir])

    fname = QFileDialog.getExistingDirectory(None, "Select a folder...", dir)
    # Some Qt bindings return a tuple from file dialogs; keep that case covered.
    if isinstance(fname, tuple):
        return fname[0]
    return str(fname)


def load_files(dirname):
    """List the ``*.hdf5`` files located directly inside ``dirname``.

    Side effect: the process working directory is changed to ``dirname``,
    which is why the returned names are bare file names without a folder.
    A short summary of the search result is printed.

    Returns:
        list of str: matching file names (empty if none were found).
    """
    os.chdir(dirname)
    files = glob.glob("*.hdf5")

    if not files:
        message = "No HDF5 files found at: {}".format(dirname)
    else:
        message = "{} HDF5 files found.".format(len(files))
    print(message)

    return files


def load_data(path):
    """Load localizations from ``path`` and convert coordinates from px to nm.

    The file is read with ``io.load_locs``. The pixel size is taken from
    ``info[1]["Pixelsize"]``; if that entry is missing, 130 (nm) is used as a
    fallback and a message is printed.

    The fields ``x``, ``y``, ``x_pick_rot`` and ``y_pick_rot`` of the loaded
    localizations are multiplied in place by the pixel size.

    Returns:
        tuple: ``(data, info, pixelsize)``.
    """
    default_pixelsize = 130  # nm, matches the printed fallback message

    data, info = io.load_locs(path)
    try:
        pixelsize = info[1]["Pixelsize"]
    except (IndexError, KeyError, TypeError):
        # Previously the fallback was only announced but never assigned,
        # so the conversion below failed with an unbound local variable.
        pixelsize = default_pixelsize
        print("No pixelsize found in yaml file. Default 130 nm used.")

    # convert px to nm
    data.x *= pixelsize
    data.y *= pixelsize
    data.x_pick_rot *= pixelsize
    data.y_pick_rot *= pixelsize

    return data, info, pixelsize


def load_ring_data_df(dirname, filename):
    """Read a pickled results object from ``dirname/filename`` if it exists.

    A message stating whether earlier results were found is printed.

    Returns:
        The object returned by ``pd.read_pickle`` or ``None`` when the file
        does not exist.
    """
    pickle_path = ospath.join(dirname, filename)

    try:
        previous_results = pd.read_pickle(pickle_path)
    except FileNotFoundError:
        print("No results of previously analyzed datasets were detected.")
        return None

    print("Results of previously analyzed datasets were detected.")
    return previous_results
        

def identify_new_files(files, df_ring_data):
    """
    Return the entries of ``files`` that are not listed in the 'filename'
    column of ``df_ring_data`` (i.e. files without earlier results).
    The number of known and of new files is printed.
    """
    new_files = [
        name for name in files
        if name not in df_ring_data['filename'].values
    ]

    n_new = len(new_files)
    n_old = len(files) - n_new

    if n_old != 1:
        print(" {} HDF5 files were previously analyzed.".format(n_old))
    else:
        print(" {} HDF5 file was previously analyzed.".format(n_old))

    if n_new != 1:
        print(" {} HDF5 files are new and will be analyzed.".format(n_new))
    else:
        print(" {} HDF5 file is new and will be analyzed.".format(n_new))

    return new_files
   

def identify_fov_cell_type_channel(df_ring_data, filenames, fov_id_start, protein1, protein2):
    """
    Assign a cell type, a channel and a FOV index to every file name.

    For each file identify
    - cell type: sporulating ('spor') or vegetative ('veg') cells
    - channel: protein1 or protein2
    - fov index: fov from which the picks were generated

    Cell type and channel are detected by a case-insensitive search in the
    file name; if both alternatives occur, the one occurring first wins.
    Files without a detectable cell type get NaN as cell type and fov index.

    If some files have been analyzed before the current script execution
    the results were saved in the ring_data file and loaded to df_ring_data
    (it must provide the columns 'filename' and 'fov_id').
    If no prior analysis results exist, then df_ring_data = None.
    Therefore we can check if a new file belongs to a previously analyzed fov.

    Parameters
    ----------
    df_ring_data : pandas.DataFrame or None
        Results of earlier runs, or None.
    filenames : iterable of str
        File names to register.
    fov_id_start : number
        Highest fov index already in use; new indices start at fov_id_start + 1.
    protein1, protein2 : str
        Channel names. They are passed to ``re.search`` as patterns.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns 'index' (the file name, produced by
        ``reset_index``), 'fov_id', 'filename', 'cell_type' and 'channel'.

    Raises
    ------
    Exception
        If more than three other files appear to belong to the same FOV.
    """
    dictionary = {}
    filenames_no_cell_type = []
    filenames_no_channel = []

    fov_id_counter = fov_id_start + 1

    for filename in filenames:
        # cell type: spor or veg
        spor_found = re.search('spor', filename, re.IGNORECASE)
        veg_found = re.search('veg', filename, re.IGNORECASE)

        if spor_found and not veg_found:
            cell_type = 'spor'
        elif not spor_found and veg_found:
            cell_type = 'veg'
        elif spor_found and veg_found:
            # consider the string occurring first as cell type determining string
            if spor_found.start() < veg_found.start():
                cell_type = 'spor'
            else:
                cell_type = 'veg'
        else:
            cell_type = np.nan
            filenames_no_cell_type.append(filename)

        # channel: protein1 or protein2
        protein1_found = re.search(protein1, filename, re.IGNORECASE)
        protein2_found = re.search(protein2, filename, re.IGNORECASE)

        if protein1_found and not protein2_found:
            channel = protein1
        elif not protein1_found and protein2_found:
            channel = protein2
        elif protein1_found and protein2_found:
            # consider the string occurring first as channel determining string
            if protein1_found.start() < protein2_found.start():
                channel = protein1
            else:
                channel = protein2
        else:
            channel = np.nan
            filenames_no_channel.append(filename)

        # search if a file from the same fov was already registered:
        if not pd.isnull(cell_type):
            # The string starting after the second occurrence of '_' and ending
            # with 'kcb102' is the same for all files from the same FOV, no
            # matter their cell type or the protein channel.
            # However, this might be the same string for other FOVs measured on
            # the same day. Thus, also search for the string 'fov' in the
            # filename, which indicates the FOV id of that specific day.

            # get substring after the second '_'
            second_underscore_idx = filename.find('_', filename.find('_')+1)
            kcb102_idx = filename.find('kcb102')
            fov_identifying_substring = filename[second_underscore_idx:kcb102_idx+1]

            # Reset for every file. Without this default the variable was
            # undefined for names without 'fov' (or silently kept the value of
            # the previously processed file).
            fov_substring = ''
            # Is there the word 'fov' in the filename?
            # If yes, then extract the number after 'fov' to search other files with this name
            if 'fov' in filename:
                fov_filename_id = re.findall(r'%s(\d+)' % 'fov', filename)
                if len(fov_filename_id) == 1:
                    fov_substring = 'fov' + str(fov_filename_id[0])

            # check if an already registered file exists that contains the substrings
            filenames_found = []
            old_or_new_file = []  # True if file from previous run of the script, False if new file.
            for filename2 in dictionary.keys():  # search in files that were already registered.
                contained = fov_identifying_substring in filename2 and fov_substring in filename2
                if contained and filename != filename2:
                    filenames_found.append(filename2)
                    old_or_new_file.append(False)
            if isinstance(df_ring_data, pd.DataFrame):
                previously_analyzed_files = np.unique(df_ring_data['filename'])
                for filename2 in previously_analyzed_files:
                    contained = fov_identifying_substring in filename2 and fov_substring in filename2
                    if contained and filename != filename2:
                        filenames_found.append(filename2)
                        old_or_new_file.append(True)

            # if other files were found: assign the existing fov index to the newly registered file
            # if no other file was found: assign a new fov index to the newly registered file
            if len(filenames_found) > 3:
                raise Exception('''Files from the same FOV than ''' + filename + ''' where searched. 
                However more than three other file was detected: 
                ''' + '\n'.join(filenames_found))
            elif len(filenames_found) >= 1:
                filename_found = filenames_found[0]
                if old_or_new_file[0]:  # filename_found is from previous run of the script
                    fov_id = df_ring_data.loc[df_ring_data['filename'] == filename_found, 'fov_id'].iloc[0]
                else:  # filename_found is also a new file.
                    fov_id = dictionary[filename_found][0]
            else:  # No file from the same fov previously registered
                fov_id = fov_id_counter
                fov_id_counter += 1

        else:
            fov_id = np.nan

        dictionary[filename] = [fov_id, filename, cell_type, channel]

    df_results = pd.DataFrame.from_dict(dictionary, orient = 'index', columns = ['fov_id', 'filename', 'cell_type', 'channel'])
    df_results = df_results.reset_index()

    # NOTE(review): files without a detectable channel are collected in
    # filenames_no_channel but are not reported here.
    print()
    print('The cell type (spr or veg) of these files could not be determined')
    print('and thus cannot be used for further analysis:')
    for filename in filenames_no_cell_type:
        print(' -', filename)

    return df_results
                
    

def double_gaus(x,a,x0,sigma, b, x1, sigma1):
    """Sum of two Gaussian peaks evaluated at ``x``.

    The first peak has amplitude ``a``, centre ``x0`` and width ``sigma``;
    the second one has amplitude ``b``, centre ``x1`` and width ``sigma1``.
    """
    first_peak = a*np.exp(-(x-x0)**2/(2*sigma**2))
    second_peak = b*np.exp(-(x-x1)**2/(2*sigma1**2))
    return first_peak + second_peak

def gaus(x,a,x0,sigma):
    """Gaussian peak with amplitude ``a``, centre ``x0`` and width ``sigma``."""
    exponent = -(x-x0)**2/(2*sigma**2)
    return a*np.exp(exponent)


def find_peak(data, binning=100, bins = None, axes="y"):
    """Estimate and fit a single peak in the distribution of one coordinate.

    The peak position is first estimated as the first cluster centre found by
    a MeanShift clustering of the selected values. A histogram of the values
    is then fitted with a single Gaussian (``gaus``), starting from that
    estimate.

    Parameters
    ----------
    data : indexable
        Localizations; ``data[column]`` must return a NumPy array.
    binning : int
        Number of histogram bins (used when ``bins`` is None). It is also
        passed to ``estimate_bandwidth`` as ``n_samples``.
    bins : optional
        Explicit bin specification forwarded to ``np.histogram``.
    axes : {"y", "x", "xyz"}
        Selects "y_pick_rot", "x_pick_rot" or index 2 of ``data``.

    Returns
    -------
    estimated_peak : dict
        ``{0: position}`` of the MeanShift estimate.
    p_fit : sequence
        Fitted ``[amplitude, centre, sigma]``, or ``[0, 0, 0]`` if the fit failed.
    hist_data : list
        ``[counts, bin_centers]``.

    Raises
    ------
    ValueError
        If ``axes`` is not one of the supported values.
    """
    if axes == "y":
        column = "y_pick_rot"
    elif axes == "x":
        column = "x_pick_rot"
    elif axes == "xyz":
        column = 2
    else:
        raise ValueError("axes must be 'y', 'x' or 'xyz', got {!r}".format(axes))

    values = data[column]
    samples = values.reshape(-1, 1)

    # find peak
    bandwidth = estimate_bandwidth(samples, quantile=0.2, n_samples=binning)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(samples)
    # cluster_centers_ has one column here (samples has a single feature);
    # index the scalar explicitly instead of converting a 1-element array.
    peak1 = float(ms.cluster_centers_[0, 0])
    estimated_peak = {0: peak1}

    # histogram
    n, bins = np.histogram(values, bins=bins if bins is not None else binning)
    centers = (bins[:-1] + bins[1:]) / 2
    hist_data = [n, centers]

    # fit peak
    p0 = [peak1/2, peak1, 40]
    try:
        p_fit, p_cov = optimize.curve_fit(gaus, centers, n, p0=p0)
    except Exception:
        # Best effort: a failed fit is reported as an all-zero parameter set.
        # KeyboardInterrupt/SystemExit are deliberately no longer swallowed.
        p_fit = [0,0,0]

    return estimated_peak, p_fit, hist_data

"""
def plot_peak_dist(data, hist_data, peaks, p_fit, binning=100,cutoff=1, axes="y", ax=None):
    
    if ax is None:
        ax = plt.gca()
        
    if axes == "y":
        column = "y_pick_rot"
    elif axes == "x":
        column = "x_pick_rot"
    elif axes == "xyz":
        column == 1
        
    peak1 = peaks[0]
    peak2 = peaks[1]
    
    n = hist_data[0]
    bins = hist_data[1]
    
    #fig, ax = plt.subplots(figsize=(9, 5))
    binwidth = (max(bins)-min(bins)) / binning
    ax.bar(bins, n, width=binwidth, color=gray)
    xlin = np.linspace(0, data[column].max(), 1000)
    ax.plot(xlin, gaus(xlin,*p_fit[0:3]), c=red, linewidth=2)
    ax.plot(xlin, gaus(xlin,*p_fit[3:6]), c=red, linewidth=2)
    ax.axvline(p_fit[1]-(p_fit[2]*cutoff),c=blue, linewidth=2, linestyle="--")
    ax.axvline(p_fit[1]+(p_fit[2]*cutoff),c=blue, linewidth=2, linestyle="--")
    ax.axvline(p_fit[4]-(p_fit[5]*cutoff),c=orange, linewidth=2, linestyle="--")
    ax.axvline(p_fit[4]+(p_fit[5]*cutoff),c=orange, linewidth=2, linestyle="--")
    ax.set_title("Line profile",loc="left",fontsize=14)
    ax.set_xlabel("y (nm)")
    ax.set_ylabel("Counts")  
    ax.text(0.15,
            0.7,
            ("Estimated Peaks:\n"
            "Peak 1 at {:.1f} nm\n"
            "Peak 2 at {:.1f} nm\n"
            "Fitted Peaks:\n"
            "Peak 1 at {:.1f} nm\n"
            "Peak 2 at {:.1f} nm").format(peak1,peak2,p_fit[1],p_fit[4]),
            horizontalalignment="center",
            verticalalignment="center",
            transform = ax.transAxes,
            fontsize=12)

    return ax
"""
def plot_peak_dist(data, 
                   hist_data, 
                   peaks, 
                   p_fit, 
                   binning=100,
                   cutoff=1, 
                   axes="y", 
                   title = None, 
                   color_hist = gray, 
                   color_fit = red, 
                   annotate = True, 
                   ax=None):
    """Plot a histogram together with its fitted Gaussian peak.

    Parameters
    ----------
    data : indexable
        Localizations; ``data[axes]`` is used to determine the upper end of
        the range over which the fit curve is drawn.
    hist_data : sequence
        ``[counts, bin_centers]``.
    peaks : mapping or sequence
        ``peaks[0]`` is the estimated peak position shown in the annotation.
    p_fit : sequence
        Gaussian parameters ``[amplitude, centre, sigma]`` for ``gaus``.
    binning, cutoff :
        Currently unused; kept for interface compatibility.
    axes : str
        Field of ``data`` that was histogrammed. It is used as given, i.e.
        it is NOT translated to "y_pick_rot"/"x_pick_rot" here.
    title : str, optional
        Axes title; "Line profile" if None.
    color_hist, color_fit :
        Colours of the bars and of the fit curve.
    annotate : bool
        If True, write estimated/fitted peak position and sigma into the axes.
    ax : matplotlib axes, optional
        Target axes; the current axes are used if None.

    Returns
    -------
    The axes that were drawn on.
    """
    if ax is None:
        ax = plt.gca()

    # A stray comparison (`column == 1`) used to be evaluated for
    # axes == "xyz" before `column` existed, which raised UnboundLocalError.
    # It had no effect otherwise, so the field name is simply taken from `axes`.
    column = axes

    peak1 = peaks[0]

    n = hist_data[0]
    bins = hist_data[1]

    binwidth = bins[1]-bins[0]
    ax.bar(bins, n, width=binwidth, color=color_hist, alpha = 0.5)
    xlin = np.linspace(0, data[column].max(), 1000)
    ax.plot(xlin, gaus(xlin,*p_fit[0:3]), c=color_fit, linewidth=2)

    if title is None:
        ax.set_title("Line profile",loc="left",fontsize=14)
    else:
        ax.set_title(title,loc="left",fontsize=14)

    ax.set_xlabel("y (nm)")
    ax.set_ylabel("Counts")
    if annotate:
        ax.text(0.15,
                0.7,
                ("Estimated Peaks:\n"
                "Peak at {:.1f} nm\n"
                "Fitted Peaks:\n"
                "Peak at {:.1f} nm\n"
                "sigma of {:.1f} nm\n").format(peak1, p_fit[1], p_fit[2]),
                horizontalalignment="center",
                verticalalignment="center",
                transform = ax.transAxes,
                fontsize=12)

    return ax

def isolate_ring(data, c, w, axes="y", cutoff=1.0):
    """Keep the entries whose coordinate lies strictly within ``c +/- cutoff*w``.

    ``axes`` selects the field that is compared: "y" -> "y_pick_rot",
    "x" -> "x_pick_rot".
    """
    if axes == "y":
        column = "y_pick_rot"
    elif axes == "x":
        column = "x_pick_rot"

    coordinate = data[column]
    half_width = cutoff*w
    above_lower = coordinate > (c-half_width)
    below_upper = coordinate < (c+half_width)
    selected = np.where(above_lower & below_upper)
    return data[selected]


def rodrigues_rot(P, n0, n1):
    """Rotate points with the rotation that maps direction ``n0`` onto ``n1``.

    Uses Rodrigues' rotation formula about the axis ``n0 x n1``.
    Adapted from https://meshlogic.github.io/posts/jupyter/curve-fitting/fitting-a-circle-to-cluster-of-3d-points/

    Parameters
    ----------
    P : array_like, shape (N, 3) or (3,)
        Point(s) to rotate. A single point is treated as a 1 x 3 matrix.
    n0, n1 : array_like, shape (3,)
        Start and target direction; they do not need to be normalized.

    Returns
    -------
    numpy.ndarray, shape (N, 3)
        The rotated points.

    Notes
    -----
    Parallel directions return the points unchanged. For antiparallel
    directions the rotation axis is not unique; a half turn about an
    arbitrary axis perpendicular to ``n0`` is applied. (Both cases used to
    produce NaN because the zero cross product was normalized.)
    """
    P = np.asarray(P, dtype=float)
    # If P is only 1d array (coords of single point), fix it to be matrix
    if P.ndim == 1:
        P = P[np.newaxis, :]

    # Get vector of rotation k and angle theta
    n0 = np.asarray(n0, dtype=float)
    n1 = np.asarray(n1, dtype=float)
    n0 = n0/linalg.norm(n0)
    n1 = n1/linalg.norm(n1)
    k = np.cross(n0, n1)
    k_norm = linalg.norm(k)
    # Rounding can push the dot product marginally outside [-1, 1].
    cos_theta = float(np.clip(np.dot(n0, n1), -1.0, 1.0))

    if k_norm < 1e-12:
        if cos_theta > 0:
            # Same direction: nothing to rotate.
            return P.copy()
        # Opposite direction: pick any axis perpendicular to n0.
        helper = np.zeros(3)
        helper[np.argmin(np.abs(n0))] = 1.0
        k = np.cross(n0, helper)
        k_norm = linalg.norm(k)

    k = k/k_norm
    theta = np.arccos(cos_theta)

    # Compute rotated points (all rows at once)
    along_axis = np.dot(P, k)[:, np.newaxis] * k
    P_rot = P*np.cos(theta) + np.cross(k, P)*np.sin(theta) + along_axis*(1-np.cos(theta))

    return P_rot


def rotate_ring(XYZ):
    """Fit a plane to the points and rotate them so its normal becomes z.

    The plane is fitted by an SVD of the mean-centered coordinates: the
    normal vector is the third right-singular vector. ``linalg.svd`` returns
    V^T, therefore the third ROW is selected. The centered points are then
    rotated with ``rodrigues_rot`` so that this normal is mapped onto
    ``[0, 0, 1]``.

    Parameters
    ----------
    XYZ : numpy.ndarray
        Point coordinates, one point per row.
        NOTE(review): assumed to have three columns (x, y, z) -- confirm
        against the caller.

    Returns
    -------
    ring_rot : numpy.ndarray
        Mean-centered, rotated points.
    normal : numpy.ndarray
        Normal vector of the fitted plane (before rotation).
    """
    ring_mean = XYZ.mean(axis=0)
    ring_centered = XYZ - ring_mean

    # Only V^T is needed. The reduced SVD avoids building the N x N matrix U,
    # which becomes very large for picks with many localizations. With fewer
    # than three points the reduced V^T would have no third row, so the full
    # decomposition is kept for that case.
    need_full = ring_centered.shape[0] < 3
    _, _, Vt = linalg.svd(ring_centered, full_matrices=need_full)
    normal = Vt[2, :]

    n0 = normal     # direction that is rotated ...
    n1 = [0, 0, 1]  # ... onto the z axis

    ring_rot = rodrigues_rot(ring_centered, n0, n1)

    return ring_rot, normal


def plot_3d_ring(ring_rot1, ring_rot2, color1, color2, label1, label2, ax=None):
    """Scatter two sets of localizations in a common 3D axes.

    Parameters
    ----------
    ring_rot1, ring_rot2 :
        Objects exposing the attributes ``x_pick_rot``, ``y_pick_rot`` and ``z``.
    color1, color2 :
        Marker colours of the two sets.
    label1, label2 : str
        Legend labels of the two sets.
    ax : 3D matplotlib axes, optional
        Target axes. If None, a new figure with a 3D axes is created
        (previously the default ``None`` was used directly and failed).

    Returns
    -------
    The 3D axes that were drawn on.
    """
    if ax is None:
        ax = plt.figure().add_subplot(projection="3d")

    ax.scatter(ring_rot1.x_pick_rot, ring_rot1.y_pick_rot, ring_rot1.z, c=color1, alpha=0.5, label=label1)
    ax.scatter(ring_rot2.x_pick_rot, ring_rot2.y_pick_rot, ring_rot2.z, c=color2, alpha=0.5, label=label2)
    ax.set_xlabel("x (nm)")
    ax.set_ylabel("y (nm)")
    ax.set_zlabel("z (nm)")
    ax.legend(loc="best", labelspacing=0.1)
    ax.view_init(elev=30., azim=30.)
    # Use the same data range on all three axes so the rings are not distorted.
    set_axes_equal_3d(ax)

    return ax
    
    
def set_axes_equal_3d(ax):
    """Give the x, y and z axis of a 3D axes the same span.

    Every axis keeps its current midpoint; the half-width of all three axes
    is set to half of the largest current span.
    """
    limits = np.array([ax.get_xlim3d(), ax.get_ylim3d(), ax.get_zlim3d()])
    midpoints = limits.mean(axis=1)
    half_span = 0.5 * np.abs(limits[:, 0] - limits[:, 1]).max()

    setters = (ax.set_xlim3d, ax.set_ylim3d, ax.set_zlim3d)
    for set_limits, middle in zip(setters, midpoints):
        set_limits([middle - half_span, middle + half_span])
    

def angle_between(u, v, n=None):
    """Angle between the vectors ``u`` and ``v`` in degrees.

    Without ``n`` the length of ``u x v`` is used, so the result is never
    negative. With ``n`` the cross product is projected onto ``n`` instead,
    which makes the result signed.
    """
    cross_uv = np.cross(u, v)
    if n is not None:
        sine_term = np.dot(n, cross_uv)
    else:
        sine_term = np.linalg.norm(cross_uv)
    return np.arctan2(sine_term, np.dot(u, v))*180/np.pi


def calc_R(x, y, xc, yc):
    """Distance of each 2D point ``(x, y)`` from the center ``(xc, yc)``.

    Adapted from https://gist.github.com/lorenzoriano/6799568
    """
    dx = x - xc
    dy = y - yc
    return np.sqrt(dx**2 + dy**2)


def f(c, x, y):
    """Residuals for the circle fit: distance of every point from the
    center ``c = (xc, yc)`` minus the median of these distances."""
    distances = calc_R(x, y, *c)
    typical_radius = np.median(distances)
    return distances - typical_radius


def leastsq_circle(x,y):
    """Fit a circle to 2D points with ``optimize.leastsq``.

    The optimisation starts at the coordinate-wise median of the points and
    minimises the residuals returned by ``f``.

    Returns:
        tuple: ``(xc, yc, R, residu)`` -- fitted center, radius (median
        distance of the points from the fitted center) and the sum of
        squared deviations of the distances from that radius.
    """
    start = (np.median(x), np.median(y))
    fitted_center, _status = optimize.leastsq(f, start, args=(x, y))
    xc, yc = fitted_center

    distances = calc_R(x, y, *fitted_center)
    R = np.median(distances)
    residu = np.sum((distances - R)**2)
    return xc, yc, R, residu


def plot_data_circle(x, y, xc, yc, R, id, center=True, ax=None):
    """Plot 2D points together with a fitted circle.

    Parameters
    ----------
    x, y : numpy.ndarray
        Point coordinates. They are not modified (earlier versions shifted
        the caller's arrays in place when ``center`` was True).
    xc, yc : float
        Center of the fitted circle.
    R : float
        Radius of the fitted circle.
    id : int
        0 selects label "spore" and colour ``blue``; any other value selects
        "mother" and ``orange``.
    center : bool
        If True, points and circle are shifted so the circle center is (0, 0).
    ax : matplotlib axes, optional
        Target axes; the current axes are used if None.

    Returns
    -------
    None
    """
    if id == 0:
        label = "spore"
        color = blue
    else:
        label = "mother"
        color = orange

    if ax is None:
        ax = plt.gca()

    if center:
        # Out-of-place subtraction: `x -= xc` would also shift the arrays
        # (or record-array fields) owned by the caller.
        x = x - xc
        y = y - yc
        xc = 0
        yc = 0

    theta_fit = np.linspace(-np.pi, np.pi, 180)
    x_fit = xc + R*np.cos(theta_fit)
    y_fit = yc + R*np.sin(theta_fit)

    # plot fit
    ax.plot(x_fit, y_fit, label="Fitted circle", lw=2, c=red)
    # NOTE(review): a single point without a marker style; presumably meant
    # to mark the center -- confirm whether a marker should be set.
    ax.plot([xc], [yc], mec="y", mew=1,  c=red)

    # plot data
    ax.scatter(x, y, alpha=0.4,  label="Projected locs", marker=".", c=color)

    ax.set_xlabel("x rotated (nm)")
    ax.set_ylabel("y rotated (nm)")
    ax.set_xlim([-800,800])
    ax.set_aspect("equal",adjustable="datalim")

    ax.legend(loc="best", labelspacing=0.1)
    ax.set_title("Least squares circle {}\n"
                 "Fit radius: {:.1f} nm".format(label,R),loc="left",fontsize=14)

    return


def check_consistency(ring_par):
    """Placeholder consistency check that accepts every input.

    The plausibility checks sketched below (amplitude ratio, distance of the
    centers and width ratio of two fitted peaks) are disabled, so
    ``ring_par`` is currently ignored.

    Returns:
        bool: always ``True``.
    """
    # Disabled checks, kept for reference:
    #    ev = 0
    #
    #    amp_1, c_1, w_1 = ring_par[0]
    #    amp_2, c_2, w_2 = ring_par[1]
    #
    #    if amp_1/amp_2 >= 0.3 and amp_2/amp_1 >= 0.3:
    #        ev +=1
    #
    #    if abs(c_1-c_2) > 30 and abs(c_1-c_2) < 300:
    #        ev +=1
    #
    #    if w_1/w_2 >= 0.3 and w_2/w_1 >=0.3:
    #        ev +=1
    accepted = True
    return accepted

def plot_cum_exp(pooled_locs, fit_result_len, fit_result_dark, id, ax=None):
    """Plot the cumulative dark-time distribution together with its fit.

    Parameters
    ----------
    pooled_locs :
        Object exposing the dark times as attribute ``dark`` (array-like).
        It is not modified; a sorted copy is plotted. (Sorting the attribute
        in place, as done before, reorders that field independently of the
        other fields when ``dark`` is a view into a record array.)
    fit_result_len :
        Unused; kept for interface compatibility.
    fit_result_dark :
        Fit result exposing ``best_values`` (keys "a", "t", "c") and
        ``best_fit``.
        NOTE(review): ``best_fit`` is plotted against the sorted dark times,
        so it is assumed to have been computed on sorted data -- confirm.
    id : int
        0 selects label 'spore' and colour ``blue``; any other value selects
        'mother' and ``orange`` (same convention as ``plot_data_circle``).
    ax : matplotlib axes, optional
        Target axes; the current axes are used if None.

    Returns
    -------
    The axes that were drawn on.
    """
    if id == 0:
        color = blue
        label = 'spore'
    else:
        color = orange
        label = 'mother'

    if ax is None:
        ax = plt.gca()

    data = np.sort(pooled_locs.dark)
    y = np.arange(1, len(data) + 1)

    a = fit_result_dark.best_values["a"]
    t = fit_result_dark.best_values["t"]
    c = fit_result_dark.best_values["c"]

    ax.set_title(
        "Dark time (cumulative) {}\n"
        r"$Fit: {:.2f}\cdot(1-exp(x/{:.2f}))+{:.2f}$".format(label, a, t, c),loc="left",fontsize=14)

    ax.semilogx(data, y, c=color, label="Data")
    ax.semilogx(data, fit_result_dark.best_fit, c=red, label="Fit")
    ax.legend(loc="best")
    ax.set_xlabel("Duration (frames)")
    ax.set_ylabel("Frequency")

    return ax


def save_ring_locs(locs, info, path, file_id, pick, id, link=False, pixelsize=130):
    """Save localizations to ``<path>/ring_locs/`` as an HDF5 file.

    ``x`` and ``y`` are divided by ``pixelsize`` before saving. The
    conversion is done on a copy, so the caller's ``locs`` stay untouched
    (previously they were rescaled in place on every call).

    Parameters
    ----------
    locs :
        Localizations with the fields ``x`` and ``y``; must provide ``copy()``.
    info :
        Metadata passed on to ``io.save_locs``.
    path : str
        Base folder; the sub-folder "ring_locs" is created if necessary.
    file_id, pick, id :
        Used to build the file name
        ``file_<file_id>_pick_<pick>_ring_<id>[_link].hdf5``.
    link : bool
        If True, the file name ends with "_link.hdf5". (The name shadows the
        imported ``link`` function inside this function only.)
    pixelsize : number
        Divisor applied to ``x`` and ``y``. Defaults to 130, the value that
        was hard-coded before.

    NOTE(review): ``x_pick_rot``/``y_pick_rot`` are not rescaled here --
    confirm whether that is intended.
    """
    ending = "_link.hdf5" if link else ".hdf5"

    export_locs = locs.copy()
    export_locs.x /= pixelsize
    export_locs.y /= pixelsize

    locs_name = "file_{}_pick_{}_ring_{}{}".format(file_id, pick, id, ending)
    locs_path = os.path.join(path,"ring_locs")
    locs_path_name = os.path.join(locs_path, locs_name)

    os.makedirs(locs_path, exist_ok=True)

    io.save_locs(locs_path_name, export_locs, info)

    
def export_pick_img(locs, path, file_id, pick, id, link=False, pixelsize=130):
    """Render localizations and save the image to ``<path>/ring_images/``.

    A copy of ``locs`` is used: its ``x`` and ``y`` are divided by
    ``pixelsize``, and the image is rendered with ``render`` over the
    bounding box of these coordinates (oversampling 50, "smooth" blur).

    Parameters
    ----------
    locs :
        Localizations with the fields ``x`` and ``y``; must provide ``copy()``.
    path : str
        Base folder; the sub-folder "ring_images" is created if necessary.
    file_id, pick, id :
        Used to build the file name
        ``file_<file_id>_pick_<pick>_ring_<id>[_link].png``.
    link : bool
        If True, the file name ends with "_link.png".
    pixelsize : number
        Divisor applied to ``x`` and ``y``. Defaults to 130, the value that
        was hard-coded before; pass the value returned by ``load_data`` for
        data with a different pixel size.
    """
    ending = "_link.png" if link else ".png"

    export_locs = locs.copy()
    export_locs.x /= pixelsize
    export_locs.y /= pixelsize

    x_min = np.min(export_locs.x)
    x_max = np.max(export_locs.x)
    y_min = np.min(export_locs.y)
    y_max = np.max(export_locs.y)

    viewport = (y_min, x_min), (y_max, x_max)
    oversampling = 50
    # The first return value of render() is not needed here.
    _, image = render(export_locs, viewport = viewport, oversampling=oversampling, blur_method="smooth")

    img_name = "file_{}_pick_{}_ring_{}{}".format(file_id, pick, id, ending)
    img_path = os.path.join(path,"ring_images")
    img_path_name = os.path.join(img_path,img_name)

    os.makedirs(img_path, exist_ok=True)

    plt.imsave(img_path_name, image, cmap="hot")

## Load data

In [148]:
# Folder containing the picked *.hdf5 files. Uncomment the next line to select
# the folder interactively instead of using the hard-coded path below.
#path = gui_fname()
path = r'W:\users\reinhardt\z.software\Git\spor-PAINT\dev_sr\spor-paint\ZapA_FtsZ\picked'

# Channel names; they are searched for (case-insensitively) in the file names.
protein1 = 'FtsZ'
protein2 = 'ZapA'

# NOTE: load_files() also changes the working directory to `path`.
filenames_all = load_files(path)

# Analysis settings. NOTE(review): their use is in later cells that are not
# visible here -- descriptions below are limited to the stated units.
plotting = True
max_dist = 130 # nanometer
max_dark_time = 15 # frames
binning = 50 # binning for peak histogram


21 HDF5 files found.


In [149]:
# Check whether some of the found hdf5 files were already analyzed.
# If yes, load the dataframe with the previous results (None otherwise).
df_ring_data = load_ring_data_df(path, "exchange_data.pkl")


# Identify which files have not yet been analyzed.
# New FOV indices continue after the highest index used in earlier results.
if df_ring_data is not None:
    filenames = identify_new_files(filenames_all, df_ring_data)
    fov_id_start = df_ring_data['fov_id'].max()
else:
    filenames = filenames_all
    fov_id_start = 0

# Create a dataframe that records which file was taken from which FOV, which cell type it contains (spor or veg)
# and which channel the data is from (protein1 or protein2).
# columns = ['fov_id', 'filename', 'cell_type', 'channel'] (plus 'index' from reset_index)
df_fov_file_assign = identify_fov_cell_type_channel(df_ring_data, filenames, fov_id_start, protein1, protein2)


No results of previously analyzed datasets were detected.

The cell type (spr or veg) of these files could not be determined
and thus cannot be used for further analysis:


In [150]:
# Overview of the file assignment: for every FOV index assigned in this run
# (i.e. indices above fov_id_start), print the file found for each
# cell type / channel combination, or '--' if there is none.
# Only the first matching file per combination is printed.
for fov_id in range(int(fov_id_start)+1, int(df_fov_file_assign['fov_id'].max())+1):
    
    print('FOV ID:', fov_id)
    files_fov_id = df_fov_file_assign.loc[df_fov_file_assign['fov_id'] == fov_id]

    
    # --- sporulating cells ---
    print('  spor : ')

    files_spor = files_fov_id.loc[files_fov_id['cell_type'] == 'spor']
    
    
    
    print('    {} : '.format(protein1), end = '')
    
    protein1_spor_name = files_spor.loc[files_spor['channel'] == protein1]['filename']

    if protein1_spor_name.empty:
        print('--')
    else:
        print(protein1_spor_name.iloc[0])
      
    
    
    print('    {} : '.format(protein2), end = '')
   
    protein2_spor_name = files_spor.loc[files_spor['channel'] == protein2]['filename']

    if protein2_spor_name.empty:
        print('--')
    else:
        print(protein2_spor_name.iloc[0])
    
    
        
    # --- vegetative cells ---
    print('  veg : ')

    files_veg = files_fov_id.loc[files_fov_id['cell_type'] == 'veg']
    
    print('    {} : '.format(protein1), end = '')
        
    protein1_veg_name = files_veg.loc[files_veg['channel'] == protein1]['filename']

    if protein1_veg_name.empty:
        print('--')
    else:
        print(protein1_veg_name.iloc[0])
        
        
            
    print('    {} : '.format(protein2), end = '')

    protein2_veg_name = files_veg.loc[files_veg['channel'] == protein2]['filename']

    if protein2_veg_name.empty:
        print('--')
    else:
        print(protein2_veg_name.iloc[0])

FOV ID: 1
  spor : 
    FtsZ : spor_FtsZ_ZapAandFtsZ_230516_artemis_kcb102_fov3_180pM-r5_FtsZ_DP_1_drift_aligned_picked.hdf5
    ZapA : spor_ZapA_ZapAandFtsZ_230516_artemis_kcb102_fov3_1nM-r3_ZapA_DP_1_drift_aligned_picked.hdf5
  veg : 
    FtsZ : veg_FtsZ_ZapAandFtsZ_230516_artemis_kcb102_fov3_180pM-r5_FtsZ_DP_1_drift_aligned_picked.hdf5
    ZapA : veg_ZapA_ZapAandFtsZ_230516_artemis_kcb102_fov3_1nM-r3_ZapA_DP_1_drift_aligned_picked.hdf5
FOV ID: 2
  spor : 
    FtsZ : --
    ZapA : --
  veg : 
    FtsZ : veg_FtsZ_ZapAandFtsZ_230516_artemis_kcb102_fov1_180pM-R5_FtsZ_DP_1_drift_aligned_picked.hdf5
    ZapA : veg_ZapA_ZapAandFtsZ_230516_artemis_kcb102_fov1_1nM-R3_ZapA_DP_1_drift_aligned_picked.hdf5
FOV ID: 3
  spor : 
    FtsZ : spor_FtsZ_ZapAandFtsZ_230517_apollo_kcb102_fov4_r5-180pM_FtsZ_DP_1_drift_aligned_picked.hdf5
    ZapA : spor_ZapA_ZapAandFtsZ_230517_apollo_kcb102_fov4_r3-1nM_ZapA_DP_1_drift_aligned_filter_picked.hdf5
  veg : 
    FtsZ : veg_FtsZ_ZapAandFtsZ_230517_apollo_kcb102

## Main loop

In [151]:
# initialize containers
# One container per channel: the plain names and the `fts_`-prefixed names.
# NOTE(review): which channel each set belongs to is decided in the loop
# below, which is only partly visible here.
ring_data, fts_ring_data = [], []
ring_locs,fts_ring_locs = {}, {}
ring_locs_linked, fts_ring_locs_linked = {}, {}
ring_kinetics, fts_ring_kinetics = {}, {}
ring_kinetics_fit, fts_ring_kinetics_fit = {}, {}
ring_radii, fts_ring_radii = {}, {}
ring_rot, fts_ring_rot = {}, {}
ring_angles,fts_ring_angles = {}, {}
center, fts_center = {}, {}

rings_excluded, fts_rings_excluded = [], []
ring_n_events, fts_ring_n_events = {}, {}
circle_plots, fts_circle_plots = {}, {}
file_id = 0

analysis_folder = os.path.join(path, "analysis")
#analysis_folder_excluded = os.path.join(analysis_folder, "excluded")

# image export settings
img_format = ".png"
dpi = 100

# cutoff is multiplied by the sigma of the peak fit for the separation of the two rings
# smaller cutoff means smaller ring sections
cutoff = 1.25

# prepare analysis folder
# The "excluded" sub-folder is disabled above; creating it here referenced
# an undefined name and raised NameError whenever the analysis folder did
# not exist yet.
os.makedirs(analysis_folder, exist_ok=True)

# Main analysis loop.
# For every FOV and cell type, the localization files of both channels are loaded and,
# pick by pick, the peak positions of the two channels along the rotated pick axis
# (y_pick_rot) are fitted. Their difference is stored as the peak-to-peak distance
# and a four-panel summary figure is saved to the analysis folder.
for fov_id in tqdm(range(1, int(df_fov_file_assign['fov_id'].max())+1), desc="Processing FOVs"):
    for cell_type in ['spor', 'veg']:

        # look up the file of each channel for this FOV / cell type combination
        fov_cell_mask = ((df_fov_file_assign['fov_id'] == fov_id)
                         & (df_fov_file_assign['cell_type'] == cell_type))
        filename1 = df_fov_file_assign.loc[fov_cell_mask
                                           & (df_fov_file_assign['channel'] == protein1)]['filename']
        filename2 = df_fov_file_assign.loc[fov_cell_mask
                                           & (df_fov_file_assign['channel'] == protein2)]['filename']

        if filename1.empty:
            print('No file for channel 1 for {} cells in FOV {}.'.format(cell_type, str(fov_id)))
            continue
        if filename2.empty:
            print('No file for channel 2 for {} cells in FOV {}.'.format(cell_type, str(fov_id)))
            continue

        filename1 = filename1.iloc[0]
        filename2 = filename2.iloc[0]

        # load locs and convert distances from px to nm (Attention!)
        locs1, info1, pixelsize = load_data(os.path.join(path, filename1))
        locs2, info2, pixelsize = load_data(os.path.join(path, filename2))

        # iterate over picks in a file (pick IDs are taken from channel 1)
        for pick in tqdm(np.unique(locs1.group), desc="Processing picks"):

            # select locs from pick
            pick_locs1 = locs1[locs1.group == pick]
            pick_locs2 = locs2[locs2.group == pick]

            # A pick that exists only in channel 1 has no partner to measure a
            # distance to; record it as excluded instead of failing on the
            # empty selection further down.
            if len(pick_locs2) == 0:
                print('No localizations in channel 2 for pick {} ({} cells, FOV {}).'.format(
                    pick, cell_type, str(fov_id)))
                rings_excluded.append([fov_id, cell_type, pick])
                continue

            ################
            # (1) estimate the position of the ring using a histogram along the pick direction
            #  a ring should yield a gaussian peak (2D projection in XY)
            ################

            # shared bin edges so that both channels are histogrammed on the same grid
            max_bin = max(pick_locs1.y_pick_rot.max(), pick_locs2.y_pick_rot.max())
            step_size = max_bin/binning
            bins = np.arange(0, max_bin+1, step_size)
            estimated_peak1, r_par1, hist_data1 = find_peak(pick_locs1, binning=binning, bins=bins, axes="y")
            estimated_peak2, r_par2, hist_data2 = find_peak(pick_locs2, binning=binning, bins=bins, axes="y")
            # estimated peaks from MeanShift analysis to initialize gaussian fits
            # r_par yields an array containing: [amplitude_1, center_1, width_1, amplitude_2, center_2, width_2]

            # separate ring parameter to the corresponding ring
            ring_parameter1 = [r_par1]  # to work as the double band script
            ring_parameter2 = [r_par2]  # to work as the double band script

            # signed distance: fitted center of channel 1 minus fitted center of channel 2
            peak_dist = ring_parameter1[0][1] - ring_parameter2[0][1]

            # set up plot with gridspec
            fig = plt.figure(figsize=(18, 14), constrained_layout=True)
            gs = fig.add_gridspec(2, 2)
            fig.suptitle(("FOV {}, {}, Pick {} - Ring analysis\n"
                          "{}: {}\n"
                          "{}: {}").format(fov_id, cell_type, pick, protein1, filename1, protein2, filename2),
                         fontsize=16,
                         ha="center")

            # positional arguments and settings shared by all plot_peak_dist calls
            channel1_args = (pick_locs1, hist_data1, estimated_peak1, r_par1)
            channel2_args = (pick_locs2, hist_data2, estimated_peak2, r_par2)
            peak_plot_kwargs = dict(binning=binning, cutoff=cutoff, axes="y_pick_rot")
            overlay_title = 'Distance from {} to {}: {:.1f} nm'.format(protein1, protein2, peak_dist)

            # top left: channel 1 on its own
            ax1 = fig.add_subplot(gs[0, 0])
            plot_peak_dist(*channel1_args,
                           title=protein1,
                           color_hist=blue,
                           color_fit=blue,
                           ax=ax1,
                           **peak_plot_kwargs)

            # bottom left: channel 2 on its own
            ax2 = fig.add_subplot(gs[1, 0])
            plot_peak_dist(*channel2_args,
                           title=protein2,
                           color_hist=orange,
                           color_fit=orange,
                           ax=ax2,
                           **peak_plot_kwargs)

            # bottom right: both channels overlaid, titled with the measured distance
            ax3 = fig.add_subplot(gs[1, 1])
            plot_peak_dist(*channel1_args,
                           title=overlay_title,
                           color_hist=blue,
                           color_fit=blue,
                           annotate=False,
                           ax=ax3,
                           **peak_plot_kwargs)
            plot_peak_dist(*channel2_args,
                           title=overlay_title,
                           color_hist=orange,
                           color_fit=orange,
                           annotate=False,
                           ax=ax3,
                           **peak_plot_kwargs)

            # plot 3D ring
            ax4 = fig.add_subplot(gs[0, 1], projection="3d")
            plot_3d_ring(pick_locs1, pick_locs2, blue, orange, protein1, protein2, ax=ax4)

            # align the x-ranges of the histogram panels with the channel 1 panel
            ax2.set_xlim(ax1.get_xlim())
            ax3.set_xlim(ax1.get_xlim())

            ################
            # (5) append data into large array
            ################

            ring_data.append([fov_id,  # FOV index
                              cell_type,  # veg or spore
                              protein1,
                              protein2,
                              filename1,  # filename of protein species1
                              filename2,  # filename of protein species2
                              pick,  # pick number
                              peak_dist,  # peak-to-peak distance (nm)
                              ])

            ################
            # (6) save plots
            ################

            # ring analysis
            img_fname = "fov_{}_{}_pick_{}".format(fov_id, cell_type, pick)
            img_name = os.path.join(analysis_folder, img_fname)
            # derive the file format from img_format so extension and content always agree
            fig.savefig(img_name+img_format, dpi=dpi, format=img_format.lstrip("."))
            # close the figure explicitly; one figure per pick would otherwise pile up in memory
            plt.close(fig)


print("Calculation finished.")
print("Total: {} rings analysed, {} excluded.".format(len(ring_data), len(rings_excluded)))



# Save analysis data
# One row per analysed pick; the column order must match the lists appended to ring_data.
ring_data_columns = ["fov_id",
                     "cell_type",
                     "protein1",
                     "protein2",
                     "filename1",
                     "filename2",
                     "group",
                     "p2p distance"
                     ]
df_ring_data_add = pd.DataFrame(ring_data, columns=ring_data_columns)

if df_ring_data is not None:
    # Results written by the earlier version of this cell label the protein2
    # column "protein1" as well. Relabel the second occurrence so that old and
    # new rows line up and the merged frame has unique column names.
    previous_columns = list(df_ring_data.columns)
    if previous_columns.count("protein1") == 2:
        second_protein1 = [i for i, col in enumerate(previous_columns) if col == "protein1"][1]
        previous_columns[second_protein1] = "protein2"
        df_ring_data = df_ring_data.copy()
        df_ring_data.columns = previous_columns
    df_ring_data = pd.concat([df_ring_data, df_ring_data_add], ignore_index = True)
else:
    df_ring_data = df_ring_data_add

# both files are written relative to the current working directory
df_ring_data.to_csv("exchange_data.csv")
# Save dataframe with cell means for easy loading of data for postprocessing
df_ring_data.to_pickle("exchange_data.pkl")
        

Processing FOVs:   0%|          | 0/6 [00:00<?, ?it/s]

Processing picks:   0%|          | 0/19 [00:00<?, ?it/s]

Processing picks:   0%|          | 0/5 [00:00<?, ?it/s]

No file for channel 1 for spor cells in FOV 2.


Processing picks:   0%|          | 0/12 [00:00<?, ?it/s]

Processing picks:   0%|          | 0/14 [00:00<?, ?it/s]

Processing picks:   0%|          | 0/3 [00:00<?, ?it/s]

Processing picks:   0%|          | 0/31 [00:00<?, ?it/s]

Processing picks:   0%|          | 0/5 [00:00<?, ?it/s]

Processing picks:   0%|          | 0/32 [00:00<?, ?it/s]

No file for channel 1 for veg cells in FOV 5.


Processing picks:   0%|          | 0/20 [00:00<?, ?it/s]

Processing picks:   0%|          | 0/5 [00:00<?, ?it/s]

Calculation finished.
Total: 146 rings analysed, 0 excluded.


In [157]:
# Histogram of the peak-to-peak distances, split by cell type, saved as "Distance".
df_ring_data = load_ring_data_df(path, "exchange_data.pkl")

data_veg = df_ring_data[df_ring_data['cell_type'] == 'veg']
data_spor = df_ring_data[df_ring_data['cell_type'] == 'spor']

# histogram
# Take the range from both subsets together: Series.min()/max() skip NaN, so an
# empty subset no longer poisons the range as it could with the builtin min/max.
p2p_both = pd.concat([data_veg['p2p distance'], data_spor['p2p distance']])
max_bin = p2p_both.max()
min_bin = p2p_both.min()

N_bins = 20
# linspace gives exactly N_bins bins whose outer edges equal min_bin and max_bin.
# np.arange with a float step does neither reliably, so the largest value could
# fall outside the last bin or extra empty bins could be appended.
bins = np.linspace(min_bin, max_bin, N_bins + 1)
step_size = (max_bin-min_bin)/N_bins

# both cell types share the same bin edges, hence identical bin centers
n_veg, bins_veg = np.histogram(data_veg['p2p distance'], bins=bins)
centers_veg = (bins[:-1] + bins[1:]) / 2

n_spor, bins_spor = np.histogram(data_spor['p2p distance'], bins=bins)
centers_spor = (bins[:-1] + bins[1:]) / 2

binwidth = bins[1]-bins[0]

fig, ax = plt.subplots()

ax.bar(centers_spor, n_spor, width=binwidth, color=red, alpha = 0.5, label = 'spor')
ax.bar(centers_veg, n_veg, width=binwidth, color=blue, alpha = 0.5, label = 'veg')

# dashed reference line at zero distance
ax.axvline(0,c='gray', linewidth=2, linestyle="--")

ax.set_title('Distance from {} to {}'.format(protein1, protein2),fontsize=14)
ax.set_xlabel("Distance (nm)")
ax.set_ylabel("Counts")

ax.legend()

# ring analysis
img_fname = "Distance"
img_name = os.path.join(analysis_folder, img_fname)
# derive the file format from img_format so extension and content always agree
fig.savefig(img_name+img_format, dpi=dpi, format=img_format.lstrip("."))
plt.close(fig)

Results of previously analyzed datasets were detected.
