In [5]:
import numpy as np
import os
from tqdm import tqdm

# plotting 
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
import PyPDF2

# data manager and analysis
import vodex as vx
import numan as nu

# writing files
import tifffile as tif

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Project structure: 

Provide the project folder with the "processed" folder created in the previous notebook. 

As you keep going with the analysis, the folder will have the following structure: 

```
processed
│   experiment.json <----------------------------------- (DONE in 01) the file that contains everything about the experiment, you are creating it once at the beginning of the processing and reusing ever after
└───dff_movie  <---------------------------------------- (DONE in 01) the dff movie :) 
│   │   dff_movie_0000.tif
│   │   dff_movie_0001.tif
│   │   ... 
└───tscore_volumes  <----------------------------------- (DONE in 02) t-score tif files per pair
│   └───2v3
│       │   tscore_2v3.tif
│   └───3v5
│       │   tscore_3v5.tif
│   └───2v5
│       │   tscore_2v5.tif
│   └───2vB
│       │   tscore_2vB.tif
│   └───3vB
│       │   tscore_3vB.tif
│   └───5vB
│       │   tscore_5vB.tif
│   └───BvB1
│       │   tscore_BvB1.tif
│   └───BvB2
│       │   tscore_BvB2.tif
│   └───BvB3
│       │   tscore_BvB3.tif
└───diff_volumes  <------------------------------------- (DONE in 02) absolute difference tif files per pair
│   └───2v3
│       │   diff_2v3.tif
│   └───3v5
│       │   diff_3v5.tif
│   └───...
└───spots
│   └───imaris  <--------------------------------------- (DONE after 02) ATTENTION : You need to put stuff generated by imaris into this folder!!! 
│       │   └───tscore_2v3_Statistics
│       │       │     tscore_2v3_Position.csv
│       │       │     tscore_2v3_Diameter.csv
│       │       │     ...
│       │   └───tscore_3v5_Statistics
│       │       │     tscore_3v5_Position.csv
│       │       │     tscore_3v5_Diameter.csv
│       │       │     ...
│       │   └───tscore_2v5_Statistics
│       │       │     ...
│       │   └───...
│   └───signals  <-------------------------------------- (DONE in 03, 04 and 05, WILL BE UPDATED in this notebook) json files with the extracted signals, also will have the group info after you added it
│       │   spots_2v3.json
│       │   spots_3v5.json
│       │   spots_2v5.json
│       │     ...
│   └───reports  <---------------------------------- tiffs and pdf with the cells significant in any pairwise comparison
│       └───all_significant  <---------------------- tiffs and pdf with all significant cells per group
│           │   └───signals  <---------------------- (DONE in 05) pdfs with signals
│           │       │     ...
│           │   └───images <------------------------ tif masks 
│           │       │     ...
│       └───groupped  <----------------------------- tiffs and pdf where the cells are groupped based on signal shape .. or anything else you want
│           │   readme.txt  <----------------------- ATTENTION : you need to describe the groups
│           │   └───signals  <---------------------- (WILL BE DONE in this notebook) pdfs with signals
│           │       │     ...
│           │   └───images  <----------------------- (WILL BE DONE in this notebook) tif masks 
│           │       │     ...
````

# Set project folder

The processed/spots/signals should already exist and have the extracted signals saved in there. 

In [2]:
# Root of the project; everything produced so far lives in its "processed" sub-folder.
project_folder = "D:/Code/repos/numan/notebooks/data/2vs3vs5/"
path = os.path.join(project_folder, 'processed')

# The extracted signals must already be on disk before this notebook can run.
assert os.path.isdir(os.path.join(path, "spots", "signals")), (
    "the directory 'processed/spots/signals' doesn't exist in the project,"
    " did you forget to run the previous notebook?"
)

# The relative paths used in the cells below are resolved against "processed".
os.chdir(path)
os.getcwd()

'D:\\Code\\repos\\numan\\notebooks\\data\\2vs3vs5\\processed'

# Make pdfs to choose groups

## (SKIP THIS STEP if you already have the pdfs ending with "_choose.pdf" ) 

In [3]:
# get the folder ready
# exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the folder is already there.
group_path = os.path.join(path, 'spots', 'reports', 'groupped')
os.makedirs(group_path, exist_ok=True)

Create the reports for all the t-score images again: this outputs the same psh_b plots as in 05, for all the cells that were significant in at least one pairwise comparison... but this time there will be checkboxes!

**You will use this pdf to create groups** : 
* Open the pdf (ending in _choose.pdf) and click on the cells you want to have in a group
* Save the pdf using "Save as", replacing the _choose part of the file name with the name of the group
* Repeat to create as many groups as you want, just don't name them the same as the ones that already exist. In the next step we'll create masks for these groups...

In [6]:
# Load the experiment description and build the report generator on top of it.
experiment = vx.from_json(vx.Experiment, 'experiment.json')
rep = nu.Reports(path, experiment)

# Same parameters as in notebook 05.
# Number of blanks to include before and after the stimulus:
N_BLANKS_BEFORE_STIM = 3
N_BLANKS_AFTER_STIM = 5

# Time points to overlap, listed in the order in which they should be overlapped.
time_centers = [[13, 7, 20], [27, 37, 53], [43, 60, 70]]

# One selection report (with checkboxes) per t-score image.
spot_tags = ["2v3", "2v5", "3v5", "2vB", "3vB", "5vB"]
for spot_tag in tqdm(spot_tags):
    rep.make_group_selection(
        spot_tag=spot_tag,
        plot_type="psh_b",
        time_centers=time_centers,
        bb=N_BLANKS_BEFORE_STIM,
        ba=N_BLANKS_AFTER_STIM,
    )

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:54<00:00,  9.01s/it]


# Add groups from pdfs to spots

In [12]:
def add_group_from_pdf(spot_tag, group_tag,  rewrite = False):
    """
    Turns the checkboxes ticked in a group pdf into a boolean group and stores it with the spots.

    Reads "spots/reports/groupped/from_{spot_tag}_significance_sigAny2v3v5vB_{group_tag}.pdf",
    collects the form fields whose value is '/Yes', interprets their names as spot indices,
    adds the resulting boolean array to the spots loaded from
    "spots/signals/spots_{spot_tag}.json" under the name `group_tag`, and writes the spots
    back to the same json file. Paths are relative to the current working directory.

    :param spot_tag: tag of the spots file / t-score image the pdf was generated from, e.g. "2v3"
    :param group_tag: name of the group; also the last part of the pdf file name
    :param rewrite: passed on to `spots.add_groups`
        (presumably allows replacing a group that already exists; confirm in numan)
    """
    pdf_file = f"spots/reports/groupped/from_{spot_tag}_significance_sigAny2v3v5vB_{group_tag}.pdf"
    spots_file = f"spots/signals/spots_{spot_tag}.json"

    # get checked boxes from pdf; the context manager closes the file even if parsing fails
    with open(pdf_file, "rb") as f:
        # getFields() may return None when the pdf has no form fields at all
        fields = PyPDF2.PdfFileReader(f).getFields() or {}
        # a field without a '/V' entry counts as "not checked" instead of raising KeyError
        spots_in_group = np.array(
            [int(name) for name, field in fields.items() if field.get('/V') == '/Yes'],
            dtype=int)

    # prepare the new group: True for every spot whose checkbox was ticked
    spots = nu.Spots.from_json(spots_file)
    is_in_group = np.zeros((spots.num_spots,)).astype(bool)
    is_in_group[spots_in_group] = True

    # add the new group to spots and save
    spots.add_groups({group_tag:is_in_group}, rewrite = rewrite)
    spots.to_json(spots_file)

    print(f"Added group {group_tag} to spots_{spot_tag}.json")

Here is an example with spots from tscore image for 2v3 and 2v5: we created groups for both that we called ( in the file tag ) 235_up, 2_up, 2_down ... ( 6 pdfs total ) and also a group 5_down for 2v5 only

You will have to add your own groups. You can remove elements from the list and keep just one (for example, if you only processed one t-score image).

In [13]:
# Groups selected by hand from the 2v3 pdf (one saved pdf per group tag).
spot_tag = "2v3"
group_tags = ["235_up", "2_up", "2_down"]
for group_tag in group_tags:
    add_group_from_pdf(spot_tag, group_tag, rewrite=False)

Added group 235_up to spots_2v3.json
Added group 2_up to spots_2v3.json
Added group 2_down to spots_2v3.json


In [14]:
# Groups selected by hand from the 2v5 pdf (one saved pdf per group tag).
spot_tag = "2v5"
group_tags = ["235_up", "2_up", "2_down", "5_down"]
for group_tag in group_tags:
    add_group_from_pdf(spot_tag, group_tag, rewrite=False)

Added group 235_up to spots_2v5.json
Added group 2_up to spots_2v5.json
Added group 2_down to spots_2v5.json
Added group 5_down to spots_2v5.json


# Create group masks and save as tif

In [15]:
# Output folders for the per-group signal pdfs and the per-group tif masks.
groupped_signals_dir = os.path.join(path, 'spots', 'reports', 'groupped', 'signals')
groupped_images_dir = os.path.join(path, 'spots', 'reports', 'groupped', 'images')

# exist_ok=True replaces the separate "is it there yet?" check and keeps the cell re-runnable.
os.makedirs(groupped_signals_dir, exist_ok=True)
os.makedirs(groupped_images_dir, exist_ok=True)

get wrapper functions

In [18]:
# Load volume 0 only to read off its dimensions (T, Z, Y, X).
T, Z, Y, X = experiment.volume_manager.load_volumes([0], verbose=False).shape
# Voxel size in um, in ZYX order.
RESOLUTION = [4.8, 1.17, 1.17]

def write_group_mask(spot_tag , group_tag):
    """
    Writes the mask of one group of spots as an ImageJ-compatible tif.

    Loads "spots/signals/spots_{spot_tag}.json" (relative to the current working directory),
    builds the mask for the group `group_tag` with the volume shape (Z, Y, X) and saves it, cast
    to uint16, as "{groupped_images_dir}/{spot_tag}/mask_from_{spot_tag}_group_{group_tag}.tif".

    Relies on the notebook-level names `groupped_images_dir`, `Z`, `Y`, `X` and `RESOLUTION`
    (voxel size in um, ZYX order).

    :param spot_tag: tag of the spots file / t-score image, e.g. "2v3"
    :param group_tag: name of a group stored in the spots file; a missing group raises KeyError
    """
    spot_dir = f'{groupped_images_dir}/{spot_tag}'
    # makedirs also creates missing parent folders and does not fail if the folder exists
    os.makedirs(spot_dir, exist_ok=True)

    spots = nu.Spots.from_json(f"spots/signals/spots_{spot_tag}.json")
    mask = spots.get_group_mask(spots.groups[group_tag], (Z,Y,X))

    z_step, y_step, x_step = RESOLUTION
    tif.imwrite(f'{spot_dir}/mask_from_{spot_tag}_group_{group_tag}.tif',
                mask.astype(np.uint16), shape=(Z,Y,X),
                metadata={'spacing': z_step, 'unit': 'um','axes': 'ZYX'},
                # tifffile expects (X resolution, Y resolution) in pixels per unit
                resolution=(1/x_step, 1/y_step), imagej=True)
    print(f"Created mask for spots from {spot_tag} tscore image, for group {group_tag}")

100%|████████████████████████████████████████████████████████████████████████████████| 52/52 [00:00<00:00, 1409.06it/s]


In [19]:
# One mask tif per group selected from the 2v3 t-score image.
spot_tag = "2v3"
group_tags = ["235_up", "2_up", "2_down"]
for group_tag in group_tags:
    write_group_mask(spot_tag, group_tag)

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 11.66it/s]


Created mask for spots from 2v3 tscore image, for group 235_up


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 11.62it/s]


Created mask for spots from 2v3 tscore image, for group 2_up


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 11.45it/s]

Created mask for spots from 2v3 tscore image, for group 2_down





In [20]:
# One mask tif per group selected from the 2v5 t-score image.
spot_tag = "2v5"
group_tags = ["235_up", "2_up", "2_down", "5_down"]
for group_tag in group_tags:
    write_group_mask(spot_tag, group_tag)

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 11.52it/s]


Created mask for spots from 2v5 tscore image, for group 235_up


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 11.59it/s]


Created mask for spots from 2v5 tscore image, for group 2_up


100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 11.73it/s]


Created mask for spots from 2v5 tscore image, for group 2_down


100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.27it/s]

Created mask for spots from 2v5 tscore image, for group 5_down





# Create group signal reports

In [22]:
# TODO : hide this inside project report function ...
def generate_timpoints(bb, ba, time_centers):
    """
    Adds the bb and ba number of blanks around each value in time_centers,
    keeping the number of rows the same.

    Every center c is expanded into the consecutive time points c - bb, ..., c, ..., c + ba,
    and the expanded windows of one row are concatenated in the order of the centers.
    Works for any rectangular layout of centers, not only 3 rows of 3 centers.

    :param bb: number of time points to add before each center
    :param ba: number of time points to add after each center
    :param time_centers: 2D array-like (rows x centers per row) of time points;
        a flat sequence is treated as a single row
    :return: integer array of shape (rows, centers per row * (bb + ba + 1))
    """
    # float arithmetic followed by a cast to int, as in the original zeros-then-astype code
    centers = np.atleast_2d(np.asarray(time_centers, dtype=float))
    offsets = np.arange(-bb, ba + 1)
    # broadcasting: (rows, centers, 1) + (window,) -> (rows, centers, window)
    windows = centers[:, :, np.newaxis] + offsets
    n_rows, n_centers = centers.shape
    time_points = windows.reshape(n_rows, n_centers * offsets.size)
    return time_points.astype(int)


# Number of blanks to include before and after the stimulus
N_BLANKS_BEFORE_STIM = 3
N_BLANKS_AFTER_STIM = 5

# Time points to overlap, in the right order, expanded with the blanks around them
time_centers = [[13, 7, 20], [27, 37, 53], [43, 60, 70]]
time_points = generate_timpoints(N_BLANKS_BEFORE_STIM, N_BLANKS_AFTER_STIM, time_centers)
# The tail of the last row is replaced with time points 0 .. N_BLANKS_AFTER_STIM - 1
# NOTE(review): presumably because the cycle wraps around after the last stimulus; confirm
time_points[2, -N_BLANKS_AFTER_STIM:] = np.arange(N_BLANKS_AFTER_STIM)
# Where to break the lines on the plot (to visually separate different stimuli):
# three consecutive index ranges, each one stimulus window long
signal_split = np.arange(
    3 * (N_BLANKS_BEFORE_STIM + N_BLANKS_AFTER_STIM + 1)
).reshape(3, -1)

In [23]:
spot_tag = "2v3"
group_tags = ["235_up", "2_up", "2_down"]

# One signal pdf per group selected from the 2v3 t-score image.
for group_tag in tqdm(group_tags):
    rep.make_signal_reports(
        spot_tag, group_tag,
        # type of plot
        plot_type="psh_b",
        # only used for naming the pdf: lets you tell apart pdfs with the same plot type
        # but different errors, raw traces on/off or front_to_tail
        plot_type_tag='',
        # only show certain time points from the signal
        time_points=time_points,
        # where to break the line
        signal_split=signal_split,
        # positions of the vertical lines
        vlines=[8.5, 17.5],
        # whether to sort the cells by how many tests they passed
        sort_by_sig=True,
        # error type: "sem" for SEM or "prc" for 5th - 95th percentile
        error_type="sem",
        # whether to plot the individual traces
        plot_individual=False,
        # color of the individual traces (if shown)
        noise_color='-c',
        # where to save the files
        tmp_folder="spots/reports/groupped/signals/",
        pdf_filename=f"spots/reports/groupped/signals/signals_from_{spot_tag}_group_{group_tag}.pdf",
    )

100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:03<00:00,  1.20s/it]


In [24]:
spot_tag = "2v5"
group_tags = ["235_up", "2_up", "2_down", "5_down"]

# One signal pdf per group selected from the 2v5 t-score image.
for group_tag in tqdm(group_tags):
    rep.make_signal_reports(
        spot_tag, group_tag,
        # type of plot
        plot_type="psh_b",
        # only used for naming the pdf: lets you tell apart pdfs with the same plot type
        # but different errors, raw traces on/off or front_to_tail
        plot_type_tag='',
        # only show certain time points from the signal
        time_points=time_points,
        # where to break the line
        signal_split=signal_split,
        # positions of the vertical lines
        vlines=[8.5, 17.5],
        # whether to sort the cells by how many tests they passed
        sort_by_sig=True,
        # error type: "sem" for SEM or "prc" for 5th - 95th percentile
        error_type="sem",
        # whether to plot the individual traces
        plot_individual=False,
        # color of the individual traces (if shown)
        noise_color='-c',
        # where to save the files
        tmp_folder="spots/reports/groupped/signals/",
        pdf_filename=f"spots/reports/groupped/signals/signals_from_{spot_tag}_group_{group_tag}.pdf",
    )

100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.44it/s]
