In [196]:
import numpy as np
import os

# plotting 
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid
# progress bar
from tqdm import tqdm


# data manager and analysis
import vodex as vx
import numan as nu

# writing files
import tifffile as tif

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Project structure: 

Provide the project folder with the "processed" folder created in the previous notebook. 

As you keep going with the analysis, the folder will have the following structure: 

```
processed
│   experiment.json <----------------------------------- (DONE in 01) the file that contains everything about the experiment, you are creating it once at the beginning of the processing and reusing ever after
└───dff_movie  <---------------------------------------- (DONE in 01) the dff movie :) 
│   │   dff_movie_0000.tif
│   │   dff_movie_0001.tif
│   │   ... 
└───tscore_volumes  <----------------------------------- (DONE in 02) t-score tif files per pair
│   └───2v3
│       │   tscore_2v3.tif
│   └───3v5
│       │   tscore_3v5.tif
│   └───2v5
│       │   tscore_2v5.tif
│   └───2vB
│       │   tscore_2vB.tif
│   └───3vB
│       │   tscore_3vB.tif
│   └───5vB
│       │   tscore_5vB.tif
│   └───BvB1
│       │   tscore_BvB1.tif
│   └───BvB2
│       │   tscore_BvB2.tif
│   └───BvB3
│       │   tscore_BvB3.tif
└───diff_volumes  <------------------------------------- (DONE in 02) absolute difference tif files per pair
│   └───2v3
│       │   diff_2v3.tif
│   └───3v5
│       │   diff_3v5.tif
│   └───...
└───spots
│   └───imaris  <--------------------------------------- (DONE after 02) ATTENTION : You need to put stuff generated by imaris into this folder!!! 
│       │   └───tscore_2v3_Statistics
│       │       │     tscore_2v3_Position.csv
│       │       │     tscore_2v3_Diameter.csv
│       │       │     ...
│       │   └───tscore_3v5_Statistics
│       │       │     tscore_3v5_Position.csv
│       │       │     tscore_3v5_Diameter.csv
│       │       │     ...
│       │   └───tscore_2v5_Statistics
│       │       │     ...
│       │   └───...
│   └───signals  <-------------------------------------- (DONE in 03 and 04, WILL BE UPDATED in this notebook) json files with the extracted signals, also will have the group info after you added it
│       │   spots_2v3.json
│       │   spots_3v5.json
│       │   spots_2v5.json
│       │     ...
│       └───reports  <---------------------------------- tiffs and pdf with the cells significant in any pairwise comparison
│       │   └───all_significant  <---------------------- tiffs and pdf with all significant cells per group
│       │       │   └───signals  <---------------------- (WILL BE DONE in this notebook) pdfs with signals
│       │       │       │     ...
│       │       │   └───images <------------------------ tif masks 
│       │       │       │     ...
│       │   └───groupped  <----------------------------- tiffs and pdf where the cells are groupped based on signal shape .. or anything else you want
│       │       │   readme.txt  <----------------------- ATTENTION : you need to describe the groups
│       │       │   └───signals  <---------------------- pdfs with signals
│       │       │       │     ...
│       │       │   └───images  <----------------------- tif masks 
│       │       │       │     ...
```

# Set project folder

The `processed/spots/signals` folder should already exist and contain the extracted signals saved by the previous notebooks.

In [6]:
# Root folder of the project; it must contain the "processed" folder.
project_folder = "D:/Code/repos/numan/notebooks/data/2vs3vs5/"
path = os.path.join(project_folder, "processed")

# Fail early if the extracted signals are not where this notebook expects them.
signals_folder_exists = os.path.isdir(os.path.join(path, "spots", "signals"))
assert signals_folder_exists, (
    "the directory 'processed/spots/signals' doesn't exist in the project,"
    " did you forget to run the previous notebook?"
)

# The relative paths used in the rest of the notebook are resolved against "processed".
os.chdir(path)
os.getcwd()

'D:\\Code\\repos\\numan\\notebooks\\data\\2vs3vs5\\processed'

# Load experiment with the raw data and define conditions: 

In [None]:
# 'experiment.json' is looked up relative to the current working directory
# (the "processed" folder, set with os.chdir above).
experiment = vx.from_json(vx.Experiment, "experiment.json")
experiment.summary()

## Create lots of pdfs with lots of traces : 

Since there are so many different ways in which a cell can be significant, I'll just output all the cells that are significant in any way at all, and indicate the way in which they are significant. Not the best way of doing things in general... but oh well...

## Find the cells that are significant in any way 

In [156]:
def all_sig_summary(groups_per_spot, ever_significant):
    """
    Prints how many spots are significant in at least one category,
    followed by the group description of every such spot (one per line).

    groups_per_spot : one entry per spot describing the groups it belongs to.
    ever_significant : boolean mask (one value per spot) selecting the spots to list.

    The descriptions are printed longest first; the sort is stable, so entries
    of equal length keep their original relative order.
    """
    # keep only the entries of the spots selected by the mask
    significant_groups = np.array(groups_per_spot)[ever_significant].tolist()
    # longest description first
    ordered_groups = np.array(sorted(significant_groups, key=len, reverse=True))

    n_tot = len(ever_significant)
    n_sig = np.sum(ever_significant)

    print(f"{n_sig}/{n_tot} spots are ever significant. \nBelow are their types.\n")
    for group_name in ordered_groups:
        print(group_name)
        
def get_ever_significnt(spots, verbose = False, sig_categories = None):
    """
    Returns, for every spot, whether it is significant in at least one category.

    spots : object providing get_group_info(categories); the notebook passes
        the spots loaded with nu.Spots.from_json.
    verbose : when True, also prints a summary via all_sig_summary.
    sig_categories : names of the groups to test. When None (default), the
        six pairwise comparisons of this experiment are used.

    Returns a list of booleans, one per spot: True when get_group_info
    reports a non-empty entry for that spot.
    """
    if sig_categories is None:
        # different categories in which we test if a spot is significant
        sig_categories = ["sig2v3", "sig2v5", "sig3v5", "sig2vB", "sig3vB", "sig5vB"]

    groups_per_spot = spots.get_group_info(list(sig_categories)).tolist()
    # significant in at least one category <=> the entry for the spot is not empty
    ever_significant = [len(group) > 0 for group in groups_per_spot]

    if verbose:
        all_sig_summary(groups_per_spot, ever_significant)

    return ever_significant

In [None]:
# For every spot set: load the spots, find the ones significant in at least one
# comparison, store that as the "sigAny2v3v5vB" group and write the spots back
# to the same json file.
for spot_tag in ["2v3", "2v5", "3v5", "2vB", "3vB", "5vB"]:
    print(f"\n{spot_tag}__________________________________________________")
    spots_file = f"spots/signals/spots_{spot_tag}.json"
    spots = nu.Spots.from_json(spots_file)
    ever_significant = get_ever_significnt(spots, verbose = True)
    spots.add_groups({"sigAny2v3v5vB":ever_significant})
    spots.to_json(spots_file)

The cell above adds the "ever significant" group to the spots' groups, so that it can be treated as one of the spot types, and saves the spots.

## Create a folder and initialise the reports maker  

In [194]:
# Folder announced below as the location of the reports.
reports_folder = os.path.join(path, 'spots','reports', 'all_significant','signals')
# exist_ok=True : re-running this cell must not fail when the folder is already there
os.makedirs(reports_folder, exist_ok=True)
rep = nu.Reports(path, experiment)

print(f"All reports will be in : {reports_folder}")

All reports will be in : D:/Code/repos/numan/notebooks/data/2vs3vs5/processed\spots\reports\all_significant\signals


## Make cycle plots: 
Without individual traces

In [187]:
spot_tags = ["2v3","2v5","3v5","2vB","3vB","5vB"]

# Options shared by every report made in this cell.
cycle_options = dict(
    # type of plot
    plot_type="cycle",
    # used only for the pdf naming : lets you distinguish pdfs with the same
    # plot type but different errors, raw traces on/off or front_to_tail
    plot_type_tag='',
    # whether or not you want the cells sorted by how many tests they passed
    sort_by_sig=True,
    # front_to_tail will shift the cycle by the set number of voxels,
    # so when set to 3, there are 3 blank volumes at the beginning and at the end ...
    # if set to 0, will have 6 leading blanks and will end right after the 5 dots (black bar)
    front_to_tail=0,
    # what error type to use ( "sem" for SEM or "prc" for 5th - 95th percentile )
    error_type="sem",
    # whether to plot the individual traces
    plot_individual=False,
    # the color of the individual traces (if shown)
    noise_color='-c',
)

for spot_tag in tqdm(spot_tags):
    rep.make_signal_reports(spot_tag, "sigAny2v3v5vB", **cycle_options)

With individual traces

In [181]:
spot_tags = ["2v3","2v5","3v5","2vB","3vB","5vB"]

# Options shared by every report made in this cell.
cycle_individual_options = dict(
    # type of plot
    plot_type="cycle",
    # used only for the pdf naming : lets you distinguish pdfs with the same
    # plot type but different errors, raw traces on/off or front_to_tail
    plot_type_tag='_individ_traces',
    # whether or not you want the cells sorted by how many tests they passed
    sort_by_sig=True,
    # front_to_tail will shift the cycle by the set number of voxels,
    # so when set to 3, there are 3 blank volumes at the beginning and at the end ...
    # if set to 0, will have 6 leading blanks and will end right after the 5 dots (black bar)
    front_to_tail=3,
    # what error type to use ( "sem" for SEM or "prc" for 5th - 95th percentile )
    error_type="sem",
    # whether to plot the individual traces
    plot_individual=True,
    # the color of the individual traces (if shown)
    noise_color='-c',
)

for spot_tag in tqdm(spot_tags):
    rep.make_signal_reports(spot_tag, "sigAny2v3v5vB", **cycle_individual_options)


## Make PSH plots: 
### Only stimulus 
List the cycle time points that you want to show and overlap.

In [None]:
# ATTENTION ! This is specific to the current experiment !!!
# The time points of one column are overlapped on the plot, and the columns are shown left to right :
# time points 13, 27 and 43 of the cycle ( this is when we're showing 2 dots ) are overlapped and shown first,
# then time points 7, 37 and 60 ( 3 dots ) are overlapped and shown next .. etc
time_points = [
    [13, 7, 20],
    [27, 37, 53],
    [43, 60, 70],
]

In [189]:
spot_tags = ["2v3","2v5","3v5","2vB","3vB","5vB"]

# Options shared by every report made in this cell.
psh_0_options = dict(
    # type of plot
    plot_type="psh_0",
    # used only for the pdf naming : lets you distinguish pdfs with the same
    # plot type but different errors, raw traces on/off or front_to_tail
    plot_type_tag='',
    # only show certain time points from the signal, for example : only 2 dots
    time_points=time_points,
    # whether or not you want the cells sorted by how many tests they passed
    sort_by_sig=True,
    # what error type to use ( "sem" for SEM or "prc" for 5th - 95th percentile )
    error_type="sem",
    # whether to plot the individual traces
    plot_individual=False,
    # the color of the individual traces (if shown)
    noise_color='-c',
)

for spot_tag in tqdm(spot_tags):
    rep.make_signal_reports(spot_tag, "sigAny2v3v5vB", **psh_0_options)

In [None]:
spot_tags = ["2v3","2v5","3v5","2vB","3vB","5vB"]

# Options shared by every report made in this cell.
psh_0_individual_options = dict(
    # type of plot
    plot_type="psh_0",
    # used only for the pdf naming : lets you distinguish pdfs with the same
    # plot type but different errors, raw traces on/off or front_to_tail
    plot_type_tag='_individ_traces',
    # only show certain time points from the signal, for example : only 2 dots
    time_points=time_points,
    # whether or not you want the cells sorted by how many tests they passed
    sort_by_sig=True,
    # what error type to use ( "sem" for SEM or "prc" for 5th - 95th percentile )
    error_type="sem",
    # whether to plot the individual traces
    plot_individual=True,
    # the color of the individual traces (if shown)
    noise_color='-c',
)

for spot_tag in tqdm(spot_tags):
    rep.make_signal_reports(spot_tag, "sigAny2v3v5vB", **psh_0_individual_options)

### Stimulus + some blanks before and after

In [191]:
def generate_timpoints(bb, ba, time_centers):
    """
    Adds the bb and ba number of blanks around each value in time_centers,
    keeping the number of rows the same.

    bb : number of time points to add before each center.
    ba : number of time points to add after each center.
    time_centers : 2D array-like (rows x centers per row) of time points.
        Any number of rows and of centers per row is accepted.

    Returns an integer array of shape (rows, centers per row * (bb + ba + 1)):
    in each row every center c is replaced by the consecutive time points
    c - bb, ..., c, ..., c + ba, and the windows are laid out one after another
    in the order of the centers.

    Raises ValueError when time_centers is not 2-dimensional.
    """
    centers = np.asarray(time_centers)
    if centers.ndim != 2:
        raise ValueError(
            f"time_centers must be 2-dimensional (rows x centers), got {centers.ndim} dimension(s)"
        )

    # offsets of one window relative to its center : -bb, ..., 0, ..., ba
    offsets = np.arange(-bb, ba + 1)
    # broadcast to (rows, centers, window), then lay the windows of a row side by side
    windows = centers[:, :, np.newaxis] + offsets
    n_rows, n_centers = centers.shape
    time_points = windows.reshape(n_rows, n_centers * offsets.size)
    return time_points.astype(int)


# Choose the number of blanks before and after the stimulus
N_BLANKS_BEFORE_STIM = 3
N_BLANKS_AFTER_STIM = 5
# number of time points shown per stimulus : blanks before + the stimulus itself + blanks after
N_POINTS_PER_STIM = N_BLANKS_BEFORE_STIM + N_BLANKS_AFTER_STIM + 1

# Create the timepoints to overlap in the right order
time_centers = [[13, 7, 20],[27, 37, 53],[43, 60, 70]]
time_points = generate_timpoints(N_BLANKS_BEFORE_STIM, N_BLANKS_AFTER_STIM, time_centers)
# The blanks after the very last center are replaced with the first time points of the cycle (0, 1, ...)
# NOTE(review): presumably because they would run past the end of the cycle - confirm against the cycle length.
# Guarded because with 0 blanks the slice [-0:] would select the whole row instead of nothing.
if N_BLANKS_AFTER_STIM > 0:
    time_points[2,-N_BLANKS_AFTER_STIM:] = np.arange(N_BLANKS_AFTER_STIM)
# create a way to break the lines on the plot (to visually separate different stimuli) :
# one row of consecutive indices per stimulus window
n_stimuli = len(time_centers[0])
signal_split = np.arange(n_stimuli * N_POINTS_PER_STIM).reshape(n_stimuli, N_POINTS_PER_STIM)

In [193]:
spot_tags = ["2v3","2v5","3v5","2vB","3vB","5vB"]

# Options shared by every report made in this cell.
psh_b_options = dict(
    # type of plot
    plot_type="psh_b",
    # used only for the pdf naming : lets you distinguish pdfs with the same
    # plot type but different errors, raw traces on/off or front_to_tail
    plot_type_tag='',
    # only show certain time points from the signal, for example : only 2 dots
    time_points=time_points,
    # how to break the line
    signal_split=signal_split,
    # draw vertical lines
    vlines=[8.5, 17.5],
    # whether or not you want the cells sorted by how many tests they passed
    sort_by_sig=True,
    # what error type to use ( "sem" for SEM or "prc" for 5th - 95th percentile )
    error_type="sem",
    # whether to plot the individual traces
    plot_individual=False,
    # the color of the individual traces (if shown)
    noise_color='-c',
)

for spot_tag in tqdm(spot_tags):
    rep.make_signal_reports(spot_tag, "sigAny2v3v5vB", **psh_b_options)

In [180]:
spot_tags = ["2v3","2v5","3v5","2vB","3vB","5vB"]

# Options shared by every report made in this cell.
psh_b_individual_options = dict(
    # type of plot
    plot_type="psh_b",
    # used only for the pdf naming : lets you distinguish pdfs with the same
    # plot type but different errors, raw traces on/off or front_to_tail
    plot_type_tag='_individ_traces',
    # only show certain time points from the signal, for example : only 2 dots
    time_points=time_points,
    # how to break the line
    signal_split=signal_split,
    # draw vertical lines
    vlines=[8.5, 17.5],
    # whether or not you want the cells sorted by how many tests they passed
    sort_by_sig=True,
    # what error type to use ( "sem" for SEM or "prc" for 5th - 95th percentile )
    error_type="sem",
    # whether to plot the individual traces
    plot_individual=True,
    # the color of the individual traces (if shown)
    noise_color='-c',
)

for spot_tag in tqdm(spot_tags):
    rep.make_signal_reports(spot_tag, "sigAny2v3v5vB", **psh_b_individual_options)