# Truncates the experiment to the volumes around the stimuli
* Defines the time annotation for the truncated experiment and saves to processed/stimuli_truncated_timelines.csv
* Takes about 3 min to run on beefsy


In [1]:
import os
import numpy as np
import pandas as pd

# data manager and analysis
import vodex as vx
import numan as nu

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-55g5kvym because the default path (/home/ply/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


### Packages and versions when this notebook was last tested:

In [2]:
!pip list

Package             Version   Editable project location
------------------- --------- -------------------------------
antspyx             0.3.8
asttokens           2.2.1
attrs               23.1.0
backcall            0.2.0
beautifulsoup4      4.12.2
bleach              6.0.0
certifi             2023.5.7
charset-normalizer  3.1.0
chart-studio        1.1.0
comm                0.1.3
contourpy           1.1.0
cycler              0.11.0
debugpy             1.6.7
decorator           5.1.1
defusedxml          0.7.1
executing           1.2.0
fastjsonschema      2.17.1
fonttools           4.40.0
idna                3.4
imageio             2.31.1
ipykernel           6.23.3
ipython             8.14.0
jedi                0.18.2
Jinja2              3.1.2
joblib              1.2.0
jsonschema          4.17.3
jupyter_client      8.3.0
jupyter_core        5.3.1
jupyterlab-pygments 0.2.2
kiwisolver          1.4.4
lazy_loader         0.2
MarkupSafe          2.1.3
matplotlib          3.7.1
matplotlib-inli

In [3]:
# Please provide the FULL (absolute), not relative, path to the project folder.
project_folder = "/home/ply/repos/numan_dev/numan/data/hz09"
# Create the numan Project object for this folder.
project = nu.Project(project_folder)

In [4]:
# Create the "processed" folder for this project
# (presumably inside project_folder - confirm against numan's Project.create).
project.create("processed")

We will also set the processed directory as our working directory. This step is important, since all the paths later are relative to this folder. Verify that the output of the cell is the "processed" folder inside your project folder.

In [5]:
# Make the project's "processed" folder the current working directory.
project.activate("processed")
# Display the working directory so it can be verified by eye.
os.getcwd()

'/home/ply/repos/numan_dev/numan/data/hz09/processed'

# Define experiment: 

* First you need to provide the path to the folder with the **raw data**, ```data_dir```. Note that by default the code will search for all the .tif files in that folder and will treat them as raw data. While you can exclude the unwanted files later, it is recommended that there is only raw tif data in that folder. Make sure to use either ```\\``` or ```/``` as a separator, not ```\```, as it is an escape character in python.

* provide the number of **frames per volume**

* Then you need to define the **labels**.

* Then, if your experiment has a **repeating cycle**, you will need to order the labels as they appear in a cycle and provide the corresponding duration for each label. Note that the duration is in frames, as they appear in your image data, not in seconds, not in volumes.

First let's output the annotations for the experiment. Check that the output is correct.

In [6]:
# output all annotation tables from csv files for future reference
col_names = ['duration_frames','duration_seconds','duration_volumes','name','group','description']

# Build the csv paths from project_folder, so that pointing the notebook at
# another project only requires changing that one variable.
numerosity_df = pd.read_csv(os.path.join(project_folder, "number_cycle.csv"), usecols=col_names)
shape_df = pd.read_csv(os.path.join(project_folder, "shape_cycle.csv"), usecols=col_names)
spread_df = pd.read_csv(os.path.join(project_folder, "spread_cycle.csv"), usecols=col_names)

# merge into one table with multi-level columns
# `usecols` keeps the column order of the file, not of col_names, so each table
# is explicitly reordered to col_names before the positional relabelling below;
# otherwise a csv with a different column order would get mislabelled columns.
df = pd.concat(
    [annotation_df[col_names] for annotation_df in (numerosity_df, shape_df, spread_df)],
    axis = 1,
).replace(np.nan, '')
df.columns = pd.MultiIndex.from_product([['numerosity', 'shape', 'spread'],col_names])
df

Unnamed: 0_level_0,numerosity,numerosity,numerosity,numerosity,numerosity,numerosity,shape,shape,shape,shape,shape,shape,spread,spread,spread,spread,spread,spread
Unnamed: 0_level_1,duration_frames,duration_seconds,duration_volumes,name,group,description,duration_frames,duration_seconds,duration_volumes,name,group,description,duration_frames,duration_seconds,duration_volumes,name,group,description
0,540,9,9,b,number,"blank, no dots",540,9,9,b,shape,"blank, shape doesn't apply",20520.0,342.0,342.0,ch,spread,equivalent convex hull
1,60,1,1,d4,number,4 dots on the screen,60,1,1,cr,shape,constant radius,20520.0,342.0,342.0,id,spread,equivalent inter-distance
2,1620,27,27,b,number,"blank, no dots",1620,27,27,b,shape,"blank, shape doesn't apply",,,,,,
3,60,1,1,d3,number,3 dots on the screen,60,1,1,cr,shape,constant radius,,,,,,
4,1260,21,21,b,number,"blank, no dots",1260,21,21,b,shape,"blank, shape doesn't apply",,,,,,
5,60,1,1,d5,number,5 dots on the screen,60,1,1,cr,shape,constant radius,,,,,,
6,900,15,15,b,number,"blank, no dots",900,15,15,b,shape,"blank, shape doesn't apply",,,,,,
7,60,1,1,d2,number,2 dots on the screen,60,1,1,cr,shape,constant radius,,,,,,
8,1260,21,21,b,number,"blank, no dots",1260,21,21,b,shape,"blank, shape doesn't apply",,,,,,
9,60,1,1,d1,number,1 dot on the screen,60,1,1,cr,shape,constant radius,,,,,,


Now...
This will initialise experiment and output the experiment information. Read it carefully! Make sure it is all as expected! Any mistake at this step will make all the future analysis wrong.

In [7]:
# --- where the raw tif files live ---
data_dir = "/home/ply/repos/numan_dev/numan/data/hz09/20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1"

# --- volume geometry ---
frames_per_volume = 60
# Zero-based slice at which the recording begins:
# 0 when the recording starts exactly at the beginning of a full volume,
# > 0 when it starts somewhere in the middle of a volume.
starting_slice = 0

# build the experiment from the files found in data_dir
experiment = vx.Experiment.from_dir(data_dir, frames_per_volume, starting_slice, verbose=True)

# The file numbering must be zero-padded (Pos0_01.tif, Pos0_02.tif, ...);
# stop if an un-padded name such as Pos0_1.tif is present.
if any("Pos0_1." in file_name for file_name in experiment.file_names):
    raise ValueError("Please rename the files to Pos0_01.tif, Pos0_02.tif, etc.")

# attach every stimulus annotation, each one defined as a repeating cycle
for annotation_df in (numerosity_df, shape_df, spread_df):
    experiment.add_annotations_from_df(annotation_df, cycles=True)

# show the labels that are now known to the experiment
experiment.labels_df

# PS: the "<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes" messages seem to be okay

<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes
<tifffile.TiffTag 270 @269649> coercing invalid ASCII to bytes


Image files information :

files directory: /home/ply/repos/numan_dev/numan/data/hz09/20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1
files [number of frames]: 
0) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1_MMStack_Pos0.ome.tif [12877]
1) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1_MMStack_Pos0_01.ome.tif [12876]
2) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1_MMStack_Pos0_02.ome.tif [12876]
3) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1_MMStack_Pos0_03.ome.tif [12876]
4) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1_MMStack_Pos0_04.ome.tif [12876]
5) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1_MMStack_Pos0_05.ome.tif [12876]
6) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1_MMStack_Pos0_06.ome.tif [12876]
7) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P_1v2v3v4v5_1_MMStack_Pos0_07.ome.tif [12876]
8) 20230601_Hz09_casper_h2bcamp7f_7dpf_60Z_1hzvol_2P

Unnamed: 0,annotation,label,description
0,number,b,"blank, no dots"
1,number,d1,1 dot on the screen
2,number,d2,2 dots on the screen
3,number,d3,3 dots on the screen
4,number,d4,4 dots on the screen
5,number,d5,5 dots on the screen
6,shape,b,"blank, shape doesn't apply"
7,shape,cr,constant radius
8,shape,ta,equivalent total area
9,shape,tp,equivalent total perimeter


**Make sure everything above is correct** and then, if it is, save it for future use.
This will create a database, for more information see https://lemonjust.github.io/vodex/db/

In [8]:
# Save the annotated experiment as a database file for future use.
# The path is relative, so the file should land in the current working
# directory (the "processed" folder, if it was activated earlier).
experiment.save("experiment_raw.db")

Copied 20264 of 20264 pages...


# Select only the volumes that will be used for analysis

We will use one point before the stimulation and three points after the stimulation. However, to create a dff movie we need to have the whole sliding window.
So we will process the volumes from half a sliding window before the volume preceding each stimulus to half a sliding window after the third volume following it.
Let's see how much computation it will save us:


In [9]:
# number of volumes before and after each stimulus volume
# to include in the analysis:
tp_before = 3
tp_after = 5

# grab the stimuli volumes from the experiment
stimuli_volumes = experiment.choose_volumes([('number', 'd1'),('number', 'd2'),('number', 'd3'),('number', 'd4'),('number', 'd5')], logic='or')
print("Original stimuli volumes:")
print(stimuli_volumes[0])
print(stimuli_volumes[1])
print(stimuli_volumes[2])
print(f"Total number of stimuli volumes: {len(stimuli_volumes)}")

# For every stimulus volume v take the inclusive window v - tp_before ... v + tp_after.
# Broadcasting yields one row of (tp_before + 1 + tp_after) volumes per stimulus.
window_offsets = np.arange(-tp_before, tp_after + 1)
stimuli_windows = np.asarray(stimuli_volumes)[:, np.newaxis] + window_offsets

# A window that starts before volume 0 would request volumes that do not exist
# in the recording, so fail loudly here instead of annotating a bogus timeline.
if stimuli_windows.min() < 0:
    raise ValueError(
        f"tp_before={tp_before} reaches before the first volume of the recording "
        f"(earliest stimulus volume is {int(np.min(stimuli_volumes))})."
    )

print("Extended stimuli volumes:")
print(stimuli_windows[0])
print(stimuli_windows[1])
print(stimuli_windows[2])

print("Flattened extended stimuli volumes:")
# 1D list of all volumes to keep, window after window
stimuli_volumes_extended = stimuli_windows.flatten()
print(stimuli_volumes_extended)
print(f"Total number of volumes: {len(stimuli_volumes_extended)}")

Original stimuli volumes:
9
37
59
Total number of stimuli volumes: 240
Extended stimuli volumes:
[ 6  7  8  9 10 11 12 13 14]
[34 35 36 37 38 39 40 41 42]
[56 57 58 59 60 61 62 63 64]
Flattened extended stimuli volumes:
[   6    7    8 ... 5456 5457 5458]
Total number of volumes: 2160


In [10]:
# Get annotation information for these time points
# ( this is slow .. ~2 min on beefsy) 
stimuli_volumes_df = pd.DataFrame(experiment.get_volume_annotations(stimuli_volumes_extended.flatten()))
print(stimuli_volumes_df)
stimuli_volumes_df.to_csv("/home/ply/repos/numan_dev/numan/data/hz09/processed/stimuli_truncated_timelines.csv", index=False)

     number shape spread  volumes
0         b     b     ch        6
1         b     b     ch        7
2         b     b     ch        8
3        d4    cr     ch        9
4         b     b     ch       10
...     ...   ...    ...      ...
2155      b     b     id     5454
2156      b     b     id     5455
2157      b     b     id     5456
2158      b     b     id     5457
2159      b     b     id     5458

[2160 rows x 4 columns]
