In [None]:
# --- Output configuration ---
# NOTE(review): presumably toggles saving of derived datasets — confirm where it is read.
save_dsets = False

# Whether the figures created below are written to disk.
save_figs = True
# Format flags: PDF takes precedence over EPS; JPG is the fallback when both are off.
save_as_eps = False
save_as_pdf = True
file_format = ".pdf" if save_as_pdf else (".eps" if save_as_eps else ".jpg")
if save_figs:
    print(f"Going to save figures as {file_format} files.")

In [None]:
#Auto-reload modules (used to develop functions outside this notebook)
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import h5py
from nd2_to_caiman import np_arr_from_nd2
import labrotation.file_handling as fh
from matplotlib import pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Rectangle
import matplotlib.colors as mcolors
import matplotlib as mpl
from math import floor, ceil, sqrt, atan2, acos, pi, sin, cos
from datetime import datetime
import json
from labrotation import json_util
import scipy
from scipy import ndimage
from scipy.spatial import distance_matrix
from scipy.stats import circmean, circstd  # for statistical testing on directionality
import datadoc_util
from statsmodels.nonparametric.smoothers_lowess import lowess
import pandas as pd
import seaborn as sns
import multiprocess as mp  # multiprocessing does not work with IPython. Use fork instead.
import os
import random  # for surrogate algorithm
from collections.abc import Iterable
import math
from functools import partial

In [None]:
mpl.rcParams.update({'font.size': 20})

In [None]:
# seaborn theme and palette used for the figures below.
# NOTE(review): sns.set() updates matplotlib rcParams (font sizes included), so it presumably
# overrides the 'font.size' written to mpl.rcParams in the previous cell — confirm.
sns.set(font_scale=1.5)
sns.set_style("whitegrid")
color_palette = sns.color_palette("deep")

In [None]:
def get_datetime_for_fname():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS`` (for use in file names)."""
    return datetime.now().strftime("%Y%m%d-%H%M%S")

In [None]:
grid_shape = (8,8)

In [None]:
# Parse KEY=VALUE pairs from a local .env file into env_dict.
env_dict = dict()
if not os.path.exists("./.env"):
    print(".env does not exist")
else:
    with open("./.env", "r") as f:
        for line in f:
            line = line.strip()
            # skip blank lines, comment lines and anything that is not KEY=VALUE
            if not line or line.startswith("#") or "=" not in line:
                continue
            # split on the first "=" only, so that values may themselves contain "="
            key, value = line.split("=", 1)
            env_dict[key] = value
print(env_dict.keys())

In [None]:
# Take the data documentation folder from .env when configured; otherwise fall back to
# fh.open_dir (presumably a folder-selection dialog — see labrotation.file_handling).
data_docu_folder = (
    env_dict["DATA_DOCU_FOLDER"]
    if "DATA_DOCU_FOLDER" in env_dict
    else fh.open_dir("Open Data Documentation folder")
)
print(data_docu_folder)

In [None]:
ddoc = datadoc_util.DataDocumentation(data_docu_folder)
ddoc.loadDataDoc()

In [None]:
df_colors = ddoc.getColorings()

In [None]:
# mouse_id -> color lookup built from the coloring table (later rows win on duplicate ids)
dict_mouse_id_color = dict(zip(df_colors["mouse_id"], df_colors["color"]))

In [None]:
# Use the configured downloads folder when present; otherwise fall back to fh.open_dir, the
# same way DATA_DOCU_FOLDER is resolved (avoids a KeyError when .env or the key is missing).
if "DOWNLOADS_FOLDER" in env_dict:
    output_folder = env_dict["DOWNLOADS_FOLDER"]
else:
    output_folder = fh.open_dir("Open output folder")
print(f"Output files will be saved to {output_folder}")

## Open files and get uuid

In [None]:
df_id_uuid = ddoc.getIdUuid()

In [None]:
analysis_folder = fh.open_dir("Open directory with analysis (grid) data for all mice!")

In [None]:
# Collect every grid result file (*_grid.h5) found below analysis_folder.
grid_files_list = []
for root, dirs, files in os.walk(analysis_folder):
    for fname in files:
        # endswith instead of a substring test: the matching cnmf path is derived by
        # cutting a fixed-length suffix, which only works if the name really ends in "_grid.h5"
        if fname.endswith("_grid.h5"):
            grid_files_list.append(os.path.join(root, fname))

In [None]:
# Map every grid file to the uuid and experiment type of its recording.
# The uuid is read from the "uuid" attribute of the sibling *_cnmf.hdf5 file.
uuid_dict = dict()
exp_type_dict = dict()
uuid_exp_type_dict = dict()
for grid_fpath in grid_files_list:
    folder, grid_fname = os.path.split(grid_fpath)
    # ..._grid.h5 -> ..._cnmf.hdf5 (cut the trailing "grid.h5", keep the underscore)
    cnmf_fpath = os.path.join(folder, grid_fname[:-7] + "cnmf.hdf5")
    with h5py.File(cnmf_fpath, 'r') as hf:
        uuid = hf.attrs["uuid"]
    exp_type = ddoc.getExperimentTypeForUuid(uuid)
    uuid_dict[grid_fpath] = uuid
    exp_type_dict[grid_fpath] = exp_type
    uuid_exp_type_dict[uuid] = exp_type

## Combine all results into one dataframe

In [None]:
# Collect the union of column names over all grid files, plus the three columns that are
# added when the files are combined (uuid, mouse_id, exp_type).
# NOTE(review): cols_set is not read again in the visible cells — confirm it is still needed.
cols_set = set()
for fpath in grid_files_list:
    df = pd.read_hdf(fpath)
    for key in df.keys():
        cols_set.add(key)
cols_set.add("uuid")
cols_set.add("mouse_id")
cols_set.add("exp_type")

In [None]:
# Load every grid file, annotate it with uuid / mouse_id / exp_type and concatenate once.
# Starting from an empty dataframe does not work, as all data types would become object
# (except x, y, which would be proper integers).
onset_frames = []
for fpath in grid_files_list:
    df = pd.read_hdf(fpath)
    df["uuid"] = uuid_dict[fpath]
    df["mouse_id"] = df_id_uuid[df_id_uuid["uuid"] == uuid_dict[fpath]]["mouse_id"].values[0]
    df["exp_type"] = exp_type_dict[fpath]

    # every file (including the first one) must carry a uuid and an experiment type
    assert df["uuid"].isna().sum() == 0
    assert df["exp_type"].isna().sum() == 0

    onset_frames.append(df)

# a single concat avoids re-copying the accumulated frame once per file
all_onsets_df = pd.concat(onset_frames)

### Remove the 5% of onsets that deviate most from the median, for each category
See Wenzel 2017 (recruitment reliability), page 8, main text below Fig. 5 (not the caption).

### Make sure to have integer and float data types for the columns, and not object! (int16, int64, float64)

In [None]:
# Old files containing onset data have no seizure index: it was only added when one of the
# last recordings processed turned out to contain 2 seizures.
# As a result, most i_sz values are NaN; these recordings contain 1 sz. Otherwise 0, 1... are the seizure indices.
if "i_sz" not in all_onsets_df:  # make sure the column exists even if no file provided it
    all_onsets_df["i_sz"] = np.nan
all_onsets_df["i_sz"].unique()

In [None]:
# make seizures unique in uuid_extended
def append_uuid(row):
    """Build the per-seizure identifier for one row.

    Rows without a seizure index (NaN ``i_sz``) keep their plain ``uuid``. For a
    non-negative index, ``"_" + str(i_sz + 1)`` is appended (a float index therefore
    yields e.g. ``"<uuid>_1.0"``). A negative index falls through and gives ``None``.
    """
    seizure_index = row['i_sz']
    if pd.isna(seizure_index):
        return row['uuid']
    if seizure_index >= 0:
        return row['uuid'] + '_' + str(row["i_sz"] + 1)
    return None
# axis=1: append_uuid is called once per row and its return value becomes uuid_extended
all_onsets_df['uuid_extended'] = all_onsets_df.apply(append_uuid, axis=1)

In [None]:
# Overview: for every mouse, list its recordings (uuid) and the number of rows of each.
for mouse_id, mouse_rows in all_onsets_df.groupby("mouse_id"):
    print(mouse_id)
    for session_uuid, session_rows in mouse_rows.groupby("uuid"):
        print("\t" + session_uuid)
        print("\t" + str(len(session_rows)) + "\n")

`all_onsets_df` now contains every neuron of each recording with a seizure and/or SD. For each neuron, there is an onset value for each SD wave and for the seizure (NaN for all neurons in a session if the event did not occur in the recording).

In [None]:
def replace_outliers(group, colname, threshold_percent=0.05):
    """Replace the values of ``colname`` that deviate most from the median with NaN.

    The absolute deviation of every value from the median of the non-NaN values is
    computed. The cut-off is the deviation found at position
    ``ceil(threshold_percent * n)`` of the descending-sorted deviations (``n`` = number
    of non-NaN values); every value whose deviation is strictly larger than the cut-off
    is set to NaN. With many tied deviations fewer values than the nominal fraction
    are removed.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of one category (the function is used with ``groupby(...).apply``).
    colname : str
        Name of the column to clean.
    threshold_percent : float, optional
        Fraction of the most deviant values targeted for removal (default 0.05).

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with the outliers in ``colname`` replaced by NaN. If the
        column holds only NaN, ``group`` itself is returned unchanged.

    Raises
    ------
    KeyError
        If ``colname`` is not a column of ``group``.
    """
    if colname not in group.keys():
        raise KeyError(f"Column '{colname}' not found; available columns: {list(group.keys())}")

    values = group[colname]
    deviations = np.abs(values - values.dropna().median())
    deviations_nonan = deviations.dropna()
    if len(deviations_nonan) == 0:  # empty column (only NaN): skip outlier removal
        return group
    # sort in descending order
    deviations_sorted_desc = np.flip(np.sort(deviations_nonan))
    # clamp the position: for a single value ceil(threshold_percent * 1) == 1 would be out of range
    i_threshold = min(ceil(threshold_percent * len(deviations_sorted_desc)), len(deviations_sorted_desc) - 1)
    deviation_threshold = deviations_sorted_desc[i_threshold]
    # work on a copy so that the frame handed in by groupby is not mutated
    group = group.copy()
    # positional boolean mask: independent of duplicate index labels
    group.loc[(deviations > deviation_threshold).to_numpy(), colname] = np.nan
    return group


In [None]:
# One single-argument cleaner per onset column, so that each can be passed to groupby(...).apply.
replace_onset1_outliers = partial(replace_outliers, colname="onset1")
replace_onset2_outliers = partial(replace_outliers, colname="onset2")
replace_onsetsz_outliers = partial(replace_outliers, colname="onset_sz")

In [None]:
# Remove outliers per seizure/recording (uuid_extended), one onset column after the other.
# group_keys=False keeps the original row index: with pandas >= 2.0 the default would add
# "uuid_extended" as an index level, which makes the following groupby("uuid_extended")
# ambiguous (the name is then both an index level and a column).
all_onsets_df_outliers_removed = all_onsets_df
for remove_outliers_fn in (replace_onset1_outliers, replace_onset2_outliers, replace_onsetsz_outliers):
    all_onsets_df_outliers_removed = (
        all_onsets_df_outliers_removed
        .groupby("uuid_extended", group_keys=False)
        .apply(remove_outliers_fn)
    )

### Apply the outlier removal

In [None]:
# Switch for the rest of the notebook: when True, all_onsets_df is replaced by the
# outlier-cleaned version (this overwrites the original variable).
remove_outliers = True
if remove_outliers:
    all_onsets_df = all_onsets_df_outliers_removed

## SD speed based on grid approach

In [None]:
def SDSpeedsTileAlgorithm(df_onsets_input, i_wave, um_per_pixel=1.579, fps=15.0, plot_res=True):
    """Estimate SD speeds per tile from the nearest tile with a later onset.

    For every session (``uuid_extended``) with a non-NaN ``onset<i_wave>``, the rows are
    reduced to one entry per ``tile`` (median of x, y and onset). For each tile, the
    closest tile with a strictly later onset is looked up and the speed is computed as
    distance / onset difference. Tiles without any later-onset tile keep the speed 0.
    The median over all speeds is printed and, if requested, a histogram is shown.

    Parameters
    ----------
    df_onsets_input : pandas.DataFrame
        Needs the columns "uuid_extended", "exp_type", "tile", "x", "y" and
        ``"onset<i_wave>"``.
    i_wave : int
        SD wave index; should be 1 or 2.
    um_per_pixel : float, optional
        Objective conversion factor [µm / pixel] (default 1.579).
    fps : float, optional
        Frame rate [frames / second] used to convert onset frames to seconds (default 15.0).
    plot_res : bool, optional
        Whether to show the histogram of all speeds (default True).

    Returns
    -------
    tuple
        ``(uuids, vs_2d)``: the list of session ids and, per session, an array with one
        speed per tile in µm/s.
    """
    onset_col = f"onset{i_wave}"
    uuids = []
    vs_2d = []

    for i_group, session_group in df_onsets_input[df_onsets_input[onset_col].notna()].groupby("uuid_extended"):
        assert len(session_group["exp_type"].unique()) == 1
        # Restrict to the numeric columns that are needed: the median over string columns
        # (uuid, exp_type, ...) raises a TypeError with pandas >= 2.0.
        tiles_group = session_group.groupby("tile")[["x", "y", onset_col]].median()  # TODO: the center values should be mean, not median!
        x_y_onset = np.array([tiles_group["x"], tiles_group["y"], tiles_group[onset_col]]).T  # x_y_onset[i] = [x_i, y_i, onset_i]
        n_tiles = len(x_y_onset)
        onsets = x_y_onset[:, 2]

        # 1. find all tiles with later onset: later_mask[i, j] is True if onset j is greater than onset i
        later_mask = onsets[:, np.newaxis] < onsets
        # 2. find all tile distances
        dist_matrix = distance_matrix(x_y_onset[:, :2], x_y_onset[:, :2])
        assert (dist_matrix == dist_matrix.T).all()  # symmetric
        np.fill_diagonal(dist_matrix, np.inf)  # exclude tile itself from being nearest neighbor
        # distances to later-onset tiles only; inf everywhere else
        later_tiles_distances = np.where(later_mask, dist_matrix, np.inf)
        nearest_indices_later_onset = np.argmin(later_tiles_distances, axis=1)

        vs = np.zeros(n_tiles)
        for i_tile, i_nearest_later in enumerate(nearest_indices_later_onset):
            if np.isinf(later_tiles_distances[i_tile]).all():
                # no tile with a later onset exists: leave the speed at 0
                continue
            ds = dist_matrix[i_tile][i_nearest_later] * um_per_pixel  # [pixel] * [µm] / [pixel]
            dt = (onsets[i_nearest_later] - onsets[i_tile]) / fps  # [frames] / ([frames]/[second])
            vs[i_tile] = ds / dt
        vs_2d.append(vs)
        uuids.append(i_group)

    vs_flat = [item for vs_row in vs_2d for item in vs_row]
    # guard the empty case, where np.median would only emit a warning and return nan
    v_median = np.median(vs_flat) if len(vs_flat) > 0 else np.nan
    print(f"{v_median} µm/s = {v_median*6./100.} mm/min") 
    if plot_res:
        fig = plt.figure(figsize=(16,8))
        plt.hist(vs_flat, bins=150)
        plt.show()
    return (uuids, vs_2d)  # in µm/s

In [None]:
uuids_grid1, vs_grid1 = SDSpeedsTileAlgorithm(all_onsets_df, 1)

In [None]:
uuids_grid2, vs_grid2 = SDSpeedsTileAlgorithm(all_onsets_df, 2)

## SD speed based on cell approach
The algorithm stays the same as in the grid approach, but it is applied to individual neurons instead of tiles.

In [None]:
# Scratch example: 4x4 matrix of distances to later-onset neurons (np.inf = no later onset).
# NOTE(review): the same name is used as a local variable inside SDSpeedsCellAlgorithm; this
# global looks like a leftover for trying out the argsort call in the next cell — confirm.
later_neurons_distances = [[1., 2.5, np.inf, 2.4], [np.inf, 1.5, np.inf, np.inf], [np.inf, np.inf, np.inf, np.inf], [1.6, 1.8, 2.5, 1.]]

In [None]:
# Scratch example: per row, the column index of the smallest distance (first argsort column).
nearest_indices_later_onset = np.argsort(later_neurons_distances, axis=1)[:,:1]

In [None]:
def SDSpeedsCellAlgorithm(df_onsets_input, i_wave, n_neighbors=1, plot_res = False, vectorize = False, onset_sz = False,
                          um_per_pixel=1.579, fps=15.0):
    """Estimate propagation speeds per neuron from the nearest neurons with a later onset.

    For every session (``uuid_extended``) with a non-NaN onset, each neuron is paired
    with up to ``n_neighbors`` closest neurons that have a strictly later onset. The
    speed of a neuron is the median of distance / onset difference over those
    neighbours. Neurons without any later-onset neuron keep the speed 0. The median
    over all speeds is printed.

    Parameters
    ----------
    df_onsets_input : pandas.DataFrame
        Needs the columns "uuid_extended", "neuron_id", "x", "y" and the onset column.
    i_wave : int
        SD wave index; should be 1 or 2. Ignored when ``onset_sz`` is True.
    n_neighbors : int, optional
        Number of closest later-onset neurons to combine (default 1). If fewer exist,
        only the existing ones are used.
    plot_res : bool, optional
        Whether to plot a histogram with all speeds (default False).
    vectorize : bool, optional
        Whether to additionally return the mean x/y offset to the neighbours and the
        centre coordinates of the neurons (default False).
    onset_sz : bool, optional
        Use the "onset_sz" column instead of ``"onset<i_wave>"`` (default False).
    um_per_pixel : float, optional
        Objective conversion factor [µm / pixel] (default 1.579).
    fps : float, optional
        Frame rate [frames / second] used to convert onset frames to seconds (default 15.0).

    Returns
    -------
    tuple
        ``(uuids, neuron_ids, vs_2d)`` with speeds in µm/s, or, if ``vectorize``,
        ``(uuids, neuron_ids, vs_2d, dx_2d, dy_2d, centres_x, centres_y)``.
    """
    onset_type = "onset_sz" if onset_sz else "onset" + str(i_wave)

    uuids = []
    vs_2d = []
    dx_2d = []
    dy_2d = []
    # per-session pieces, concatenated once at the end; the leading empty arrays fix the
    # result dtypes (int16 ids, float64 centres) even if there is no session at all
    neuron_ids_per_session = [np.array([], dtype=np.int16)]
    centres_x_per_session = [np.array([])]
    centres_y_per_session = [np.array([])]

    for i_group, session_group in df_onsets_input[df_onsets_input[onset_type].notna()].groupby("uuid_extended"):
        # TODO: the center values should be mean, not median!
        x_y_onset = np.array([session_group["x"], session_group["y"], session_group[onset_type]]).T  # x_y_onset[i] = [x_i, y_i, onset_i]
        n_neurons = len(x_y_onset)
        xs, ys, onsets = x_y_onset[:, 0], x_y_onset[:, 1], x_y_onset[:, 2]
        neuron_ids_per_session.append(np.array(session_group["neuron_id"], dtype=np.int16))

        # 1. find all neurons with later onset: later_mask[i, j] is True if onset j is greater than onset i
        later_mask = onsets[:, np.newaxis] < onsets
        # 2. find all neuron distances
        dist_matrix = distance_matrix(x_y_onset[:, :2], x_y_onset[:, :2])
        assert (dist_matrix == dist_matrix.T).all()  # symmetric
        np.fill_diagonal(dist_matrix, np.inf)  # exclude neuron itself from being nearest neighbor
        # distances to neurons with later onset only; inf everywhere else
        later_neurons_distances = np.where(later_mask, dist_matrix, np.inf)
        # candidate neighbours: the n_neighbors smallest entries of each row
        nearest_indices_later_onset = np.argsort(later_neurons_distances, axis=1)[:, :n_neighbors]

        vs = np.zeros(n_neurons)
        dxs = np.zeros(n_neurons)
        dys = np.zeros(n_neurons)
        for i_neuron, neighbor_indices in enumerate(nearest_indices_later_onset):
            # A row may hold fewer than n_neighbors later-onset neurons; the remaining
            # candidates have distance inf and must not enter the speed (their onset
            # difference would be <= 0).
            is_later = np.isfinite(later_neurons_distances[i_neuron, neighbor_indices])
            neighbor_indices = neighbor_indices[is_later]
            if len(neighbor_indices) == 0:
                # no neuron with a later onset exists: leave speed and offsets at 0
                continue
            ds = dist_matrix[i_neuron, neighbor_indices] * um_per_pixel  # [pixel] * [µm] / [pixel]
            dt = (onsets[neighbor_indices] - onsets[i_neuron]) / fps  # [frames] / ([frames]/[second])
            vs[i_neuron] = np.median(ds / dt)
            if vectorize:
                # mean offset from the current neuron to its neighbours
                dxs[i_neuron] = np.mean(xs[neighbor_indices] - xs[i_neuron])
                dys[i_neuron] = np.mean(ys[neighbor_indices] - ys[i_neuron])
        vs_2d.append(vs)
        uuids.append(i_group)
        if vectorize:
            centres_x_per_session.append(session_group["x"].to_numpy())
            centres_y_per_session.append(session_group["y"].to_numpy())
            dx_2d.append(dxs)
            dy_2d.append(dys)

    neuron_ids = np.concatenate(neuron_ids_per_session)
    vs_flat = [item for vs_row in vs_2d for item in vs_row]
    # guard the empty case, where np.median would only emit a warning and return nan
    v_median = np.median(vs_flat) if len(vs_flat) > 0 else np.nan
    print(f"{v_median} µm/s = {v_median*6./100.} mm/min") 
    if plot_res:
        fig = plt.figure(figsize=(16,8))
        plt.hist(vs_flat, bins=150)
        plt.show()
    if vectorize:
        centres_x = np.concatenate(centres_x_per_session)
        centres_y = np.concatenate(centres_y_per_session)
        return (uuids, neuron_ids, vs_2d, dx_2d, dy_2d, centres_x, centres_y)
    else:
        return (uuids, neuron_ids, vs_2d)  # in µm/s
    

### Set number of neighbors to find

In [None]:
N_NEIGHBORS = 1

In [None]:
uuids_neuron1, ids_neuron1, vs_neuron1 = SDSpeedsCellAlgorithm(all_onsets_df, 1,N_NEIGHBORS)

In [None]:
uuids_neuron2, ids_neuron2, vs_neuron2 = SDSpeedsCellAlgorithm(all_onsets_df, 2,N_NEIGHBORS)

In [None]:
# Seizure speeds: with onset_sz=True the "onset_sz" column is used and i_wave is ignored.
uuids_neuron_sz, ids_neuron_sz, vs_neuron_sz = SDSpeedsCellAlgorithm(
    all_onsets_df, i_wave=2, n_neighbors=1, plot_res=False, vectorize=False, onset_sz=True
)

## Create dataframe from results

### Sz

In [None]:
# Flatten the per-session seizure speeds and repeat each session's uuid once per neuron.
vs_neuron_sz_flat = [v for session_vs in vs_neuron_sz for v in session_vs]
uuids_neuron_sz_flat = [session_uuid for session_uuid, session_vs in zip(uuids_neuron_sz, vs_neuron_sz) for _ in session_vs]
assert len(vs_neuron_sz_flat) == len(uuids_neuron_sz_flat)
# per-session summary speed (the median, despite the "_mean" suffix)
vs_neuron_sz_mean = [np.median(session_vs) for session_vs in vs_neuron_sz]

### SD 1

In [None]:
# Flatten the per-session wave-1 speeds and repeat each session's uuid once per neuron.
vs_neuron1_flat = [v for session_vs in vs_neuron1 for v in session_vs]
uuids_neuron1_flat = [session_uuid for session_uuid, session_vs in zip(uuids_neuron1, vs_neuron1) for _ in session_vs]
assert len(vs_neuron1_flat) == len(uuids_neuron1_flat)
# per-session summary speed (the median, despite the "_mean" suffix)
vs_neuron1_mean = [np.median(session_vs) for session_vs in vs_neuron1]

In [None]:
# grid algorithm
# Flatten the per-session wave-1 tile speeds and repeat each session's uuid once per tile.
vs_grid1_flat = [v for session_vs in vs_grid1 for v in session_vs]
uuids_grid1_flat = [session_uuid for session_uuid, session_vs in zip(uuids_grid1, vs_grid1) for _ in session_vs]
assert len(vs_grid1_flat) == len(uuids_grid1_flat)
# per-session summary speed (the median, despite the "_mean" suffix)
vs_grid1_mean = [np.median(session_vs) for session_vs in vs_grid1]

### SD 2

In [None]:
# Flatten all arrays. The uuids must come from the wave-2 result (uuids_neuron2): the
# sessions with a second SD wave need not line up index-by-index with those of wave 1.
vs_neuron2_flat = [element for sublist in vs_neuron2 for element in sublist]
assert len(uuids_neuron2) == len(vs_neuron2)
uuids_neuron2_flat = [uuids_neuron2[i] for i, neurons in enumerate(vs_neuron2) for j in range(len(neurons))]
assert len(vs_neuron2_flat) == len(uuids_neuron2_flat)
# create mean velocity for all sessions
# NOTE(review): the wave-1 and Sz cells summarize with np.median, this one with np.mean — confirm which is intended
vs_neuron2_mean = [np.mean(element) for element in vs_neuron2]

In [None]:
# Sanity check per session: number of NaN speeds, then the mean with and without NaN values.
for a in vs_neuron2:
    # count the NaN entries (len(np.isnan(a)) would only give the array length)
    print(np.isnan(a).sum())
    print(np.mean(a))
    print(np.mean(a[~np.isnan(a)]))
    print()

In [None]:
# Flatten all arrays. The uuids must come from the wave-2 grid result (uuids_grid2): the
# sessions with a second SD wave need not line up index-by-index with those of wave 1.
vs_grid2_flat = [element for sublist in vs_grid2 for element in sublist]
assert len(uuids_grid2) == len(vs_grid2)
uuids_grid2_flat = [uuids_grid2[i] for i, tiles in enumerate(vs_grid2) for j in range(len(tiles))]
assert len(vs_grid2_flat) == len(uuids_grid2_flat)
# create mean velocity for all sessions
# NOTE(review): the wave-1 grid cell summarizes with np.median, this one with np.mean — confirm which is intended
vs_grid2_mean = [np.mean(element) for element in vs_grid2]

## Create data frames

In [None]:
# grid-based algorithm: per wave, one frame with every tile speed and one with the per-session summary
vs_grid_df1 = pd.DataFrame(dict(uuid=uuids_grid1_flat, v_umps=vs_grid1_flat, i_wave=1))
vs_grid_df1_means = pd.DataFrame(dict(uuid=uuids_grid1, v_umps=vs_grid1_mean, i_wave=1))
vs_grid_df2 = pd.DataFrame(dict(uuid=uuids_grid2_flat, v_umps=vs_grid2_flat, i_wave=2))
vs_grid_df2_means = pd.DataFrame(dict(uuid=uuids_grid2, v_umps=vs_grid2_mean, i_wave=2))

# stack both waves; reset_index() keeps the old per-wave index as an "index" column, just in case
vs_grid_df = pd.concat([vs_grid_df1, vs_grid_df2]).reset_index()
vs_grid_df_means = pd.concat([vs_grid_df1_means, vs_grid_df2_means]).reset_index()

In [None]:
# neuron-based algorithm
vs_df1 = pd.DataFrame({"uuid": uuids_neuron1_flat, "v_umps": vs_neuron1_flat, "i_wave": 1})  # all velocities calculated
vs_df1_means = pd.DataFrame({"uuid": uuids_neuron1, "v_umps": vs_neuron1_mean, "i_wave": 1})
vs_df2 = pd.DataFrame({"uuid": uuids_neuron2_flat, "v_umps": vs_neuron2_flat, "i_wave": 2})  # all velocities calculated
vs_df2_means = pd.DataFrame({"uuid": uuids_neuron2, "v_umps": vs_neuron2_mean, "i_wave": 2})

# seizure speeds are marked with i_wave 0 in both the full and the per-session frame
vs_df_sz = pd.DataFrame({"uuid": uuids_neuron_sz_flat, "v_umps": vs_neuron_sz_flat, "i_wave": 0})  # all velocities calculated
vs_df_sz_means = pd.DataFrame({"uuid": uuids_neuron_sz, "v_umps": vs_neuron_sz_mean, "i_wave": 0})

# reset index, but keep old index just in case
vs_df = pd.concat([vs_df1, vs_df2], axis=0).reset_index()
vs_df_means = pd.concat([vs_df1_means, vs_df2_means], axis=0).reset_index()

vs_df_sz = vs_df_sz.reset_index()
vs_df_sz_means = vs_df_sz_means.reset_index()


In [None]:
def extended_to_normal_uuid(uuid):
    """Return the part of ``uuid`` before the first underscore (the whole string if there is none)."""
    return uuid.split("_")[0]

In [None]:
# get rid of 0 values
# .copy() makes the filtered frames independent objects, so that adding columns to them
# afterwards does not trigger pandas' SettingWithCopyWarning
vs_df = vs_df[vs_df["v_umps"] > 0.0].copy()
vs_df_sz = vs_df_sz[vs_df_sz["v_umps"] > 0.0].copy()

In [None]:
# assign() returns new frames instead of writing into the filtered frames, which is what
# caused the SettingWithCopyWarning here. Series.map over the uuid column replaces the
# row-wise apply and also works for an empty frame.
vs_df = vs_df.assign(mouse_id=vs_df["uuid"].map(lambda u: ddoc.getMouseIdForUuid(extended_to_normal_uuid(u))))
vs_df_sz = vs_df_sz.assign(mouse_id=vs_df_sz["uuid"].map(lambda u: ddoc.getMouseIdForUuid(extended_to_normal_uuid(u))))

In [None]:
# Add the experiment type to every speed frame. The (possibly extended) uuid is reduced to
# the plain recording uuid and looked up in uuid_exp_type_dict; an unknown uuid raises KeyError.
# Series.map over the uuid column replaces six copies of a row-wise apply.
for speeds_frame in (vs_df, vs_grid_df, vs_df_means, vs_grid_df_means, vs_df_sz, vs_df_sz_means):
    speeds_frame["exp_type"] = speeds_frame["uuid"].map(lambda u: uuid_exp_type_dict[extended_to_normal_uuid(u)])


In [None]:
vs_df_sz[vs_df_sz["exp_type"]=="tmev"].mouse_id.unique()

### Add mm/min
1 µm/s = 60 µm/min = 0.06 mm/min

In [None]:
# µm/s -> mm/min
CONVERSION_FACTOR = 0.06

# add the converted speed column to every frame that is used further below
for speeds_frame in (vs_grid_df, vs_grid_df_means, vs_df, vs_df_means, vs_df_sz):
    speeds_frame["v_mmpmin"] = speeds_frame["v_umps"] * CONVERSION_FACTOR

In [None]:
# Histogram of the wave-2 speeds below 100 mm/min (x axis limited to 0-50 mm/min).
fig = plt.figure(figsize=(16,8))
plt.hist(vs_df[(vs_df["v_mmpmin"] < 100) & (vs_df["i_wave"] == 2)]["v_mmpmin"], bins=100)
plt.xlim((0,50))
plt.show()

In [None]:
# Create a dataset with outliers removed: per recording (uuid), drop the rows with the
# lowest and the highest 5% of speeds.
TRIM_FRACTION = 0.05
outlier_index = []
for _, session_rows in vs_df_sz.groupby("uuid"):
    # number of ROWS in the session; DataFrame.size would give rows * columns
    n_drop = int(len(session_rows) * TRIM_FRACTION)
    outlier_index.extend(session_rows["v_mmpmin"].nlargest(n_drop).index)
    outlier_index.extend(session_rows["v_mmpmin"].nsmallest(n_drop).index)
# drop once, on a de-duplicated index, so that overlapping selections cannot raise a KeyError
vs_df_sz_outliers_removed = vs_df_sz.drop(index=pd.Index(outlier_index).unique())

In [None]:
# Mean of the remaining columns per seizure recording, computed on the outlier-trimmed speeds.
sz_means_per_session = vs_df_sz_outliers_removed.groupby(["exp_type", "mouse_id", "uuid"]).mean().reset_index()

In [None]:
# todo: group by uuid and i_wave (pivot_table) and make boxplot with sem
# Mean of the remaining columns per recording and SD wave.
means_per_session = vs_df.groupby(["exp_type","mouse_id", "uuid", "i_wave"]).mean().reset_index()

In [None]:
# Bar plot of the per-session mean SD speed for each experiment type (error bars: "sd").
fig, ax = plt.subplots(figsize=(12,10))
fig.suptitle("SD speed")
sns.barplot(means_per_session, x="exp_type", y="v_mmpmin", errorbar="sd", ax=ax)
fig.tight_layout()
if save_figs:
    export_fpath_fig = os.path.join(output_folder, f"mean_sd_speed_by_exp_type_{get_datetime_for_fname()}{file_format}")
    fig.savefig(export_fpath_fig)
    print(f"Saved to {export_fpath_fig}")
plt.show()

In [None]:
# Bar plot of the per-session mean SD speed per wave, split by experiment type (error bars: "sd").
fig, ax = plt.subplots(figsize=(12,10))
fig.suptitle("SD speed")
sns.barplot(means_per_session, x="i_wave", y="v_mmpmin", hue="exp_type", errorbar="sd", ax=ax)
fig.tight_layout()
if save_figs:
    export_fpath_fig = os.path.join(output_folder, f"mean_sd_speed_{get_datetime_for_fname()}{file_format}")
    fig.savefig(export_fpath_fig)
    print(f"Saved to {export_fpath_fig}")
plt.show()

In [None]:
# Tag both summaries so that SD and seizure rows can be told apart once they are concatenated.
means_per_session["speed_type"] = "SD"
sz_means_per_session["speed_type"] = "Sz"

In [None]:
# Stack the SD and Sz per-session means, restricted to the same set of columns.
speed_columns = ["exp_type", "mouse_id", "uuid", "speed_type", "i_wave", "v_umps", "v_mmpmin"]
df_mean_speeds = pd.concat([means_per_session[speed_columns], sz_means_per_session[speed_columns]])


In [None]:
# Export the per-session mean speeds (SD and Sz) to Excel. The export honors the
# save_dsets switch from the configuration cell instead of an unconditional `if True:`.
if save_dsets:
    export_fpath_df_mean_speeds = os.path.join(output_folder, f"mean_onset_speed_{get_datetime_for_fname()}.xlsx")
    df_mean_speeds.to_excel(export_fpath_df_mean_speeds, index=False)
    print(f"Saved to {export_fpath_df_mean_speeds}")

In [None]:
vs_df_sz

In [None]:
# Look up the session files of one specific recording (hard-coded uuid) and print the first
# "folder" and "nd2" entries.
# NOTE(review): looks like a manual spot check — confirm it is still needed.
d = ddoc.getSessionFilesForUuid("4ae789df9809469b8668ff01a8cc91ee")
print(d.folder.iloc[0])
print(d.nd2.iloc[0])


In [None]:
# Bar plot of the per-session mean seizure speed per experiment type, split by mouse (error bars: "sd").
fig, ax = plt.subplots(figsize=(12,10))
fig.suptitle("Sz speed")
sns.barplot(sz_means_per_session, x="exp_type", y="v_mmpmin", hue="mouse_id", errorbar="sd", ax=ax)
fig.tight_layout()
if save_figs:
    export_fpath_fig = os.path.join(output_folder, f"mean_sz_speed_{get_datetime_for_fname()}{file_format}")
    fig.savefig(export_fpath_fig)
    print(f"Saved to {export_fpath_fig}")
plt.show()

In [None]:
# Bar plot of the per-session mean seizure speed for each experiment type (error bars: "sd").
fig, ax = plt.subplots(figsize=(12,10))
fig.suptitle("Sz speed")
sns.barplot(sz_means_per_session, x="exp_type", y="v_mmpmin", errorbar="sd", ax=ax)
fig.tight_layout()
if save_figs:
    export_fpath_fig = os.path.join(output_folder, f"mean_sz_speed_by_exp_type_{get_datetime_for_fname()}{file_format}")
    fig.savefig(export_fpath_fig)
    print(f"Saved to {export_fpath_fig}")
plt.show()

In [None]:
# Histogram of all SD speeds (both waves) below 100 mm/min (x axis limited to 0-50 mm/min).
fig = plt.figure(figsize=(16,8))
plt.hist(vs_df[(vs_df["v_mmpmin"] < 100)]["v_mmpmin"], bins=100)
plt.xlim((0,50))
plt.show()

In [None]:
means_per_session

In [None]:
vs_df

In [None]:
fig = plt.figure()
