In [400]:
import json
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
from scipy import stats

from pvd_io import *

### Load Pairwise Segment Data

In [401]:
# Root folder handed to scan_directories(). Note that a later cell joins paths
# as f"{data_dir}/...", so the trailing slash here yields "pvd_analysis//...".
data_dir = 'pvd_analysis/'
# Size threshold forwarded to scan_directories() — presumably a minimum file
# size in bytes (1e3 ~ 1 kB); confirm against pvd_io.
min_file_size = 1e3  # 1 kb
# scan_directories comes from the star import of pvd_io. It returns three
# sequences; `datasets` and `sessions` are indexed in parallel by later cells.
datasets, sessions, files = scan_directories(data_dir, min_file_size, filetype='voxels.pkl')
print(f"Located {len(files)} matched segment lists.")

Located 47 matched segment lists.


In [402]:
# Read the pickled change matrices for every session: one list entry per
# session, in the same order as `sessions`.
# NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
matched_segments_voxels, matched_segments_spline = [], []

for session_idx, session in enumerate(sessions):
    # Both pickles for a session live in the same <data_dir>/<dataset>/<session> folder
    session_dir = f"{data_dir}/{datasets[session_idx]}/{session}"

    with open(f"{session_dir}/length_change_voxels.pkl", 'rb') as voxel_handle:
        voxel_segments = pickle.load(voxel_handle)
    with open(f"{session_dir}/length_change_spline.pkl", 'rb') as spline_handle:
        spline_segments = pickle.load(spline_handle)

    # Append only after both files loaded so the two lists stay aligned
    matched_segments_voxels.append(voxel_segments)
    matched_segments_spline.append(spline_segments)

### Create Dataframe

In [403]:
# Flatten the spline-based change matrices of all sessions into long format.
# For each segment, entry [t, t-1] of its change matrix is recorded as the
# pairwise change at timepoint t (t = 1 .. n_rows-1).
change_rows = [
    (change_matrix[later, later - 1], later, segment_idx, datasets[session_idx], sessions[session_idx])
    for session_idx, session_segments in enumerate(matched_segments_spline)
    for segment_idx, change_matrix in enumerate(session_segments)
    for later in range(1, change_matrix.shape[0])
]

# Split the row tuples into one list per column
seg_changes = [row[0] for row in change_rows]
seg_timept = [row[1] for row in change_rows]
seg_id = [row[2] for row in change_rows]
seg_dataset = [row[3] for row in change_rows]
seg_session = [row[4] for row in change_rows]

# Assemble the dataframe from the column lists
seg_change_df = pd.DataFrame({
    'pairwise_change': seg_changes,
    'timepoint': seg_timept,
    'segment_id': seg_id,
    'dataset': seg_dataset,
    'session': seg_session,
})

# Remove underscores from session directory names, if needed.
def remove_trailing_underscore(value):
    """Return ``value`` without its final underscore, if it has one.

    Only a single trailing ``'_'`` is dropped. Non-string values, and strings
    that do not end in an underscore, are returned unchanged.
    """
    if not isinstance(value, str):
        return value
    # value[-1:] is '' for an empty string, so no index error is possible
    return value[:-1] if value[-1:] == '_' else value

# Strip one trailing underscore from every string cell (dataset/session names);
# non-string cells pass through unchanged.
seg_change_df = seg_change_df.map(remove_trailing_underscore)

# Load the session -> condition key and attach it as a new column
with open('session_conditions.json', 'rb') as f:
    session_conditions = json.load(f)

seg_change_df['condition'] = seg_change_df['session'].map(session_conditions)

# Sessions missing from the key get NaN here and would silently drop out of
# every condition-filtered plot and test below, so report them explicitly.
unmapped_sessions = sorted(seg_change_df.loc[seg_change_df['condition'].isna(), 'session'].unique())
if unmapped_sessions:
    print(f"Warning: no condition found for {len(unmapped_sessions)} session(s): {unmapped_sessions}")

In [404]:
# Show every row that belongs to one example session
seg_change_df[seg_change_df['session'].eq('expDS4_35')]

Unnamed: 0,pairwise_change,timepoint,segment_id,dataset,session,condition
3024,-3.405786,1,0,DataSet04,expDS4_35,Control
3025,0.461233,2,0,DataSet04,expDS4_35,Control
3026,-5.039419,3,0,DataSet04,expDS4_35,Control
3027,0.270993,1,1,DataSet04,expDS4_35,Control
3028,-2.298613,2,1,DataSet04,expDS4_35,Control
...,...,...,...,...,...,...
3115,8.442451,2,30,DataSet04,expDS4_35,Control
3116,-6.723649,3,30,DataSet04,expDS4_35,Control
3117,-4.771790,1,31,DataSet04,expDS4_35,Control
3118,-1.790498,2,31,DataSet04,expDS4_35,Control


In [406]:
# Aggressively remove outliers as a test (disabled: uncomment the next line to keep only |pairwise_change| < 20)
#seg_change_df = seg_change_df[(seg_change_df['pairwise_change'] < 20) & (seg_change_df['pairwise_change'] > -20)]

In [407]:
# Collect rows whose change magnitude exceeds 30 for visual inspection
outlier_mask = seg_change_df['pairwise_change'].abs() > 30
outlier_df = seg_change_df[outlier_mask]
outlier_df

Unnamed: 0,pairwise_change,timepoint,segment_id,dataset,session,condition
64,34.723559,2,12,DataSet00,exp240104_00_01,Control
170,-33.586650,3,21,DataSet00,exp240129_01_01,Control
172,-40.421515,2,22,DataSet00,exp240129_01_01,Control
196,34.521929,2,30,DataSet00,exp240129_01_01,Control
197,-32.631026,3,30,DataSet00,exp240129_01_01,Control
...,...,...,...,...,...,...
3113,-64.532632,3,29,DataSet04,expDS4_35,Control
3119,-40.807530,3,31,DataSet04,expDS4_35,Control
3120,-31.516089,1,0,DataSet04,expDS4_13,DOI
3129,-41.038492,1,3,DataSet04,expDS4_13,DOI


In [424]:
# Plot the distribution of every pairwise change, across all datasets and sessions
fig = px.histogram(seg_change_df, x='pairwise_change', nbins=1000, histnorm='probability density')

# Summary statistics of the full distribution
mean_value = np.mean(seg_change_df['pairwise_change'])
std_value = np.std(seg_change_df['pairwise_change'])

# Dashed black line at the mean
fig.add_vline(x=mean_value, line_width=1, line_dash="dash", line_color="black", annotation_text=f"{mean_value:.2f}", annotation_position="top")

# Dashed red lines at mean ± 3 standard deviations. The bounds are centred on
# the mean, and each label shows the x position of its own line.
for n_sigma in (3, -3):
    bound = mean_value + n_sigma * std_value
    fig.add_vline(x=bound, line_width=1, line_dash="dash", line_color="red", annotation_text=f"{bound:.2f}", annotation_position="top")

fig.show()

In [436]:
def pairwise_change_histogram(dataset="", condition="", session=False, session_id="", nbins=None, pdf=True):
    """Show a histogram of pairwise changes for one dataset/condition group or one session.

    Reads the notebook-level ``seg_change_df`` and displays a plotly figure
    with a dashed black line at the mean and dashed red lines at mean ± 3
    standard deviations. Nothing is returned; the figure is shown directly.

    Parameters
    ----------
    dataset : str
        Value of the ``dataset`` column to keep (group mode). In session mode
        it is only used for the title; if empty, the session's own dataset
        is shown instead.
    condition : str
        Value of the ``condition`` column to keep (group mode). In session
        mode it is replaced by the session's own condition.
    session : bool
        If True, select rows by ``session_id`` instead of dataset/condition.
    session_id : str
        Value of the ``session`` column to keep when ``session`` is True.
    nbins : int or None
        Forwarded to ``px.histogram`` in both normalised and count mode.
    pdf : bool
        If True, normalise as a probability density; otherwise plot raw counts.

    Raises
    ------
    ValueError
        If ``session`` is True and no rows match ``session_id``.
    """
    # Select the rows to plot
    if session:
        df = seg_change_df[seg_change_df['session'] == session_id]
        if df.empty:
            raise ValueError(f"No rows found for session {session_id!r}")
        condition = df['condition'].iloc[0]
        if not dataset:
            dataset = df['dataset'].iloc[0]
        heading = f"{dataset} / {session_id}"
    else:
        df = seg_change_df[(seg_change_df['condition'] == condition) & (seg_change_df['dataset'] == dataset)]
        heading = dataset

    # Build the histogram; nbins applies to both density and count plots
    histnorm = 'probability density' if pdf else None
    fig = px.histogram(df, x='pairwise_change', histnorm=histnorm, nbins=nbins)

    # Summary statistics of the selected rows
    mean_value = np.mean(df['pairwise_change'])
    std_value = np.std(df['pairwise_change'])

    # Dashed black line at the mean, labelled with its value
    fig.add_vline(x=mean_value, line_width=1, line_dash="dash", line_color="black", annotation_text=f"{mean_value:.2f}", annotation_position="top")
    # Unlabelled dashed red lines at mean ± 3 sigma
    for mult in (3, -3):
        fig.add_vline(x=mean_value + std_value * mult, line_width=1, line_dash="dash", line_color="red", annotation_text="", annotation_position="top")

    # Title: which data was plotted and how many samples it contains
    count = len(df)
    fig.update_layout(title_text=f"{heading}<br>Condition: <b>{condition}</b>   Count: <b>{count}</b>")

    fig.show()

In [420]:
# DataSet00: one histogram per condition
dataset = 'DataSet00'
for cond in ('Control', 'DOI'):
    pairwise_change_histogram(dataset=dataset, condition=cond)

In [418]:
# DataSet01: one histogram per condition
dataset = 'DataSet01'
for cond in ('Control', 'DOI'):
    pairwise_change_histogram(dataset=dataset, condition=cond)

In [412]:
# DataSet04: one histogram per condition
dataset = 'DataSet04'
for cond in ('Control', 'DOI'):
    pairwise_change_histogram(dataset=dataset, condition=cond)

### T-Tests

In [413]:
# Compare Control vs. DOI pairwise changes within a single dataset.
# (`stats` is imported from scipy in the notebook's import cell.)
dataset = 'DataSet00'
in_dataset = seg_change_df['dataset'] == dataset
control_data = seg_change_df.loc[in_dataset & (seg_change_df['condition'] == 'Control'), 'pairwise_change']
doi_data = seg_change_df.loc[in_dataset & (seg_change_df['condition'] == 'DOI'), 'pairwise_change']

# An empty group would give a NaN p-value, which the comparison below would
# misreport as "variances are likely not equal"; fail loudly instead.
if control_data.empty or doi_data.empty:
    raise ValueError(f"No Control and/or DOI rows found for dataset {dataset!r}")

# Perform Levene's test (null hypothesis: both groups have equal variance)
statistic, p_value = stats.levene(control_data, doi_data)
print(f"Levene's test statistic: {statistic}")
print(f"p-value: {p_value}")

# Interpret the result; `equal_var` selects Student's vs. Welch's t-test in the next cell
alpha = 0.05  # common significance level
equal_var = bool(p_value > alpha)
if equal_var:
    print("Fail to reject the null hypothesis. The variances are likely equal.")
else:
    print("Reject the null hypothesis. The variances are likely not equal.")

Levene's test statistic: 0.10730569924623767
p-value: 0.7433119227224814
Fail to reject the null hypothesis. The variances are likely equal.


In [414]:
# Two-sample t-test (DOI vs. Control); equal_var=False switches to Welch's variant
t_statistic, p_value = stats.ttest_ind(doi_data, control_data, equal_var=equal_var)

# Name the statistic after the variant that was actually run
stat_label = "t-statistic" if equal_var else "Welch's t-statistic"
print(f"{stat_label}: {t_statistic}")
print(f"P-value: {p_value}")

t-statistic: 2.028997474888454
P-value: 0.04276436422901894


### Single Session Histogram

In [442]:
# Choose one session and peek at its first rows
session = 'exp240202_01_E'
is_selected_session = seg_change_df['session'] == session
seg_change_df[is_selected_session].head()

Unnamed: 0,pairwise_change,timepoint,segment_id,dataset,session,condition
867,-4.199477,1,0,DataSet01,exp240202_01_E,DOI
868,-6.931292,2,0,DataSet01,exp240202_01_E,DOI
869,-3.097125,3,0,DataSet01,exp240202_01_E,DOI
870,5.815466,1,1,DataSet01,exp240202_01_E,DOI
871,-8.905253,2,1,DataSet01,exp240202_01_E,DOI


In [443]:
# Session mode: rows are selected by `session_id` only; the dataset and
# condition arguments are not used for filtering.
pairwise_change_histogram(session=True, session_id=session)

### Generate Histograms for each session