In [1]:
# The following cells combine the annotation data with the time series data.
# `path` is the folder that is searched for annotation pickles (*.pkl);
# `dict_path` is a pickled dictionary of time-series tables that is looked up by recording name.
path = r'C:\Users\Grover\Documents\GitHub\EZLabel\true_annotations'
dict_path = r'Z:\mfk\basty-projects\tmp_results\predictions\ProboscisPumping\bouts_dict.pkl'

import pandas as pd
import os
import glob


def process_row(row, dictionary, N):
    """Cut a window of N samples on each side of a peak out of every series column.

    Parameters
    ----------
    row : mapping-like
        Must provide 'name', 'trial_id' and 'peak_index'.
    dictionary : mapping
        Maps a name to a DataFrame that is indexed by trial id. The columns
        'start_index', 'stop_index' and 'region' are dropped; every remaining
        cell is treated as a sliceable sequence.
    N : int
        Number of samples kept before and after the peak.

    Returns
    -------
    dict
        Column name -> ``cell[peak_index - N:peak_index + N]``. A column is
        left out when the window would start before position 0 or end past
        the length of that column's sequence.
    """
    features = dictionary[row['name']].drop(['start_index', 'stop_index', 'region'], axis=1)
    trial = int(row['trial_id'])
    lo = row['peak_index'] - N
    hi = row['peak_index'] + N

    # The guard guarantees 0 <= lo and hi <= len(cell), so no clamping is needed.
    return {
        column: features.loc[trial, column][lo:hi]
        for column in features.columns
        if lo >= 0 and hi <= len(features.loc[trial, column])
    }


In [2]:

# Find every annotation pickle and load the time-series dictionary
pkl_files = glob.glob(os.path.join(path, '*.pkl'))
ts_dict = pd.read_pickle(dict_path)

# One DataFrame per annotation file, tagged with the file's base name (extension removed)
df_list = []
for file in pkl_files:
    df = pd.DataFrame(pd.read_pickle(file))
    df['name'] = os.path.splitext(os.path.basename(file))[0]
    df_list.append(df)

# Stack all files into one table with a fresh 0..n-1 index, then give the
# generic 'index' / 'column' headers more descriptive names
annotations = pd.concat(df_list, ignore_index=True).rename(
    columns={'index': 'peak_index', 'column': 'trial_id'}
)

In [3]:
annotations

Unnamed: 0,peak_index,value,trial_id,name
0,534,1014.361120,0,Fly05182022_5d
1,694,1017.733769,0,Fly05182022_5d
2,903,1027.228163,0,Fly05182022_5d
3,1207,1021.721591,0,Fly05182022_5d
4,1623,1023.545935,0,Fly05182022_5d
...,...,...,...,...
8486,835,387.119720,22,Fly08032022_6d_SD_B
8487,905,386.726096,22,Fly08032022_6d_SD_B
8488,966,389.234434,22,Fly08032022_6d_SD_B
8489,1041,504.997931,22,Fly08032022_6d_SD_B


In [None]:
annotations

In [None]:
# Build one DataFrame per annotation row and concatenate them ONCE at the end.
# Calling pd.concat inside the loop copies everything accumulated so far on
# every iteration, which makes the loop quadratic in the number of rows.
row_frames = []

# Running id of the slice; it advances for every annotation row, including
# rows for which process_row returns no columns
slice_counter = 0

# Loop through each row in annotations
for annotation_idx, row in annotations.iterrows():
    # Window of 30 samples on each side of the peak, one entry per column
    processed_row = process_row(row, ts_dict, 30)

    # Create a DataFrame for this row
    row_df = pd.DataFrame(processed_row)

    # Add 'slice_id' to the DataFrame (it is also part of the index below)
    row_df['slice_id'] = slice_counter

    # Set a multi-index using 'name', 'slice_id' and the existing index of row_df.
    # The comprehension variable is deliberately not named like the loop variable.
    row_df.index = pd.MultiIndex.from_tuples(
        [(row['name'], slice_counter, time_idx) for time_idx in row_df.index],
        names=['name', 'slice_id', 'time'],
    )

    row_frames.append(row_df)

    # Increment the slice_counter
    slice_counter += 1

# pd.concat raises on an empty list, so fall back to an empty frame in that case
processed_data_df = pd.concat(row_frames) if row_frames else pd.DataFrame()


In [None]:
import numpy as np
# Split the stacked table into one sub-DataFrame per (name, slice_id) pair
df_dict = {
    key: group
    for key, group in processed_data_df.groupby(level=['name', 'slice_id'])
}

# 2D value array of every slice, in the dictionary's iteration order
array_list = [group.values for group in df_dict.values()]

# Stack along a new leading axis to get a single 3D numpy array
# (np.stack requires every slice to have the same shape)
np_array = np.stack(array_list)


In [None]:
# Folder that receives the exported slice data
output_path = r'C:\Users\Grover\Documents\GitHub\EZLabel'

# Persist the multi-indexed slice table as a pickle
processed_data_df.to_pickle(os.path.join(output_path,'false_peak_annotations.pkl'))

In [None]:
# Persist the stacked slice array in NumPy .npy format under output_path
np.save(os.path.join(output_path,'false_peak_annotations.npy'),np_array)

In [4]:
# NOTE: this rebinds output_path (another cell also assigns it), so any cell
# executed after this one that uses output_path writes into this folder.
output_path = r'C:\Users\Grover\Documents\GitHub\EZLabel\true_annotations\output'
# Persist the combined annotation table as a pickle
annotations.to_pickle(os.path.join(output_path,'true_annotations.pkl'))

In [None]:
annotations

In [None]:
import os
import pandas as pd

# Fetch all .mp4 files from the directory
directory = r'Y:\DeepSleepPaperData\Annotated\PredictedVideos\Pumping'
file_names = [f for f in os.listdir(directory) if f.endswith('.mp4')]


def _parse_video_filename(file_name):
    """Extract [name, trial_id, start] from one video file name.

    The name is split on '_' after the file extension has been removed, so
    that a numeric token at the very end (e.g. '..._start_1200.mp4') is still
    a clean integer. Everything before the 'index' token is the recording
    name, the token after 'index' is the trial id and the token after 'start'
    is the start value.

    Raises
    ------
    ValueError
        If the 'index' or 'start' token is missing or is not followed by an
        integer; the message names the offending file.
    """
    stem = os.path.splitext(file_name)[0]
    parts = stem.split("_")
    try:
        index_position = parts.index('index')
        name = "_".join(parts[:index_position])
        trial_id = int(parts[index_position + 1])
        start = int(parts[parts.index('start') + 1])
    except (ValueError, IndexError) as err:
        raise ValueError(f"Cannot parse video file name {file_name!r}: {err}") from err
    return [name, trial_id, start]


# Process each filename
data = [_parse_video_filename(file) for file in file_names]

# Construct the DataFrame
df = pd.DataFrame(data, columns=["name", "trial_id", "start"])

print(df)


In [None]:
print(annotations)

In [None]:
# Give the join key the same integer dtype in both tables before merging
for frame in (annotations, df):
    frame['trial_id'] = frame['trial_id'].astype('int64')

# Left join on (name, trial_id): every annotation row is kept and receives the
# 'start' value of the matching video row, or NaN when there is none
merged_df = pd.merge(
    annotations,
    df.loc[:, ['name', 'trial_id', 'start']],
    how='left',
    on=['name', 'trial_id'],
)

print(merged_df)


In [None]:
# pump_pos = peak_index + start; NaN wherever the left merge found no 'start'.
# Presumably this places the peak on the time axis of the full recording - TODO confirm.
merged_df['pump_pos'] = merged_df['peak_index'] + merged_df['start']

In [None]:
merged_df

In [None]:
# Export the merged table as CSV (relative path, so it lands in the current
# working directory); index=False leaves the row index out of the file.
merged_df.to_csv('merged_annot.csv',index=False)

In [None]:
print(merged_df)

In [None]:
import pandas as pd
# Pickled experiment-info table; other cells read its 'ExptNames' and 'SD' columns
expt_info_df_path = r'Z:\mfk\basty-projects\expt_info_df.pkl'
expt_info_df = pd.read_pickle(expt_info_df_path)

In [None]:
expt_info_df

In [None]:

def generate_tick_data(FPS=30, sd=False):
    """Return x-tick positions (in frames) and matching 'ZT..' labels.

    Parameters
    ----------
    FPS : number
        Multiplier used to convert hours to tick positions (FPS * 60 * 60 per hour).
    sd : bool
        When equal to False: ticks every 2 hours over 16 hours, labelled
        ZT10, ZT12, ... wrapping around at 24.
        Otherwise: ticks every hour over 6 hours, labelled ZT0, ZT1, ...

    Returns
    -------
    (numpy.ndarray, list of str)
        Tick positions and their labels, same length.
    """
    frames_per_hour = FPS * 60 * 60

    # Equality comparison kept on purpose: only values that compare equal to
    # False select the 16-hour layout (None, for example, does not).
    if sd == False:
        ZT_ticks = np.arange(start=0, stop=frames_per_hour * 16 + 1, step=frames_per_hour * 2)
        ZT_ticklabels = [f"ZT{(2 * k + 10) % 24}" for k in range(len(ZT_ticks))]
        return ZT_ticks, ZT_ticklabels

    ZT_ticks = np.arange(start=0, stop=frames_per_hour * 6 + 1, step=frames_per_hour * 1)
    ZT_ticklabels = [f"ZT{k}" for k in range(len(ZT_ticks))]
    return ZT_ticks, ZT_ticklabels


In [None]:
import matplotlib.pyplot as plt
import numpy as np
# Create a dictionary to map names to SD values from expt_info_df
name_to_sd = dict(zip(expt_info_df['ExptNames'], expt_info_df['SD']))

# Split by SD value. merged_df is used here (not df): plot_subset reads the
# 'pump_pos' column, which is only ever added to merged_df, while df holds
# just name / trial_id / start and would raise a KeyError.
sd_mask = merged_df['name'].map(name_to_sd)
df_sd_true = merged_df[sd_mask]
df_sd_false = merged_df[~sd_mask]

# Initialize subplots; the figure height grows with the number of distinct names
fig, axes = plt.subplots(nrows=2, figsize=(10, len(merged_df['name'].unique())))


def plot_subset(subset_df, ax, color):
    """Draw one horizontal lane per name with a bar for every pump position.

    Parameters
    ----------
    subset_df : DataFrame with 'name' and 'pump_pos' columns.
    ax : matplotlib Axes to draw on.
    color : fill colour of the bars.

    Each bar is 60 units wide and centred on pump_pos (left edge at pump_pos - 30).
    """
    unique_names = subset_df['name'].unique()
    for idx, name in enumerate(unique_names):
        sub_subset_df = subset_df[subset_df['name'] == name]
        for _, row in sub_subset_df.iterrows():
            pump_pos = row['pump_pos']
            ax.barh(idx, 60, left=pump_pos - 30, color=color, edgecolor='none')
    ax.set_yticks(range(len(unique_names)))
    ax.set_yticklabels(unique_names, rotation=0)  # Consider rotating y-tick labels if they overlap


# Plot
plot_subset(df_sd_true, axes[0], '#377eb8')
plot_subset(df_sd_false, axes[1], '#d62728')

ZT_ticks, ZT_ticklabels = generate_tick_data(30, sd=True)
axes[0].set_xticks(ZT_ticks)
axes[0].set_xticklabels(ZT_ticklabels)  # Set x-tick labels for SD = True

ZT_ticks, ZT_ticklabels = generate_tick_data(30, sd=False)
axes[1].set_xticks(ZT_ticks)
axes[1].set_xticklabels(ZT_ticklabels)  # Set x-tick labels for SD = False

axes[0].set_title('SD = True')
axes[1].set_title('SD = False')

plt.tight_layout()
plt.savefig('Compact_Pump_Positions.pdf')
plt.show()



In [None]:
import matplotlib.pyplot as plt

# Create a dictionary to map names to SD values from expt_info_df
# (keys come from the 'ExptNames' column, values from the 'SD' column)
name_to_sd = dict(zip(expt_info_df['ExptNames'], expt_info_df['SD']))

# Helper function to plot given a subset of df
def plot_data(subset_df, color, sd):
    """Plot, save and show the pump positions of one group of recordings.

    Parameters
    ----------
    subset_df : DataFrame with 'name' and 'pump_pos' columns.
    color : fill colour of the bars.
    sd : passed to generate_tick_data to pick the x-axis layout; its
        truthiness also selects the title and the output file name.

    One horizontal lane is drawn per distinct name; every pump position gets a
    60-wide bar centred on it. The figure is written to
    'Pump_Positions_<title>.pdf' in the current working directory.
    """
    unique_names = subset_df['name'].unique()
    fig, ax = plt.subplots(figsize=(10, len(unique_names)))

    for lane, name in enumerate(unique_names):
        lane_rows = subset_df.loc[subset_df['name'] == name]
        for pump_pos in lane_rows['pump_pos']:
            ax.barh(lane, 60, left=pump_pos - 30, color=color, edgecolor='none')

    ax.set_yticks(range(len(unique_names)))
    ax.set_yticklabels(unique_names)

    tick_positions, tick_labels = generate_tick_data(30, sd=sd)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)

    if sd:
        title = 'SD = True'
    else:
        title = 'SD = False'
    ax.set_title(title)

    plt.tight_layout()
    plt.savefig(f'Pump_Positions_{title}.pdf')
    plt.show()

# Function to plot data for SD = True
def plot_sd_true():
    """Plot pump positions of the recordings whose name maps to a true SD flag.

    Rows are taken from merged_df rather than df: plot_data reads the
    'pump_pos' column, which exists only on merged_df (df has just
    name / trial_id / start), so filtering df raised a KeyError.
    """
    df_sd_true = merged_df[merged_df['name'].map(name_to_sd)]
    plot_data(df_sd_true, '#377eb8', sd=True)


# Function to plot data for SD = False
def plot_sd_false():
    """Plot pump positions of the recordings whose name maps to a false SD flag.

    Rows are taken from merged_df rather than df: plot_data reads the
    'pump_pos' column, which exists only on merged_df (df has just
    name / trial_id / start), so filtering df raised a KeyError.
    """
    df_sd_false = merged_df[~merged_df['name'].map(name_to_sd)]
    plot_data(df_sd_false, '#d62728', sd=False)


In [None]:
plot_sd_true()

In [None]:
import matplotlib.pyplot as plt

# Extract pump_pos values for SD=True and SD=False.
# merged_df is the frame that carries 'pump_pos' (df does not have that column).
# Rows without a matching video have NaN pump_pos after the left merge; they are
# dropped because hist() with an integer bin count cannot derive a finite range
# from data that contains NaN.
sd_mask = merged_df['name'].map(name_to_sd)
pump_pos_sd_true = merged_df[sd_mask]['pump_pos'].dropna()
pump_pos_sd_false = merged_df[~sd_mask]['pump_pos'].dropna()

# Initialize a figure with two subplots
fig, axes = plt.subplots(nrows=2, figsize=(10, 10))

# Plot histogram for SD=True
axes[0].hist(pump_pos_sd_true, bins=12, color='#377eb8', edgecolor='black')
axes[0].set_title('Histogram of pump_pos (SD=True)')
axes[0].set_xlabel('pump_pos values')
axes[0].set_ylabel('Frequency')
ZT_ticks, ZT_ticklabels = generate_tick_data(30, sd=True)
axes[0].set_xticks(ZT_ticks)
axes[0].set_xticklabels(ZT_ticklabels)

# Plot histogram for SD=False
axes[1].hist(pump_pos_sd_false, bins=32, color='#d62728', edgecolor='black')
axes[1].set_title('Histogram of pump_pos (SD=False)')
axes[1].set_xlabel('pump_pos values')
axes[1].set_ylabel('Frequency')
ZT_ticks, ZT_ticklabels = generate_tick_data(30, sd=False)
axes[1].set_xticks(ZT_ticks)
axes[1].set_xticklabels(ZT_ticklabels)

plt.tight_layout()
plt.savefig('Pump_Positions_Histograms.pdf')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Function to compute histograms for each group (name)
def compute_histograms(grouped_data, bins):
    """Histogram the 'pump_pos' values of every group.

    Parameters
    ----------
    grouped_data : iterable of (name, group) pairs, e.g. a pandas GroupBy;
        each group must provide a 'pump_pos' entry.
    bins : bin specification forwarded to numpy.histogram.

    Returns
    -------
    dict
        Group name -> array of counts per bin (the bin edges are discarded).
    """
    return {
        name: np.histogram(group['pump_pos'], bins=bins)[0]
        for name, group in grouped_data
    }

FPS = 30
# Define bin edges
bins_sd_true = np.linspace(0, FPS * 60 * 60 * 6, 13) # 12 bins
bins_sd_false = np.linspace(0, FPS * 60 * 60 * 16, 33) # 32 bins

# Group data by 'name' and 'SD' condition.
# merged_df is the frame that carries 'pump_pos', which compute_histograms
# reads; df only has name / trial_id / start and would raise a KeyError.
sd_mask = merged_df['name'].map(name_to_sd)
grouped_sd_true = merged_df[sd_mask].groupby('name')
grouped_sd_false = merged_df[~sd_mask].groupby('name')

# Compute histograms for each group
histograms_sd_true = compute_histograms(grouped_sd_true, bins_sd_true)
histograms_sd_false = compute_histograms(grouped_sd_false, bins_sd_false)

# Plot the aggregated histograms: bar height = mean count per bin across names
fig, axes = plt.subplots(nrows=2, figsize=(10, 10))

axes[0].bar(bins_sd_true[:-1], np.nanmean(list(histograms_sd_true.values()), axis=0),
            width=np.diff(bins_sd_true), align="edge", color='#377eb8', edgecolor='black')
axes[0].set_title('Aggregated Histogram of pump_pos (SD=True)')
axes[0].set_xlabel('pump_pos values')
axes[0].set_ylabel('Frequency')
# Only the ZT ticks are set: an earlier set_xticks(bins_sd_true) call was
# overwritten right away and has been removed.
ZT_ticks, ZT_ticklabels = generate_tick_data(FPS, sd=True)
axes[0].set_xticks(ZT_ticks)
axes[0].set_xticklabels(ZT_ticklabels)

axes[1].bar(bins_sd_false[:-1], np.nanmean(list(histograms_sd_false.values()), axis=0),
            width=np.diff(bins_sd_false), align="edge", color='#d62728', edgecolor='black')
axes[1].set_title('Aggregated Histogram of pump_pos (SD=False)')
axes[1].set_xlabel('pump_pos values')
axes[1].set_ylabel('Frequency')
# Same here: the bin-edge ticks were overwritten immediately, so only ZT ticks are set
ZT_ticks, ZT_ticklabels = generate_tick_data(FPS, sd=False)
axes[1].set_xticks(ZT_ticks)
axes[1].set_xticklabels(ZT_ticklabels)

plt.tight_layout()
plt.savefig('Aggregated_Pump_Positions_Histograms.pdf')
plt.show()


In [None]:
name_to_sd

In [None]:
# Number of distinct names (i.e. groups) in each SD condition
num_unique_names_sd_true = grouped_sd_true.ngroups
num_unique_names_sd_false = grouped_sd_false.ngroups

In [None]:
num_unique_names_sd_false

In [None]:
num_unique_names_sd_true