In [29]:
import os
import torch
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib
from itertools import combinations
from napatrackmater.Trackvector import (
    BROWNIAN_FEATURES
)
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# Dataset and channel this notebook works on.
dataset_name = 'Sixth'
home_folder = '/home/debian/jz/'
channel = 'nuclei_'

# Analysis settings defined for the rest of the notebook.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tracklet_length = 25
deltat = 10
# Integer label -> cell type name; the names are compared against the
# 'Cell_Type' column of the tracks dataframe.
class_map_gbr = {0: "Basal", 1: "Radial", 2: "Goblet"}

# Input and output locations, all derived from the tracking directory.
tracking_directory = f'{home_folder}Mari_Data_Oneat/Mari_{dataset_name}_Dataset_Analysis/nuclei_membrane_tracking/'
data_frames_dir = os.path.join(tracking_directory, 'dataframes/')
normalized_dataframe = os.path.join(data_frames_dir, f'goblet_basal_dataframe_normalized_{channel}.csv')
save_dir = os.path.join(tracking_directory, f'{channel}phasespaces')

# Load the CSV that the following cells operate on.
print(f'reading data from {normalized_dataframe}')
tracks_dataframe = pd.read_csv(normalized_dataframe)

# Make sure the folder that receives the phase-space figures exists.
Path(save_dir).mkdir(exist_ok=True)

reading data from /home/debian/jz/Mari_Data_Oneat/Mari_Sixth_Dataset_Analysis/nuclei_membrane_tracking/dataframes/goblet_basal_dataframe_normalized_nuclei_.csv


In [15]:

# Collect, for every cell type and time point, the per-track values of each
# pair of Brownian features:
#   result_dict[cell_type][time_point]["<f1>_vs_<f2>"] -> list of (n_rows, 2) arrays
all_trackmate_ids = list(tracks_dataframe['TrackMate Track ID'].unique())
result_dict = {cell_type: {} for cell_type in class_map_gbr.values()}
unique_time_points = tracks_dataframe['t'].unique()

# The feature pairs and their column positions inside the extracted feature
# matrix do not depend on the data, so they are computed once here instead of
# once per track and time point.
feature_pairs = list(combinations(BROWNIAN_FEATURES, 2))
feature_pair_columns = [
    (
        f"{feature_name1}_vs_{feature_name2}",
        [BROWNIAN_FEATURES.index(feature_name1), BROWNIAN_FEATURES.index(feature_name2)],
    )
    for feature_name1, feature_name2 in feature_pairs
]

for time_point in unique_time_points:
    time_data = tracks_dataframe[tracks_dataframe['t'] == time_point]

    for cell_type in class_map_gbr.values():
        cell_type_data = time_data[time_data['Cell_Type'] == cell_type]
        if cell_type_data.empty:
            # No cell of this type at this time point: leave no entry behind.
            continue

        pairs_at_time = result_dict[cell_type].setdefault(time_point, {})

        for track_id in cell_type_data['Track ID'].unique():
            track_features = cell_type_data[cell_type_data['Track ID'] == track_id][BROWNIAN_FEATURES].to_numpy()

            for pair_key, pair_columns in feature_pair_columns:
                # (n_rows, 2) array: column 0 holds the first feature of the
                # pair, column 1 the second.
                pairwise_values = track_features[:, pair_columns]
                pairs_at_time.setdefault(pair_key, []).append(pairwise_values)


In [38]:
def build_dataframe(result_dict):
    """Flatten ``result_dict`` into a long-format DataFrame for plotting.

    Parameters
    ----------
    result_dict : dict
        Nested mapping
        ``{cell_type: {time_point: {"<f1>_vs_<f2>": [array, ...]}}}`` in which
        every array iterates as ``(value_of_f1, value_of_f2)`` pairs.

    Returns
    -------
    pandas.DataFrame
        One row per ``(f1, f2)`` sample with the columns ``'Cell Type'``,
        ``'Time Point'`` and ``'Feature Pair'`` plus one column per feature
        name. A row only carries values in the two feature columns of its own
        pair; the remaining feature columns are NaN for that row. When
        ``result_dict`` holds no samples, an empty frame that still has the
        three identifying columns is returned so that column look-ups on the
        result do not raise ``KeyError``.

    Raises
    ------
    ValueError
        If a feature-pair key does not split into exactly two names on
        ``'_vs_'``.
    """
    id_columns = ['Cell Type', 'Time Point', 'Feature Pair']
    rows = []

    for cell_type, time_data in result_dict.items():
        for time_point, feature_dict in time_data.items():
            for feature_pair, data_lists in feature_dict.items():
                # The column names depend only on the pair key, so split once
                # per pair rather than once per sample.
                feature_name1, feature_name2 = feature_pair.split('_vs_')
                for pairwise_array in data_lists:
                    rows.extend(
                        {
                            'Cell Type': cell_type,
                            'Time Point': time_point,
                            'Feature Pair': feature_pair,
                            feature_name1: feature1,
                            feature_name2: feature2,
                        }
                        for feature1, feature2 in pairwise_array
                    )

    if not rows:
        # pd.DataFrame([]) would have no columns at all.
        return pd.DataFrame(columns=id_columns)

    return pd.DataFrame(rows)





def plot_phasespace(df, title='phasespace'):
    """Draw and save one bivariate KDE figure per cell type and feature pair.

    For every cell type and feature pair found in ``df`` a single figure is
    created in which each time point gets its own ``sns.kdeplot`` layer,
    coloured by time through the viridis colormap, together with a colorbar
    and a legend. Each figure is written as a PNG, shown, and then closed.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with the columns ``'Cell Type'``, ``'Time Point'``
        and ``'Feature Pair'`` (values of the form ``"<f1>_vs_<f2>"``) plus one
        column per feature name.
    title : str, optional
        Prefix used in the figure title and in the output file name.

    Returns
    -------
    None

    Notes
    -----
    Reads the module-level names ``save_dir`` (output folder) and
    ``dataset_name`` (part of the file name).
    """
    if df.empty:
        # Nothing to draw; also avoids a KeyError on a frame without columns.
        print("No data to plot.")
        return

    cmap = plt.cm.viridis
    # One colour scale for all figures so that colours are comparable.
    norm = matplotlib.colors.Normalize(vmin=df['Time Point'].min(), vmax=df['Time Point'].max())

    for cell_type in df['Cell Type'].unique():
        cell_type_df = df[df['Cell Type'] == cell_type]

        for feature_pair in cell_type_df['Feature Pair'].unique():
            pair_df = cell_type_df[cell_type_df['Feature Pair'] == feature_pair]
            print(f"Plotting for {cell_type} and feature pair {feature_pair}")

            actual_feature_name1, actual_feature_name2 = feature_pair.split('_vs_')

            fig, ax = plt.subplots(figsize=(8, 10))

            for time_point in sorted(pair_df['Time Point'].unique()):
                time_df = pair_df[pair_df['Time Point'] == time_point]

                sns.kdeplot(
                    data=time_df,
                    x=actual_feature_name1,
                    y=actual_feature_name2,
                    label=f"Time {time_point}",
                    alpha=0.5,
                    ax=ax,
                    color=cmap(norm(time_point))
                )

            ax.set_xlabel(f"{actual_feature_name1}", fontsize=14)
            ax.set_ylabel(f"{actual_feature_name2}", fontsize=14)

            # The mappable exists only to feed the colorbar; it carries no data.
            sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
            sm.set_array([])
            cbar = fig.colorbar(sm, ax=ax, orientation='vertical')
            cbar.set_label('Time Point', fontsize=14)

            ax.set_title(f"{title} - {cell_type}", fontsize=16)
            ax.legend(title="Time Points", loc="upper right", fontsize=10)

            plot_filename = os.path.join(save_dir, f"{title}_{cell_type}_{actual_feature_name1}_{actual_feature_name2}_{dataset_name}_phasespace.png")
            # Save through the figure object rather than pyplot's implicit
            # "current figure", and save before show().
            fig.savefig(plot_filename, dpi=300, bbox_inches='tight')
            plt.show()
            # Release the figure so that open figures do not pile up over the
            # cell-type x feature-pair loop.
            plt.close(fig)



            


In [17]:
# Flatten the nested result_dict into a single DataFrame for plotting.
feature_dataframe = build_dataframe(result_dict)


In [39]:

# Draw and save one phase-space figure per cell type and feature pair.
plot_phasespace(feature_dataframe)

Plotting for Basal and feature pair Radius_vs_Eccentricity_Comp_First
