In [None]:
import scanpy as sc
import scvelo as scv
import numpy as np

In [None]:
# Read the AnnData object for the pancreas run from its .h5ad file.
# Every later cell in this notebook reads from or writes to this `adata`.
adata = sc.read_h5ad("../../outputs/pancreas/K11-1024-1encoderlr-2000epochs - best/adata/adata.h5ad")

In [None]:
def _log_squared_deviation(layer, reference_rows):
    """Per-row log of the summed squared difference from the reference rows' mean.

    `layer` is a 2-D array; `reference_rows` selects the rows whose column-wise
    mean is used as the reference profile.
    """
    reference_profile = layer[reference_rows].mean(0)
    return np.log(((layer - reference_profile) ** 2).sum(1))


# Rows labelled "Ductal" provide the reference profile for both layers.
ductal_cells = np.where(adata.obs["clusters"] == "Ductal")
deviation_u = _log_squared_deviation(adata.layers["Mu"], ductal_cells)
deviation_s = _log_squared_deviation(adata.layers["Ms"], ductal_cells)
total_deviation = deviation_s + deviation_u

# Min-max rescale the combined deviation to [0, 1] before plotting it.
min_val, max_val = np.min(total_deviation), np.max(total_deviation)
adata.obs["total_deviation"] = (total_deviation - min_val) / (max_val - min_val)

sc.pl.umap(adata, color="total_deviation", color_map="gnuplot")

In [None]:
import numpy as np

def least_squares_slope(time_vector, quantity_vector):
    """
    Computes the slope of the least squares line for the given time and quantity vectors.

    The slope is evaluated in centred form, sum(dx * dy) / sum(dx ** 2) with
    dx = x - mean(x) and dy = y - mean(y). This is algebraically equal to
    (n*Sxy - Sx*Sy) / (n*Sxx - Sx**2) but avoids the catastrophic cancellation
    that the raw-sums form suffers when the time values share a large offset.

    Parameters:
    time_vector (array-like): The input time values (x). Flattened to 1-D.
    quantity_vector (array-like): The input quantity values (y). Flattened to 1-D;
        must contain the same number of elements as `time_vector`.

    Returns:
    float: The slope of the least squares line. NaN inputs propagate to a NaN result.

    Raises:
    ValueError: If the inputs differ in length, are empty, or if every time
        value is identical (the slope is undefined).
    """
    x = np.asarray(time_vector, dtype=float).ravel()
    y = np.asarray(quantity_vector, dtype=float).ravel()

    if x.size != y.size:
        raise ValueError(
            f"time_vector and quantity_vector must have the same length "
            f"(got {x.size} and {y.size})."
        )

    # Detect a degenerate time axis by comparing the extremes: this is exact,
    # whereas a floating-point denominator for constant input can round to a
    # tiny non-zero value and silently yield a meaningless slope.
    if x.size == 0 or x.max() == x.min():
        raise ValueError("Denominator is zero, cannot compute slope.")

    dx = x - x.mean()
    dy = y - y.mean()
    denominator = np.dot(dx, dx)

    # Guards against underflow when the time values differ only by denormal amounts.
    if denominator == 0:
        raise ValueError("Denominator is zero, cannot compute slope.")

    return float(np.dot(dx, dy) / denominator)

In [None]:
# Pull the two pseudotime columns from the separately stored manifold object
# into `adata.obs` under the same column names.
manifold = sc.read_h5ad("../../../dim_reduction/pancreas_nosmooth.h5ad")
for pseudotime_key in ("z5_pseudotime", "z3_pseudotime"):
    adata.obs[pseudotime_key] = manifold.obs[pseudotime_key].copy()

In [None]:
import pandas as pd
# Fit, for every (cluster, gene) pair, a least-squares line of expression
# against z5 pseudotime and store its slope for all cells of that cluster:
#   * adata.layers["slope_u"] / ["slope_s"]     - slopes of the Mu / Ms layers
#   * adata.obs["slope_<gene>_u"] / ["..._s"]   - the same values, one column per gene
adata.layers["slope_u"] = np.zeros(adata.shape)
adata.layers["slope_s"] = np.zeros(adata.shape)
slope_u_layer = adata.layers["slope_u"]
slope_s_layer = adata.layers["slope_s"]

# Loop invariants, looked up once.
Mu = adata.layers["Mu"]
Ms = adata.layers["Ms"]
gene_names = list(adata.var_names)
# Plain NumPy array so that the integer row positions from np.where index it
# positionally; indexing the pandas Series with integers relies on a
# deprecated positional fallback when the obs index holds labels.
pseudotime = adata.obs["z5_pseudotime"].to_numpy()

for ctype in pd.unique(adata.obs["clusters"]):
    print(f"computing slopes for ctype: {ctype}..")
    ctype_obs = np.where(adata.obs["clusters"] == ctype)[0]
    time_vector = pseudotime[ctype_obs]
    for i in range(len(gene_names)):
        slope_u_layer[ctype_obs, i] = least_squares_slope(time_vector, Mu[ctype_obs, i])
        slope_s_layer[ctype_obs, i] = least_squares_slope(time_vector, Ms[ctype_obs, i])

# Build the per-gene obs columns in one step from the finished layers.
# The previous chained assignment (`adata.obs[col][rows] = value`) writes to a
# temporary under pandas Copy-on-Write and leaves the column untouched, and
# inserting thousands of columns one at a time fragments the DataFrame.
slope_columns = {}
for i, gene in enumerate(gene_names):
    slope_columns[f"slope_{gene}_u"] = slope_u_layer[:, i]
    slope_columns[f"slope_{gene}_s"] = slope_s_layer[:, i]
slope_obs = pd.DataFrame(slope_columns, index=adata.obs.index)

# Drop columns left over from an earlier run of this cell so that re-running
# it replaces them instead of creating duplicates.
adata.obs = pd.concat(
    [adata.obs.drop(columns=list(slope_columns), errors="ignore"), slope_obs],
    axis=1,
)

In [None]:
# UMAP plot of the cells, colored by the `z5_pseudotime` column of adata.obs.
sc.pl.umap(adata, color="z5_pseudotime")

In [None]:
# Treat the per-cluster spliced slopes as a velocity field and run the scvelo
# velocity tools on it.
slope_vkey = "slope_s"

sc.pp.neighbors(adata)
scv.tl.velocity_graph(adata, vkey=slope_vkey)
scv.tl.velocity_confidence(adata, vkey=slope_vkey)
scv.tl.velocity_pseudotime(adata, vkey=slope_vkey)
scv.pl.velocity_embedding_stream(adata, vkey=slope_vkey)

# scvelo names its outputs after the vkey ("<vkey>_confidence", "<vkey>_length",
# "<vkey>_pseudotime"). Coloring by the "velocity_*" keys would show values from
# a different velocity run stored in the file (or fail if they are absent)
# instead of the results computed just above.
keys = [f"{slope_vkey}_confidence", f"{slope_vkey}_length"]
sc.pl.umap(adata, color=keys, color_map="coolwarm")
sc.pl.umap(adata, color=f"{slope_vkey}_pseudotime", color_map="gnuplot")

In [None]:
import matplotlib.pyplot as plt
def plot_phase_plane(adata, gene_name, dataset, K, u_scale=.01, s_scale=0.01, alpha=0.5, head_width=0.02, head_length=0.03, length_includes_head=False, log=False,
                        norm_velocity=True, filter_cells=False, smooth_expr=True, show_plot=True, save_plot=True, save_path=".",
                        cell_type_key="clusters", vkey="velocity", vkey_u="velocity_u", show_points=False):
    """
    Plot one gene's phase plane (spliced on x, unspliced on y) with one velocity arrow per cell.

    Expression values are min-max normalized to [0, 1]. Each cell is drawn as an
    arrow starting at its (spliced, unspliced) position and pointing along
    (spliced velocity * s_scale, unspliced velocity * u_scale), colored by the
    cell's category in `adata.obs[cell_type_key]`.

    Parameters:
    adata: AnnData-like object providing `layers`, `var_names`, `obs` and `uns`.
    gene_name (str): Gene to plot; looked up in `adata.var_names`.
    dataset, K: Accepted for backward compatibility; not used by this function.
    u_scale, s_scale (float): Multipliers applied to the unspliced / spliced
        velocity components to set the arrow lengths.
    alpha, head_width, head_length, length_includes_head: Forwarded to the
        matplotlib arrow call.
    log (bool): If True, apply a sign-preserving log1p to the velocities,
        sign(v) * log1p(|v|), which is finite for negative values as well.
    norm_velocity (bool): If True, divide each velocity vector by its maximum
        absolute value.
    filter_cells (bool): If True, keep only cells whose normalized unspliced and
        spliced expression are both strictly positive; otherwise keep cells
        where both are >= 0 (which excludes NaN entries).
    smooth_expr (bool): If True, read expression from the "Mu"/"Ms" layers,
        otherwise from "unspliced"/"spliced".
    show_plot (bool): If True, call `plt.show()` at the end.
    save_plot, save_path: Accepted for backward compatibility; this function
        does not write any file.
    cell_type_key (str): Categorical `obs` column used for colors and legend;
        `adata.uns[f"{cell_type_key}_colors"]` must hold one color per category.
    vkey, vkey_u (str): Layer names of the spliced / unspliced velocities.
    show_points (bool): If True, also draw a scatter point at every arrow origin.

    Returns:
    None
    """
    gene_idx = adata.var_names.get_loc(gene_name)

    def _gene_column(layer_key):
        # Layers may be dense arrays or sparse matrices; sparse objects have no
        # `.flatten()`, so densify first and always hand back a 1-D array.
        column = adata.layers[layer_key][:, gene_idx]
        if hasattr(column, "toarray"):
            column = column.toarray()
        return np.asarray(column).ravel()

    def _min_max(values):
        # A constant vector has zero range; map it to zeros instead of
        # dividing by zero and filling the plot data with NaN.
        lowest, highest = np.min(values), np.max(values)
        span = highest - lowest
        if span == 0:
            return np.zeros(values.shape, dtype=float)
        return (values - lowest) / span

    def _scale_by_max_abs(values):
        # All-zero velocities are returned unchanged rather than becoming NaN.
        peak = np.max(np.abs(values))
        if peak == 0:
            return values
        return values / peak

    def _signed_log1p(values):
        # log1p is undefined for values <= -1; apply it to the magnitude and
        # restore the sign so that negative velocities stay finite.
        return np.sign(values) * np.log1p(np.abs(values))

    unspliced_layer, spliced_layer = ("Mu", "Ms") if smooth_expr else ("unspliced", "spliced")
    unspliced_expression = _min_max(_gene_column(unspliced_layer))
    spliced_expression = _min_max(_gene_column(spliced_layer))

    unspliced_velocity = _gene_column(vkey_u)
    spliced_velocity = _gene_column(vkey)

    if norm_velocity:
        unspliced_velocity = _scale_by_max_abs(unspliced_velocity)
        spliced_velocity = _scale_by_max_abs(spliced_velocity)

    if log:
        unspliced_velocity = _signed_log1p(unspliced_velocity)
        spliced_velocity = _signed_log1p(spliced_velocity)

    # Boolean mask of the cells to draw.
    if filter_cells:
        valid_idx = (unspliced_expression > 0) & (spliced_expression > 0)
    else:
        valid_idx = (unspliced_expression >= 0) & (spliced_expression >= 0)

    unspliced_expression_filtered = unspliced_expression[valid_idx]
    spliced_expression_filtered = spliced_expression[valid_idx]
    unspliced_velocity_filtered = unspliced_velocity[valid_idx]
    spliced_velocity_filtered = spliced_velocity[valid_idx]

    # Map every category to its stored color.
    unique_cell_types = adata.obs[cell_type_key].cat.categories
    celltype_colors = adata.uns[f"{cell_type_key}_colors"]
    celltype_to_color = dict(zip(unique_cell_types, celltype_colors))

    # Plain array of labels so the loop below walks it positionally; integer
    # indexing into the label-indexed obs Series is a deprecated fallback.
    cell_types_filtered = adata.obs[cell_type_key].to_numpy()[valid_idx]

    fig, ax = plt.subplots(figsize=(9, 6.5), dpi=100)  # Lower dpi here if the file is still too large

    if show_points:
        point_colors = [celltype_to_color[cell_type] for cell_type in cell_types_filtered]
        ax.scatter(spliced_expression_filtered, unspliced_expression_filtered, c=point_colors, alpha=0.6)

    # Plot velocity vectors
    for s_pos, u_pos, s_vel, u_vel, cell_type in zip(
        spliced_expression_filtered,
        unspliced_expression_filtered,
        spliced_velocity_filtered,
        unspliced_velocity_filtered,
        cell_types_filtered,
    ):
        ax.arrow(
            s_pos, u_pos,
            s_vel * s_scale, u_vel * u_scale,
            color=celltype_to_color[cell_type], alpha=alpha,
            head_width=head_width, head_length=head_length,
            length_includes_head=length_includes_head,
        )

    ax.set_ylabel(f'Normalized Unspliced Expression of {gene_name}')
    ax.set_xlabel(f'Normalized Spliced Expression of {gene_name}')
    ax.set_title(f'Expression and Velocity of {gene_name} by Cell Type')

    # Create a legend with one round marker per category.
    patches = [plt.Line2D([0], [0], marker='o', color='w', markerfacecolor=celltype_to_color[celltype], markersize=10, label=celltype)
               for celltype in unique_cell_types]
    ax.legend(handles=patches, title="Cell Type", bbox_to_anchor=(1.05, 1), loc='upper left')

    if show_plot:
        plt.show()

In [None]:
# Draw the Foxo3 phase plane twice: first with the "velocity"/"velocity_u"
# layers, then with the "slope_s"/"slope_u" layers.
for spliced_key, unspliced_key in (("velocity", "velocity_u"), ("slope_s", "slope_u")):
    plot_phase_plane(adata, "Foxo3", "pancreas", 10, vkey=spliced_key, vkey_u=unspliced_key)

In [None]:
# Violin plot of Abcc8, one violin per category of adata.obs["clusters"].
sc.pl.violin(adata, "Abcc8", groupby="clusters")

In [None]:
# Scatter plot of Abcc8 (y) against z5 pseudotime (x), colored by cluster.
sc.pl.scatter(adata, x="z5_pseudotime", y="Abcc8", color="clusters")