In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import pandas as pd

# Set a new working directory.
# os.chdir() with a relative path is not idempotent: each re-run of this cell
# would climb one directory higher. Record the directory the kernel was in the
# first time this cell ran and always resolve the relative path against it, so
# running the cell again lands in the same place.
new_path = '../'
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, new_path))

import numpy as np
import pandas as pd
from morphomics import utils
import plotly.express as px


In [None]:
# Load the morphoframe produced by the dimensionality-reduction step.
# NOTE(review): this is an absolute, machine-specific path; consider deriving it
# from a configurable results directory so the notebook runs elsewhere.
path_to_mf = (
    "/home/thomas/Desktop/morphOMICs/v1_experiments/results/dim_reduced/Morphomics.PID_v1.pi_0,130_umap_reduced_data"
)
mf = utils.load_obj(name=path_to_mf)

In [None]:
# Sanity check of what was loaded: the available keys, then the number of entries.
print(mf.keys(), len(mf), sep='\n')

In [None]:
# Expand the per-row UMAP coordinates stored in mf['umap'] into two scalar
# columns, 'x' and 'y' (assumes every entry holds exactly two values -- TODO confirm).
umap_coords = pd.DataFrame(mf['umap'].tolist(), index=mf.index)
mf[['x', 'y']] = umap_coords


In [None]:
import re

# Helper function to lighten/darken an RGB colour by a constant per-channel offset
def scale_rgb(rgb_str, coeff):
    """Shift every channel of an ``rgb(r, g, b)`` string by ``coeff``.

    Despite the name, the channels are not multiplied: ``coeff`` is *added*
    to each of them, the sum is truncated to an integer and clamped to the
    valid 0-255 range.

    Parameters
    ----------
    rgb_str : str
        Colour written as ``'rgb(r, g, b)'``. The three channel values are
        picked up as the runs of digits found in the string.
    coeff : int or float
        Offset added to each channel. Positive values lighten the colour,
        negative values darken it.

    Returns
    -------
    str
        The shifted colour, formatted as ``'rgb(r, g, b)'``.

    Raises
    ------
    ValueError
        If ``rgb_str`` does not contain exactly three integer components.
    """
    channels = [int(value) for value in re.findall(r'\d+', rgb_str)]
    if len(channels) != 3:
        raise ValueError(
            f"expected exactly three colour components in {rgb_str!r}, found {len(channels)}"
        )

    # Clamp on both sides: without the lower bound a negative offset would
    # produce an invalid colour such as 'rgb(-20, -20, -20)'.
    r, g, b = (max(0, min(int(channel + coeff), 255)) for channel in channels)

    return f'rgb({r}, {g}, {b})'

colors = {}
circle_colors = {}

# One entry per group: (condition-label prefix, base colour, largest offset).
# For index 1..5 the base colour is shifted by index/5 of the largest offset,
# so higher indices within a group get progressively lighter shades.
base_palette = (
    ('1xKXA_4h-M-', 'rgb(0, 135, 0)', 100),
    ('1xKXA_4h-F-', 'rgb(255, 170, 0)', 100),
    ('1xSaline_4h-M-', 'rgb(30, 30, 30)', 50),
    ('1xSaline_4h-F-', 'rgb(211, 211, 211)', 50),
)

for idx in range(1, 6):
    for prefix, base_rgb, max_offset in base_palette:
        colors[prefix + str(idx)] = scale_rgb(base_rgb, idx / 5 * max_offset)

In [None]:
import statistics  # no longer needed by this cell; kept in case other cells rely on the name

# Extent of the embedding along both axes, and the median of the first axis.
# The pandas reductions are vectorised and skip NaN, whereas the Python
# built-ins min()/max() and statistics.median() iterate over the Series one
# element at a time and give order-dependent results when NaN is present.
x_min, x_max = mf['x'].min(), mf['x'].max()
y_min, y_max = mf['y'].min(), mf['y'].max()
med = mf['x'].median()

In [None]:
import plotly.graph_objects as go

mf_sorted = mf.sort_values('condition')

# Three edges -> two equal-width bins spanning the full range of 'x'.
bin_edges = list(np.linspace(x_min, x_max, 3))
# Alternative binning that splits at the median instead of the midpoint:
# bin_edges = [x_min, med, x_max]

# Bars and tick labels are both placed at the bin centres so that each bar
# sits over the interval it describes and there is exactly one label per bin.
bin_centers = [(lo + hi) / 2 for lo, hi in zip(bin_edges[:-1], bin_edges[1:])]
bin_labels = [f'{int(lo)} to {int(hi)}' for lo, hi in zip(bin_edges[:-1], bin_edges[1:])]

conditions = mf_sorted['condition'].unique()

# Step 1: for every condition, the fraction of its own rows that fall in each
# bin. This removes the effect of conditions having different sample sizes.
hist_list = []
for condition in conditions:
    subset = mf_sorted[mf_sorted['condition'] == condition]
    hist, _ = np.histogram(subset['x'], bins=bin_edges)
    total = hist.sum()
    # A condition with no countable values contributes zeros instead of NaN.
    hist_list.append(hist / total if total else np.zeros(len(hist)))

# Step 2: rescale so that, within each bin, the shares of all conditions add
# up to 1. The stacked bars then show the composition of each bin.
norm = np.sum(hist_list, axis=0)

fig = go.Figure()
for condition, hist_fraction in zip(conditions, hist_list):
    # Bins in which no condition has any weight stay at 0 (no division by zero).
    hist_normalized = np.divide(
        hist_fraction, norm, out=np.zeros_like(hist_fraction), where=norm > 0
    )
    fig.add_trace(go.Bar(
        x=bin_centers,
        y=hist_normalized,
        name=condition,
        marker_color=colors.get(condition, 'gray'),  # unknown conditions fall back to gray
        opacity=0.75,
    ))

# Labels, tick placement and figure size (in pixels)
fig.update_layout(
    barmode='stack',
    title='Normalized Histogram of UMAP 1 by Condition',
    xaxis_title='UMAP 1',
    yaxis_title='Proportion',
    xaxis=dict(
        tickvals=bin_centers,  # one tick per bin ...
        ticktext=bin_labels,   # ... labelled with the bin's range
    ),
    width=1200,
    height=800,
)

# Show the plot
fig.show()

# Save the figure as a PDF
fig.write_image("/home/thomas/Desktop/morphOMICs/v1_experiments/results/plot/v1_stack_umap.pdf", format="pdf")

In [None]:
# Number of rows per condition, turned into a two-column table
representation = mf_sorted['condition'].value_counts()
df = representation.reset_index()
df.columns = ['condition', 'count']

# Condition labels are '-'-separated: the first field becomes 'Model', and the
# remaining fields are re-joined with '_' into 'Sex'.
label_parts = df['condition'].str.split('-')
df['Model'] = label_parts.str[0]
df['Sex'] = label_parts.str[1:].map('_'.join)
df = df.sort_values('condition')

# One table per treatment group
kxa_data = df.loc[df['Model'].eq('1xKXA_4h')]
saline_data = df.loc[df['Model'].eq('1xSaline_4h')]

In [None]:
# Stacked bar chart of the per-condition counts: one stack per model, one
# segment per value of the 'Sex' column (everything after the model name in
# the condition label).
fig = go.Figure()
for sex in df['Sex'].unique():
    # Same trace order as before: the 1xKXA bar first, then the 1xSaline bar.
    for model_data in (kxa_data, saline_data):
        subset = model_data[model_data['Sex'] == sex]
        # A label present for only one of the two models leaves the other
        # subset empty; skip it rather than failing on .iloc[0].
        if subset.empty:
            continue
        fig.add_trace(go.Bar(
            x=subset['Model'],
            y=subset['count'],
            name=sex,
            marker_color=colors.get(subset['condition'].iloc[0], 'gray'),
            opacity=0.75,
        ))

# Update layout for stacked bars
fig.update_layout(
    barmode='stack',
    xaxis_title="Label",
    yaxis_title="count",
    width=1200,  # Adjust width
    height=800,  # Adjust height
    legend_title="Category"
)

# Save the figure as a PDF
fig.write_image("/home/thomas/Desktop/morphOMICs/v1_experiments/results/plot/v1_stack_count.pdf", format="pdf")

# Show the figure (optional)
fig.show()
