# Phase 1: Cell Type Proportions by Region x Age Stacked Bar Graph
Code written and conceptualized by Monica E. Mesecar. Advanced plot aesthetics were developed with support from Perplexity AI.

In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib import rcParams
matplotlib.rcParams['pdf.fonttype'] = 42
from matplotlib.legend_handler import HandlerTuple

## Read in Metadata & Explore 

In [None]:
# Print the current working directory using an IPython shell escape
# (the leading "!" passes the command to the system shell; notebook-only syntax)
!pwd

In [None]:
import os

# Directory the analysis runs from; the relative CSV path used when the
# metadata is loaded is resolved against it.
project_dir = "/gpfs/gsfs12/users/mesecarme/Phase1/"

# Report where the kernel started
cwd = os.getcwd()
print("Current working directory:", cwd)

# Move into the project directory, then read the location back to confirm
os.chdir(project_dir)
new_cwd = os.getcwd()
print("New working directory:", new_cwd)

In [None]:
# Load the cleaned Phase 1 metadata table; the file name is relative, so it
# is looked up in the current working directory
p1_metadata = pd.read_csv("Phase1_Metadata_Cleaned.csv")

In [None]:
# Preview the first 10 rows of the metadata table
p1_metadata.head(10)

In [None]:
# Keep only cells whose broad cell type is something other than 'Indeterminate'
p1_metadata = p1_metadata[p1_metadata['broad_celltype'] != 'Indeterminate']

In [None]:
# The first CSV column arrives under the header 'Unnamed: 0' (presumably the
# cell barcodes); give it a proper name and promote it to the index.
p1_metadata = (
    p1_metadata
    .rename(columns={'Unnamed: 0': 'Cell_barcode'})
    .set_index('Cell_barcode')
)
p1_metadata.head(3)

## Create Dictionaries of Metadata Subsets

In [None]:
# Collect the distinct values of each metadata field used for subsetting
p1_region_list = p1_metadata['Brain_region'].unique().tolist()
p1_age_list = p1_metadata['Age_group'].unique().tolist()
p1_sex_list = p1_metadata['Sex'].unique().tolist()
p1_broad_list = p1_metadata['broad_celltype'].unique().tolist()

# Echo each list (region, age, sex, cell type) to see the available categories
for category_values in (p1_region_list, p1_age_list, p1_sex_list, p1_broad_list):
    print(category_values)

In [None]:
# Map every age category found in the metadata to the rows belonging to it
p1_age_dict = {
    age_label: p1_metadata[p1_metadata['Age_group'].eq(age_label)]
    for age_label in p1_age_list
}

# Convenience handles for the two age groups compared in the plot
p1_young_df = p1_age_dict['Young']
p1_old_df = p1_age_dict['Aged']

In [None]:
# Dimensions (rows, columns) of the Young subset
p1_young_df.shape

In [None]:
# Verify the age filter: 'Young' should be the only Age_group value listed
p1_young_df.Age_group.unique()

In [None]:
# Dimensions (rows, columns) of the Aged subset
p1_old_df.shape

In [None]:
# Verify the age filter: 'Aged' should be the only Age_group value listed
p1_old_df.Age_group.unique()

In [None]:
# Display the Young subset for a visual check
p1_young_df

In [None]:
# Count the Young cells for every (brain region, broad cell type) pair and
# pivot the result: one row per region, one column per cell type, with 0
# where a pair never occurs.
grouped_young = (
    p1_young_df
    .groupby(['Brain_region', 'broad_celltype'])
    .size()
    .unstack(fill_value=0)
)

# Divide each row by its own total so every region is expressed as
# percentages of the cells in that region
df_percentages_young = grouped_young.div(grouped_young.sum(axis='columns'), axis='index').mul(100)

In [None]:
# Display the raw per-region cell counts for the Young subset
grouped_young

In [None]:
# Display the per-region cell-type percentages for the Young subset
df_percentages_young

In [None]:
# Same tabulation for the Aged subset: cell counts per (brain region, broad
# cell type) pair, pivoted to regions x cell types with 0 for absent pairs
grouped_old = (
    p1_old_df
    .groupby(['Brain_region', 'broad_celltype'])
    .size()
    .unstack(fill_value=0)
)

# Row-wise normalisation to percentages of the cells in each region
df_percentages_old = grouped_old.div(grouped_old.sum(axis='columns'), axis='index').mul(100)

In [None]:
# Display the raw per-region cell counts for the Aged subset
grouped_old

In [None]:
# Display the per-region cell-type percentages for the Aged subset
df_percentages_old

In [None]:
# Column order shared by both age groups so the stacked segments (and the
# legend entries) line up between the Young and Aged bars.
# 'Indeterminate' is left out on purpose: those cells are removed from the
# metadata earlier in the notebook. Columns not listed here are dropped.
cell_type_order = ['InN', 'ExN', 'SPN', 'Microglia', 'Astrocyte', 'Oligodendrocyte', 'OPC', 'Mural', 'Endothelial', 'Ependymal'] #,'Indeterminate']

# A name that occurs in neither table is almost certainly a typo, so fail
# loudly with the same exception type plain column selection would raise.
observed_cell_types = set(df_percentages_young.columns) | set(df_percentages_old.columns)
unknown_cell_types = [ct for ct in cell_type_order if ct not in observed_cell_types]
if unknown_cell_types:
    raise KeyError(f"cell_type_order entries not found in either age group: {unknown_cell_types}")

# Reorder the columns. A cell type that is genuinely absent from one age
# group (no cells of that type in any region) becomes a 0% column instead of
# aborting the notebook, which keeps both tables the same shape for plotting.
df_percentages_young = df_percentages_young.reindex(columns=cell_type_order, fill_value=0.0)
df_percentages_old = df_percentages_old.reindex(columns=cell_type_order, fill_value=0.0)

In [None]:
# Dark colour palette, keyed by broad cell type (passed as `color=` for the
# Aged bars in the plot below). Colour names describe the hex values.
custom_palette_cells_dark = {
    'InN':             '#3FA291',  # teal
    'ExN':             '#D9C300',  # dark yellow
    'SPN':             '#514785',  # muted purple
    'Microglia':       '#AD5E03',  # burnt orange
    'Astrocyte':       '#2B5B7D',  # steel blue
    'Oligodendrocyte': '#60861D',  # olive green
    'OPC':             '#BB3A8A',  # magenta-pink
    'Mural':           '#B21706',  # dark red
    'Endothelial':     '#683668',  # dark violet
    # NOTE(review): previously annotated "Mint Green", but this hex is a
    # mustard yellow close to the ExN colour - confirm the intended value.
    'Ependymal':       '#B8A200',
    'Indeterminate':   '#7f7f7f',  # mid grey
}

In [None]:
# Light colour palette, keyed by broad cell type (passed as `color=` for the
# Young bars in the plot below). Colour names describe the hex values.
custom_palette_cells_bright = {
    'InN':             '#6FC7B8',  # light teal
    'ExN':             '#ffed6f',  # pale yellow
    'SPN':             '#7E74B4',  # lavender purple
    'Microglia':       '#FC921A',  # bright orange
    'Astrocyte':       '#4D91C2',  # mid blue
    'Oligodendrocyte': '#9AD338',  # lime green
    'OPC':             '#D371AE',  # pink
    'Mural':           '#F73823',  # bright red-orange
    'Endothelial':     '#AB5CAB',  # orchid violet
    # NOTE(review): previously annotated "Mint Green", but this hex is a
    # golden yellow close to the ExN colour - confirm the intended value.
    'Ependymal':       '#E0C600',
    'Indeterminate':   '#D3D3D3',  # light grey
}

In [None]:
#Plot aesthetics achieved with assistance from Perplexity AI LLM

# One square canvas holds the bars of both age groups
fig, ax = plt.subplots(figsize=(10, 10))

# Settings shared by the two stacked horizontal bar series
shared_bar_style = dict(kind='barh', stacked=True, ax=ax, width=0.35)

# Young cells use the bright palette; Aged cells use the dark palette.
# The opposite `position` values (0 vs 1) align the two series on opposite
# sides of each region tick, so they sit side by side rather than overlapping.
df_percentages_young.plot(position=0, color=custom_palette_cells_bright, **shared_bar_style)
df_percentages_old.plot(position=1, color=custom_palette_cells_dark, **shared_bar_style)

# Axis labels and title, all in bold
bold_text = {'fontweight': 'bold'}
ax.set_ylabel('Brain Regions', **bold_text)
ax.set_xlabel('Percentage of Cells', **bold_text)
ax.set_title('Proportion of Cell Types by Brain Region and Age Category', **bold_text)

# Helper that writes the age-group name just outside the right edge of the axes
def add_age_labels(ax, df, age, position):
    """Write the text `age` once per row of `df`, to the right of the axes.

    Parameters:
        ax: Axes to annotate; must provide `text` and `get_yaxis_transform`.
        df: Table whose row count (`len(df)`) sets how many labels are drawn.
        age: Label text, e.g. 'Young' or 'Aged'.
        position: Vertical offset added to each row number 0, 1, 2, ...
    """
    label_style = dict(va='center', ha='left', fontweight='bold')
    for row_number in range(len(df)):
        # With the y-axis transform, x is an axes fraction (1.01 = just past
        # the right edge) while y stays in data coordinates.
        ax.text(
            1.01,
            row_number + position,
            age,
            transform=ax.get_yaxis_transform(),
            **label_style,
        )

# Tag each region's pair of bars with its age group; the +0.15 / -0.15
# offsets shift the text to either side of the region's tick position
add_age_labels(ax, df_percentages_young, 'Young', 0.15)
add_age_labels(ax, df_percentages_old, 'Aged', -0.15)

# Each plot call registered its own legend entries, so a cell type drawn for
# both age groups shows up twice; collect the handles under a single label.
handles, labels = ax.get_legend_handles_labels()

label_dict = {}
for patch, cell_type in zip(handles, labels):
    label_dict.setdefault(cell_type, []).append(patch)

# One legend row per cell type: a tuple of handles when there are several,
# the bare handle when there is only one
new_labels = list(label_dict)
new_handles = [
    tuple(group) if len(group) > 1 else group[0]
    for group in label_dict.values()
]

# HandlerTuple draws the grouped handles together in a single legend entry
ax.legend(
    new_handles,
    new_labels,
    handler_map={tuple: HandlerTuple(ndivide=None)},
    title='Cell Types',
    bbox_to_anchor=(1.1, 1),
    loc='upper left',
)

# Extend the upper y-limit by 7% of the current span for breathing room
y_min, y_max = ax.get_ylim()
headroom = (y_max - y_min) * 0.07
ax.set_ylim(y_min, y_max + headroom)

# Tighten the layout, then reserve the right 20% of the figure for the legend
plt.tight_layout()
plt.subplots_adjust(right=0.8)

#plt.savefig('Updated_Proportions_Plot_bold_NOIndet_FINAL_adj.pdf', bbox_inches='tight')

plt.show()