# Donor and Tissue Sample Demographics Code: Supplemental Figure 1
Visualization conceptualized by Monica Mesecar. 
AI coding support for aesthetics by Perplexity. No proprietary information or PHI was entered into the LLM. 

# Phase 1 Demographics Charts

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
from matplotlib import colors
from matplotlib import rcParams
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
import textwrap

In [None]:
# Load the donor table from CSV; the cells below all read from this DataFrame.
donor_data_df = pd.read_csv("P1_Donor_Data_Updated.csv")

In [None]:
#display(donor_data_df)

## Donor Info

### Sex

In [None]:
# Nested donut chart of donor sex within age category.
# Outer ring: donors per age category. Inner ring: sex split inside each age category.

# Age category is an ordered categorical so 'Young' is always tabulated before 'Old'
donor_data_df['Age_Cat'] = pd.Categorical(donor_data_df['Age_Cat'], categories=['Young', 'Old'], ordered=True)

# Cross-tabulation: rows = age category, columns = sex, values = donor counts
cross_tab = pd.crosstab(donor_data_df['Age_Cat'], donor_data_df['Sex'])

# Prepare data for the plot. The counts are flattened row by row, so the inner
# wedges of one age category sit next to each other under its outer wedge.
group_names = cross_tab.index.tolist()
group_size = cross_tab.sum(axis=1).tolist()
subgroup_names = [f"{sex}" for _ in cross_tab.index for sex in cross_tab.columns]
subgroup_size = cross_tab.values.flatten()

# Legend entries name the age category too; the bare sex label alone would
# appear once per age category and could not be told apart.
legend_labels = [f"{age} {sex}" for age in cross_tab.index for sex in cross_tab.columns]

# Define custom colors using hex codes
young_color = '#445FB1'  # Blue for Young
old_color = '#FF9A5C'    # Orange for Old

# Define shades for inner pie chart
# NOTE(review): colours are matched to wedges by position, so this assumes
# exactly two Sex categories, in crosstab column order - confirm against the data.
young_shades = ['#F2D57E', '#BC97D8']
old_shades = ['#EBBD33', '#965CC1']

# Create the figure and axis
fig, ax = plt.subplots(figsize=(12, 10))
ax.axis('equal')


def wrap_labels(labels, width=10):
    """Wrap every label so that no line is longer than `width` characters.

    Each existing line of a label is wrapped on its own, so an explicit
    line break (such as the one before the "(count)" suffix) is kept.
    Passing the whole label to textwrap.wrap would turn that newline into
    a plain space.
    """
    return [
        '\n'.join(
            piece
            for line in label.split('\n')
            for piece in (textwrap.wrap(line, width) or [''])
        )
        for label in labels
    ]


# First Ring (outside) - Age Category
group_labels = [f"{name}\n({size})" for name, size in zip(group_names, group_size)]
wrapped_group_labels = wrap_labels(group_labels, width=15)
mypie, _ = ax.pie(group_size, radius=1.3, labels=wrapped_group_labels, colors=[young_color, old_color], labeldistance=0.865)
plt.setp(mypie, width=0.3, edgecolor='white')

# Second Ring (Inside) - Sex within Age Category
inner_colors = young_shades + old_shades  # Young shades first, then Old shades
subgroup_labels = [f"{name}\n({size})" for name, size in zip(subgroup_names, subgroup_size)]
wrapped_subgroup_labels = wrap_labels(subgroup_labels, width=15)

mypie2, _ = ax.pie(subgroup_size, radius=1.3-0.3, labels=wrapped_subgroup_labels, labeldistance=0.78, colors=inner_colors)
plt.setp(mypie2, width=0.4, edgecolor='white')

# Centre every wedge label on its anchor point so it sits inside its ring
for text in ax.texts:
    text.set_horizontalalignment('center')
    text.set_verticalalignment('center')

# Add title and legend (the title names the plotted column, 'Sex')
plt.title('Sex Distribution by Age Group')
plt.legend(mypie2, legend_labels, title='Age & Sex', loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))

plt.margins(0,0)
plt.tight_layout()
plt.savefig('Sex_Dist.pdf', transparent=True)
#plt.show()

### Race

In [None]:
# Nested donut chart of donor race within age category.
# Outer ring: donors per age category. Inner ring: race split inside each age category.

# Age category is an ordered categorical so 'Young' is always tabulated before 'Old'
donor_data_df['Age_Cat'] = pd.Categorical(donor_data_df['Age_Cat'], categories=['Young', 'Old'], ordered=True)

# Cross-tabulation: rows = age category, columns = race, values = donor counts
cross_tab = pd.crosstab(donor_data_df['Age_Cat'], donor_data_df['Race'])

# Prepare data for the plot. The counts are flattened row by row, so the inner
# wedges of one age category sit next to each other under its outer wedge.
group_names = cross_tab.index.tolist()
group_size = cross_tab.sum(axis=1).tolist()
subgroup_names = [f"{race}" for _ in cross_tab.index for race in cross_tab.columns]
subgroup_size = cross_tab.values.flatten()

# Legend entries name the age category too; the bare race label alone would
# appear once per age category and could not be told apart.
legend_labels = [f"{age} {race}" for age in cross_tab.index for race in cross_tab.columns]

# Define custom colors using hex codes
young_color = '#445FB1'  # Blue for Young
old_color = '#FF9A5C'    # Orange for Old

# Define shades for inner pie chart
# NOTE(review): colours are matched to wedges by position, so this assumes
# exactly two Race categories (Black, White) in crosstab column order -
# confirm against the data; more categories would shift the colours.
young_shades = ['#2C83AB', '#9CBE37']  # Shades for Black and White in Young group
old_shades = ['#216382', '#829E2E']    # Shades for Black and White in Old group

# Create the figure and axis
fig, ax = plt.subplots(figsize=(12, 10))
ax.axis('equal')


def wrap_labels(labels, width=10):
    """Wrap every label so that no line is longer than `width` characters.

    Each existing line of a label is wrapped on its own, so an explicit
    line break (such as the one before the "(count)" suffix) is kept.
    Passing the whole label to textwrap.wrap would turn that newline into
    a plain space.
    """
    return [
        '\n'.join(
            piece
            for line in label.split('\n')
            for piece in (textwrap.wrap(line, width) or [''])
        )
        for label in labels
    ]


# First Ring (outside) - Age Category
group_labels = [f"{name}\n({size})" for name, size in zip(group_names, group_size)]
wrapped_group_labels = wrap_labels(group_labels, width=15)
mypie, _ = ax.pie(group_size, radius=1.3, labels=wrapped_group_labels, colors=[young_color, old_color], labeldistance=0.865)
plt.setp(mypie, width=0.3, edgecolor='white')

# Second Ring (Inside) - Race within Age Category
inner_colors = young_shades + old_shades  # Young shades first, then Old shades
subgroup_labels = [f"{name}\n({size})" for name, size in zip(subgroup_names, subgroup_size)]
wrapped_subgroup_labels = wrap_labels(subgroup_labels, width=15)

mypie2, _ = ax.pie(subgroup_size, radius=1.3-0.3, labels=wrapped_subgroup_labels, labeldistance=0.78, colors=inner_colors)
plt.setp(mypie2, width=0.4, edgecolor='white')

# Centre every wedge label on its anchor point so it sits inside its ring
for text in ax.texts:
    text.set_horizontalalignment('center')
    text.set_verticalalignment('center')

# Add title and legend
plt.title('Race Distribution by Age Group')
plt.legend(mypie2, legend_labels, title='Age & Race', loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))

plt.margins(0,0)
plt.tight_layout()
plt.savefig('Race_dist.pdf', transparent=True)
#plt.show()

### Smoking Status

In [None]:
# Nested donut chart of smoking status within age category.
# Outer ring: donors per age category. Inner ring: smoking status inside each age category.

# Age category is an ordered categorical so 'Young' is always tabulated before 'Old'
donor_data_df['Age_Cat'] = pd.Categorical(donor_data_df['Age_Cat'], categories=['Young', 'Old'], ordered=True)

# Cross-tabulation: rows = age category, columns = smoking status, values = donor counts
cross_tab = pd.crosstab(donor_data_df['Age_Cat'], donor_data_df['Smoker'])

# Prepare data for the plot. The counts are flattened row by row, so the inner
# wedges of one age category sit next to each other under its outer wedge.
group_names = cross_tab.index.tolist()
group_size = cross_tab.sum(axis=1).tolist()
subgroup_names = [f"{status}" for _ in cross_tab.index for status in cross_tab.columns]
subgroup_size = cross_tab.values.flatten()

# Legend entries name the age category too; the bare smoking label alone would
# appear once per age category and could not be told apart.
legend_labels = [f"{age} {status}" for age in cross_tab.index for status in cross_tab.columns]

# Define custom colors using hex codes
young_color = '#445FB1'  # Blue for Young
old_color = '#FF9A5C'    # Orange for Old

# Define shades for inner pie chart
# NOTE(review): colours are matched to wedges by position, so this assumes
# exactly two Smoker categories, in crosstab column order - confirm which
# shade ends up on smokers vs. non-smokers.
young_shades = ['#6C6E6C', '#E37F98']  # Shades for the two smoking categories within young
old_shades = ['#474847', '#C92C53']     # Shades for the two smoking categories within old

# Create the figure and axis
fig, ax = plt.subplots(figsize=(12, 10))
ax.axis('equal')


def wrap_labels(labels, width=10):
    """Wrap every label so that no line is longer than `width` characters.

    Each existing line of a label is wrapped on its own, so an explicit
    line break (such as the one before the "(count)" suffix) is kept.
    Passing the whole label to textwrap.wrap would turn that newline into
    a plain space.
    """
    return [
        '\n'.join(
            piece
            for line in label.split('\n')
            for piece in (textwrap.wrap(line, width) or [''])
        )
        for label in labels
    ]


# First Ring (outside) - Age Category
group_labels = [f"{name}\n({size})" for name, size in zip(group_names, group_size)]
wrapped_group_labels = wrap_labels(group_labels, width=15)
mypie, _ = ax.pie(group_size, radius=1.3, labels=wrapped_group_labels, colors=[young_color, old_color], labeldistance=0.865)
plt.setp(mypie, width=0.3, edgecolor='white')

# Second Ring (Inside) - Smoking Status within Age Category
inner_colors = young_shades + old_shades  # Young shades first, then Old shades
subgroup_labels = [f"{name}\n({size})" for name, size in zip(subgroup_names, subgroup_size)]
wrapped_subgroup_labels = wrap_labels(subgroup_labels, width=15)

mypie2, _ = ax.pie(subgroup_size, radius=1.3-0.3, labels=wrapped_subgroup_labels, labeldistance=0.78, colors=inner_colors)
plt.setp(mypie2, width=0.4, edgecolor='white')

# Centre every wedge label on its anchor point so it sits inside its ring
for text in ax.texts:
    text.set_horizontalalignment('center')
    text.set_verticalalignment('center')

# Add title and legend
plt.title('Smoking Status Distribution by Age Group')
plt.legend(mypie2, legend_labels, title='Age & Smoking Status', loc='center left', bbox_to_anchor=(1, 0, 0.5, 1))

plt.margins(0,0)
plt.tight_layout()
plt.savefig('Smoking_dist.pdf', transparent=True)
#plt.show()

### Age Distribution (yrs) within each Age Category

In [None]:
from matplotlib.ticker import MultipleLocator
import matplotlib.cm as cm
import matplotlib.colors as mcolors

# Per-donor age scatter with one panel per age category.
# Reads the 'Age_yrs', 'Age_Cat' and 'ID' columns of donor_data_df.

# Keep donors whose age falls inside the window for their category, sorted by age
is_young = (donor_data_df['Age_Cat'] == 'Young') & (donor_data_df['Age_yrs'].between(20, 31))
is_old = (donor_data_df['Age_Cat'] == 'Old') & (donor_data_df['Age_yrs'].between(60, 86))
young_donors = donor_data_df[is_young].sort_values(by='Age_yrs')
old_donors = donor_data_df[is_old].sort_values(by='Age_yrs')

# One row of two side-by-side panels
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 6))

# Panel description: (axis, donors, colormap name, title, y-axis limits)
# NOTE(review): the filter windows above (20-31 and 60-86) do not match the
# ranges in the titles (20-30, 60-85) or the young y-limit (30.5) - confirm
# which bounds are intended.
panels = [
    (ax1, young_donors, 'Set2', 'Age of Young Donors (20-30 years)', (20, 30.5)),
    (ax2, old_donors, 'Dark2', 'Age of Old Donors (60-85 years)', (60, 86)),
]

for axis, donors, cmap_name, panel_title, (y_low, y_high) in panels:
    n_donors = len(donors)
    slots = range(n_donors)  # x position of each donor, in ascending-age order

    # Spread the colormap over the number of donors in this panel
    palette = plt.get_cmap(cmap_name)
    to_unit = mcolors.Normalize(vmin=0, vmax=n_donors)
    point_colors = [palette(to_unit(slot)) for slot in slots]

    axis.scatter(slots, donors['Age_yrs'], c=point_colors)
    axis.set_title(panel_title)
    axis.set_xlabel('Donor Index')
    axis.set_ylabel('Age')
    axis.set_ylim(y_low, y_high)
    axis.set_xticks(slots)
    axis.set_xticklabels(donors['ID'], rotation=90)  # Label donors by ID number
    axis.yaxis.set_major_locator(MultipleLocator(2))  # y tick every 2 years

plt.tight_layout()
plt.savefig('Age_Dist.pdf', transparent=True)
#plt.show()

## Tissue Quality

### PMI

In [None]:
# Box plot of PMI (hrs) per age category, annotated with each category's mean and median.
plt.figure(figsize=(8, 6))  # Optional: Set the figure size

# Mean and median per age category. Computed before plotting so that the same
# category order drives both the boxes and their annotations.
# observed=False keeps every declared category, whatever the pandas default is.
pmi_stats = donor_data_df.groupby('Age_Cat', observed=False)['PMI_hrs'].agg(['mean', 'median'])
means = pmi_stats['mean'].values
medians = pmi_stats['median'].values
category_order = pmi_stats.index.tolist()

# Define plot type; set x and y axes; define dataframe.
# `order` pins box position i to row i of pmi_stats.
ax = sns.boxplot(x='Age_Cat', y='PMI_hrs', data=donor_data_df, order=category_order)

# Annotate mean and median values on the plot.
# Iterating over the statistics themselves (not over unique() of the column,
# which follows row order and may contain NaN) keeps each label on its own box.
for i, (mean_value, median_value) in enumerate(zip(means, medians)):
    if pd.isna(mean_value) or pd.isna(median_value):
        continue  # category without PMI data: nothing to annotate
    ax.text(i, mean_value + 2, f'Mean: {mean_value:.1f}', horizontalalignment='center', color='white')
    ax.text(i, median_value + 2, f'Median: {median_value:.1f}', horizontalalignment='center', color='white')

plt.title('PMI Hrs by Age Category')
plt.xlabel('Age Category')
plt.ylabel('PMI (hrs)')
plt.ylim(10, 60)  # Adjust y-axis limits as needed
plt.savefig('PMI_Box.pdf', transparent=True)

#plt.show()

### pH 

In [None]:
#Define plot type; set x and y axes; define dataframe 
sns.boxplot(x='Age_Cat', y='pH', data=donor_data_df)
plt.ylim(6, 7)  # Set y-axis limit, based on narrow pH range for interpretability 
plt.title('pH by Age Category')
plt.xlabel('Age Category')
plt.ylabel('pH')
plt.savefig('pH_Box.pdf', transparent=True)

#plt.show()

In [None]:
# Mean and median pH per age category, as plain arrays in group order
ph_by_age = donor_data_df.groupby('Age_Cat')['pH']
pH_means = ph_by_age.mean().values
pH_medians = ph_by_age.median().values

In [None]:
# Echo the per-age-category pH means as the cell output
pH_means

In [None]:
# Echo the per-age-category pH medians as the cell output
pH_medians

# Tissue Sample Distribution

In [None]:
# Stacked bar chart of tissue sample counts.
# X-axis: Age Group
# Y-axis: Sample count
# Bar Stacks: Brain Region

# Reshape to long format: one row per (donor, region) pair; rows with a
# missing value (no sample id for that region) are dropped before counting.
regions = ['MTG_sample_id', 'EC_sample_id', 'PUT_sample_id', 'SVZ_sample_id']
donor_data_df_melted = pd.melt(donor_data_df, id_vars='Age_Cat', value_vars=regions, var_name='Region', value_name='Sample')
donor_data_df_melted = donor_data_df_melted.dropna()

# Count samples by age and region -> rows: age category, columns: region.
# observed=False keeps every declared age category, whatever the pandas default is.
donor_data_df_count = donor_data_df_melted.groupby(['Age_Cat', 'Region'], observed=False).size().unstack('Region')

# Define custom colors for each region
custom_colors = {
  'EC_sample_id': '#2B79A9',
  'MTG_sample_id':'#841F84',
  'PUT_sample_id': '#A8244B',
  'SVZ_sample_id': '#E5B710'
}

# Plot stacked bars for each age category.
# Colours are looked up in the table's own column order: groupby sorts the
# region names, so that order differs from the `regions` list above, and
# indexing by `regions` would swap the colours of the first two regions.
donor_data_df_count.plot(kind='bar', stacked=True, color=[custom_colors[region] for region in donor_data_df_count.columns])

# Customize and show the plot
plt.title('Sample Count by Age and Region')
plt.xlabel('Age Category')
plt.ylabel('Sample Count')

# Move the legend outside the plot
plt.legend(title='Region', loc='center left', bbox_to_anchor=(1, 0.5))

plt.tight_layout(rect=[0,0,0.8,1])  # Adjust the layout so the legend fits
plt.savefig('Sample_Dist_Age.pdf', transparent=True)
#plt.show()

In [None]:
# Stacked bar chart of tissue sample counts.
# X-axis: Brain Region
# Y-axis: Sample count
# Bar Stacks: Age Group

# Reshape to long format: one row per (donor, region) pair; rows with a
# missing value (no sample id for that region) are dropped before counting.
regions = ['MTG_sample_id', 'EC_sample_id', 'PUT_sample_id', 'SVZ_sample_id']
donor_data_df_melted = pd.melt(donor_data_df, id_vars='Age_Cat', value_vars=regions, var_name='Region', value_name='Sample')
donor_data_df_melted = donor_data_df_melted.dropna()

# Count samples by region and age -> rows: region, columns: age category.
# observed=False keeps every declared age category, whatever the pandas default is.
donor_data_df_count = donor_data_df_melted.groupby(['Region', 'Age_Cat'], observed=False).size().unstack('Age_Cat')

# Define custom colors for each age category.
# Keyed by category name rather than by order of appearance in the data, so
# the mapping does not depend on CSV row order and matches the age colours
# used by the donut charts in this notebook.
custom_colors = {
    'Young': '#445FB1',  # Blue for Young
    'Old': '#FF9A5C'     # Orange for Old
}

# Plot stacked bars for each region
donor_data_df_count.plot(kind='bar', stacked=True, color=[custom_colors[age] for age in donor_data_df_count.columns])

# Customize and show the plot
plt.title('Sample Count by Region and Age')
plt.xlabel('Region')
plt.ylabel('Sample Count')

# Move the legend outside the plot
plt.legend(title='Age Category', loc='center left', bbox_to_anchor=(1, 0.5))

plt.tight_layout(rect=[0,0,0.8,1])  # Adjust the layout so the legend fits
plt.savefig('Sample_Dist_Region.pdf', transparent=True)
plt.show()
