# UpSets to Show aDEGs by Broad Cell Type x Region
Code written and conceptualized by Monica E. Mesecar. Plot aesthetics and the looping of the plotting function over cell types were developed with support from Perplexity AI.

In [None]:
import pandas as pd
import numpy as np
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import colors
from matplotlib import rcParams

In [None]:
# Print the current working directory; the CSV read in the next cell is
# located with a path relative to it.
!pwd

In [None]:
# Load the results table; the relative path resolves against the notebook's
# working directory.
p1_deg = pd.read_csv("../P1_aging.glmmtmb_age_diffs_fdr_cleaned.csv")

In [None]:
# Render the full results table to inspect it before subsetting.
display(p1_deg)

In [None]:
# Collect the distinct values of the 'type' column to see which subsets exist
p1_type_list = p1_deg["type"].unique().tolist()
print(p1_type_list)

In [None]:
# Keep only the rows whose 'type' is 'region_broad_celltype'
p1_region_broad_df = p1_deg[p1_deg["type"].eq("region_broad_celltype")]

In [None]:
# Show the 'region_broad_celltype' subset
p1_region_broad_df

## UpSet Plots 

In [None]:
!pip install upsetplot --user

In [None]:
import upsetplot

In [None]:
# Report the installed upsetplot version
print(upsetplot.__version__)

In [None]:
from upsetplot import plot
from upsetplot import UpSet
from upsetplot import from_contents
from upsetplot import from_memberships
from matplotlib import pyplot

In [None]:
# Render the 'region_broad_celltype' subset again before building the plots.
display(p1_region_broad_df)

# Cell Type x Region

In [None]:
# Distinct cell types present in the region_broad_celltype subset
p1_CT_list = p1_region_broad_df["celltype"].unique().tolist()
print(p1_CT_list)

In [None]:
# Drop rows labelled with the 'Indeterminate' cell type
p1_region_broad_df = p1_region_broad_df.loc[
    p1_region_broad_df["celltype"].ne("Indeterminate")
]

In [None]:
# NOTE: p1_CT_list was built before the 'Indeterminate' rows were dropped from
# p1_region_broad_df, so the list shown here is not affected by that filter.
p1_CT_list

In [None]:
from collections import defaultdict
from matplotlib.patches import Patch

def create_upset_plot(df, cell_type, region_colors):
    """
    Create and save an UpSet plot of gene overlap across brain regions
    for a single cell type.

    Rows of ``df`` whose 'celltype' equals ``cell_type`` are grouped by gene
    ('feature' column); the set of regions ('region' column) a gene appears
    in is its membership. Subsets made of exactly one region are filled with
    that region's color. The figure is saved as
    ``{cell_type}_aDEG_UpSet_UnCol_NoDir_FINAL2.pdf`` in the current working
    directory and then closed.

    Parameters:
    - df: Input dataframe with 'celltype', 'feature' and 'region' columns
    - cell_type: Specific cell type to analyze
    - region_colors: Dictionary of {region: hex_color}. It is read only;
      regions without an entry are drawn in gray ('#808080') after a
      warning is printed.

    Returns:
    - None. If no rows match ``cell_type``, a message is printed and nothing
      is plotted or saved.
    """
    # Subset dataframe for the specific cell type
    ct_df = df[df['celltype'] == cell_type]

    # Nothing to plot for an unknown / empty cell type
    if ct_df.empty:
        print(f"No data found for cell type: {cell_type}")
        return

    # Imported locally so the function does not depend on an earlier notebook
    # cell having imported from_indicators.
    from upsetplot import UpSet, from_indicators

    # Map each gene to the set of regions in which it is reported
    gene_region_sets = defaultdict(set)
    for gene, region in zip(ct_df['feature'], ct_df['region']):
        gene_region_sets[gene].add(region)

    # Unique regions for this cell type, in a stable (sorted) order
    regions = sorted(set().union(*gene_region_sets.values()))

    # Resolve one color per region into a local palette so the caller's
    # dictionary is never modified.
    default_color = '#808080'  # Default gray
    missing_regions = set(regions) - set(region_colors)
    if missing_regions:
        print(f"Warning: No colors defined for regions: {missing_regions}")
    palette = {
        region: region_colors.get(region, default_color) for region in regions
    }

    # Boolean membership table: one row per gene, one column per region
    upset_df_bool = pd.DataFrame(
        {
            region: [region in members for members in gene_region_sets.values()]
            for region in regions
        },
        index=list(gene_region_sets),
    )

    # Create UpSet object
    upset = UpSet(
        from_indicators(
            lambda frame: frame.select_dtypes(bool), data=upset_df_bool
        ),
        sort_by="cardinality",
        show_counts=True,
    )

    # Color the subset that contains only `region` and none of the others,
    # and collect a matching legend handle for it.
    legend_elements = []
    for region in regions:
        other_regions = [r for r in regions if r != region]
        upset.style_subsets(
            present=region,
            absent=other_regions,
            facecolor=palette[region],
            linewidth=2,
            label=f'{region} unique',
        )
        legend_elements.append(
            Patch(facecolor=palette[region], label=f'{region} unique')
        )

    # Create a new figure with desired size; it becomes the current figure,
    # so the pyplot calls below all act on it.
    fig = plt.figure(figsize=(12, 8))
    try:
        # Plot
        upset.plot(fig=fig)

        # Adjust layout to make room for legend
        plt.tight_layout()

        # Add title
        plt.suptitle(f"aDEG Count by Region among {cell_type}", y=1.02)

        # Add legend outside the plot
        plt.legend(handles=legend_elements,
                   loc='upper left',
                   bbox_to_anchor=(1.05, 1),
                   borderaxespad=0.,
                   fontsize='small',
                   ncol=1)

        # Adjust subplot to make room for legend
        plt.subplots_adjust(right=0.85)

        # Save figure
        plt.savefig(f"{cell_type}_aDEG_UpSet_UnCol_NoDir_FINAL2.pdf", format="pdf", bbox_inches="tight", dpi=300)
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)

In [None]:
# Region -> hex color used to fill each region's single-region subset
region_colors = {
    'EC': '#841F84',
    'MTG': '#2B79A9',
    'PUT': '#A8244B',
    'SVZ': '#E5B710',
}

# Cell types to plot, recomputed from p1_region_broad_df. "SPN" is left out;
# filtering (instead of list.remove) avoids a ValueError when "SPN" is not
# present in the data.
# NOTE(review): the reason for excluding SPN is not recorded here - confirm.
p1_CT_list = [
    ct
    for ct in p1_region_broad_df["celltype"].unique().tolist()
    if ct != "SPN"
]

# Create UpSet plot for each cell type
for cell_type in p1_CT_list:
    create_upset_plot(p1_region_broad_df, cell_type, region_colors)
    print(f"Processed UpSet plot for {cell_type}")