# libraries and data

In [None]:
import warnings
# Ignore every warning raised from this point on. Note that this also hides
# deprecation notices emitted by the libraries imported below.
warnings.filterwarnings("ignore")

In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
#from pyclustree import clustree
import matplotlib.pyplot as plt
from sklearn_ann.kneighbors.annoy import AnnoyTransformer


In [None]:
# Load the AnnData object that every later cell in this notebook works on.
h5ad_path = '../PDAC_Final/Downstream/final_scanVI/final_object_all_genes.h5ad'
adata = sc.read_h5ad(h5ad_path)

In [None]:
# Write the unmasked Level_4 labels, together with the obs index, to a CSV.
# NOTE(review): the target is an absolute, user-specific path.
level4_labels = adata.obs['Level_4']
level4_labels.to_csv('/home/daniele/Level_4.csv.gz', index=True)

In [None]:
# Mask the Level_4 annotation: every distinct label is swapped for an integer
# code, numbered in the order the labels are returned by unique().
level4_names = adata.obs['Level_4'].unique()
label_to_code = {label: code for code, label in enumerate(level4_names)}
adata.obs['mask_level_4'] = adata.obs['Level_4'].replace(label_to_code)


# export mask mapping


In [None]:
import json

# Save the masked-code -> Level_4 label lookup as JSON.
# The two unique() results are paired positionally, which relies on both
# columns listing their values in the same (first-appearance) order.
level4_names = adata.obs['Level_4'].unique()
mask_codes = adata.obs['mask_level_4'].unique()
mask_mapping = dict(zip(mask_codes, level4_names))
with open('../../../supplementary_data/human/mask_mapping.json', 'w') as f:
    json.dump(mask_mapping, f)

# import cell types and markers

In [None]:
import json

# Read the annotation reference: for each macro cell group it provides the
# "celltypes" and "markers" entries consumed by the plotting function.
markers_path = '../../../supplementary_data/human/annotation_markers.json'
with open(markers_path, 'r') as f:
    celltype_dict = json.load(f)

# Plotting function

In [None]:
def plot_macro_celltype_umap(adata, celltype_dict, macro_type, vmax=1):
    """
    Plot a UMAP and marker-expression panels for one macro cell group.

    The function subsets `adata` to the Level_4 cell types listed for
    `macro_type`, recomputes neighbors and UMAP on that subset, and draws a
    matrixplot and a stacked violin plot of the group's marker genes. All
    plots are labelled with the masked identifiers in
    `adata.obs['mask_level_4']`, not with the real Level_4 names.

    Parameters:
    - adata: AnnData object; must provide the obs columns 'Level_4' and
      'mask_level_4', the 'scanvi_L4_emb' representation and a 'log_norm' layer
    - celltype_dict: dictionary keyed by macro cell type; each entry holds a
      "celltypes" list and a "markers" dict (cell type -> marker gene names)
    - macro_type: string key for the macro cell type group to process
    - vmax: value passed as `vmax` to both sc.pl.matrixplot and
      sc.pl.stacked_violin (upper limit of their colour scale)

    Returns:
    - None. Figures are shown and a summary is printed. If `macro_type` is not
      in `celltype_dict`, or none of its cell types occur in
      `adata.obs['Level_4']`, a message is printed and nothing is plotted. If
      no markers are defined for the present cell types, only the UMAP is
      drawn.
    """

    # precheck
    if macro_type not in celltype_dict:
        print(f"Error: '{macro_type}' not found in the provided dictionary.")
        return
    celltypes = celltype_dict[macro_type]["celltypes"]
    markers_dict = celltype_dict[macro_type]["markers"]

    # Collect the observed labels once instead of once per candidate cell type.
    observed_celltypes = set(adata.obs['Level_4'].unique())
    valid_celltypes = [ct for ct in celltypes if ct in observed_celltypes]
    if not valid_celltypes:
        print(f"Warning: None of the cell types in '{macro_type}' are present in adata.obs['Level_4'].")
        return

    in_group = adata.obs['Level_4'].isin(valid_celltypes)

    # Keep the first row of every cell type to read off its masked label; this
    # replaces one full boolean scan over all cells per subtype.
    first_rows = adata.obs.loc[in_group, ['Level_4', 'mask_level_4']].drop_duplicates(subset='Level_4')
    mask_by_celltype = dict(zip(first_rows['Level_4'], first_rows['mask_level_4']))

    # Marker groups are keyed by the string form of the masked label so the
    # real cell type names never appear on the plots.
    grouped_markers = {
        str(mask_by_celltype[subtype]): markers_dict[subtype]
        for subtype in valid_celltypes
        if markers_dict.get(subtype)
    }

    adata_subset = adata[in_group].copy()
    group_order = sorted(adata_subset.obs['mask_level_4'].unique())
    # Reorder the marker groups to follow the sorted masked labels.
    grouped_markers = {
        str(group): grouped_markers[str(group)]
        for group in group_order
        if str(group) in grouped_markers
    }

    # printing information
    separator = '=' * 50
    print(separator)
    print(f'Number of cells in {macro_type}: {adata_subset.n_obs}')
    print(separator)
    print(f'Available cell types for {macro_type}:')
    # The full candidate list is printed, not only the types present in adata.
    for cell in celltypes:
        print(cell)
    print(separator)

    # compute embeddings and plot umap
    sc.pp.neighbors(adata_subset, use_rep='scanvi_L4_emb', transformer=AnnoyTransformer(15))
    sc.tl.umap(adata_subset, min_dist=0.25)
    sc.pl.umap(adata_subset, color='mask_level_4', title="")

    print(separator)

    # Without any marker genes there is nothing to show in the expression
    # panels, so stop after the UMAP instead of plotting an empty gene set.
    if not grouped_markers:
        print(f"Warning: No markers defined for the cell types in '{macro_type}'; skipping expression plots.")
        return

    # plot expression of markers: width grows with the number of genes,
    # height with the number of marker groups
    n_markers = sum(len(genes) for genes in grouped_markers.values())
    size_x = max(6, n_markers)
    size_y = max(4, int(4 * len(grouped_markers) / 6))
    _, axes = plt.subplots(1, 2, figsize=(size_x, size_y))

    # Matrixplot
    sc.pl.matrixplot(
        adata_subset, var_names=grouped_markers, groupby='mask_level_4',
        layer='log_norm', show=False, ax=axes[0], categories_order=group_order,
        cmap='Blues', vmax=vmax,
    )

    # Stacked violin
    sc.pl.stacked_violin(
        adata_subset, var_names=grouped_markers, groupby='mask_level_4',
        layer='log_norm', show=False, ax=axes[1], categories_order=group_order,
        cmap='Blues', vmax=vmax,
    )

    # Adjust and show
    plt.tight_layout()
    plt.show()


# Level 4 cell type validation

## 🔬 Goal of This Notebook: Cell Type Annotation Validation

In this notebook, **Level 4 cell type annotations have been masked** in the provided `anndata` object.

For each **Level 3 macrocategory**, we display:

- The number of cells  
- The masked cell clusters  
- The expression levels of selected **marker genes** used during annotation  
- A list of all possible **Level 4 subtypes** contained within the macrocategory  

### 🧪 Your Task

Based on the marker gene expression shown, please assign the masked cells to the appropriate **Level 4 cell type**.

👉 **Fill in your annotations using this table:**  
[Annotation Table (Google Sheet)](https://docs.google.com/spreadsheets/d/1Io9YcXBv2TjIHRC-sRK1X29J8KYOfntPSvigKs9-9-U/edit?usp=sharing)

### ✅ Instructions

1. **Make a copy** of the table (File > Make a copy).  
2. For each masked cell cluster, assign the most appropriate Level 4 category by placing a lowercase **`x`** in the cell where the cluster row and category column intersect.  
3. **Download your completed table as a CSV file** (File > Download > Comma Separated Values).  
4. Name the file **`annotation_validation_<your_initials>.csv`** and share it with us.

# Immune cells

## CD4 T cells

In [None]:
# UMAP and marker panels for the 'CD4 T cells' group (default vmax=1).
plot_macro_celltype_umap(adata, celltype_dict, 'CD4 T cells')

## CD8 T cells

In [None]:
# UMAP and marker panels for the 'CD8 T cells' group (default vmax=1).
plot_macro_celltype_umap(adata, celltype_dict, 'CD8 T cells')

## Other T cells

In [None]:
# UMAP and marker panels for the 'Other T cells' group (default vmax=1).
plot_macro_celltype_umap(adata, celltype_dict, 'Other T cells')

## B and plasma cells

In [None]:
# UMAP and marker panels for the 'B cells and Plasma cells' group (default vmax=1).
plot_macro_celltype_umap(adata, celltype_dict, 'B cells and Plasma cells')

## Macrophages and Monocytes

In [None]:
# UMAP and marker panels for the 'Macrophages and Monocytes' group; vmax is
# raised from the default 1 to 2 for the expression plots.
plot_macro_celltype_umap(adata, celltype_dict, 'Macrophages and Monocytes', vmax=2)

## Neutrophils

In [None]:
# UMAP and marker panels for the 'Neutrophils' group, with vmax set to 2.
plot_macro_celltype_umap(adata, celltype_dict, 'Neutrophils', vmax=2)

## Dendritic and Mast Cells

In [None]:
# UMAP and marker panels for the 'Dendritic and Mast Cells' group, with vmax
# given explicitly as 1 (same as the default).
plot_macro_celltype_umap(adata, celltype_dict, 'Dendritic and Mast Cells', vmax=1)

# Malignant cells

In [None]:
# UMAP and marker panels for the 'Malignant Cells' group, with vmax set to 2.
plot_macro_celltype_umap(adata, celltype_dict, 'Malignant Cells', vmax=2)

# Other macro cell types

## Endothelial cells

In [None]:
# UMAP and marker panels for the 'Endothelial Cells' group (default vmax=1).
plot_macro_celltype_umap(adata, celltype_dict, 'Endothelial Cells')

## Endocrine cells

In [None]:
# UMAP and marker panels for the 'Endocrine Cells' group (default vmax=1).
plot_macro_celltype_umap(adata, celltype_dict, 'Endocrine Cells')

## Exocrine cells

In [None]:
# UMAP and marker panels for the 'Exocrine Cells' group, with vmax set to 2.
plot_macro_celltype_umap(adata, celltype_dict, 'Exocrine Cells', vmax=2)

## Fibroblasts

In [None]:
# UMAP and marker panels for the 'Fibroblasts' group, with vmax set to 2.
plot_macro_celltype_umap(adata, celltype_dict, 'Fibroblasts', vmax=2)