In [93]:
import scanpy as sc
import anndata as ad
import pooch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides library warnings about
# questionable inputs; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')


In [65]:
# Global scanpy figure settings used by every plot below:
# white figure background and a resolution setting of 1000 dpi.
sc.set_figure_params(
    facecolor='white',
    dpi=1000,
)

In [42]:
# Locations of the processed VISp tables on the analysis machine.
count_csv = "/media/ubuntu/sda/Patch-seq/data/trancsriptome/Nature_2018/processed/VISp_raw_count.csv"
metadata_csv = "/media/ubuntu/sda/Patch-seq/data/trancsriptome/Nature_2018/processed/VISp_metadata.csv"

# Read both tables the same way: the first CSV column becomes the row index.
# (Presumably `count` is genes x samples -- it is transposed before being
# wrapped in an AnnData object further down; verify against the file.)
count, metadata = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (count_csv, metadata_csv)
)

In [43]:
# Gene annotation table; read with the default integer row index so that
# its columns (e.g. 'gene_entrez_id', 'gene_symbol') stay regular columns.
gene_rows_csv = "/media/ubuntu/sda/Patch-seq/data/trancsriptome/Nature_2018/mouse_ALM_2018-06-14_genes-rows.csv"
gene_rows = pd.read_csv(gene_rows_csv)

In [44]:
# Lookup table: Entrez gene id -> gene symbol (for a repeated id the last
# row of the annotation table wins).
id_to_symbol = dict(zip(gene_rows['gene_entrez_id'], gene_rows['gene_symbol']))

# Relabel the rows of the count matrix with gene symbols. Ids that are not
# in the lookup table come out as NaN.
symbol_index = count.index.map(id_to_symbol)
count.index = symbol_index

In [45]:
# Index the metadata by sample name while keeping 'sample_name' available
# as an ordinary column as well.
metadata = metadata.set_index('sample_name', drop=False)

In [46]:
# Build the AnnData container: `count` is transposed so that its columns
# (samples) become observations and its rows (genes) become variables.
#
# Assigning a DataFrame to `.obs` attaches it row-by-row and takes over its
# index as the observation names; it is not matched against the rows of X.
# The metadata is therefore aligned to the count-matrix columns explicitly,
# and a mismatch is reported instead of silently mislabelling cells.
# (Assumes the columns of `count` are sample names, like `metadata.index`.)
missing_samples = count.columns.difference(metadata.index)
if len(missing_samples) > 0:
    raise ValueError(
        f"{len(missing_samples)} count-matrix samples have no metadata row, "
        f"e.g. {list(missing_samples[:5])}"
    )

data = sc.AnnData(X=count.T)
data.obs = metadata.loc[count.columns].copy()
data.var_names = count.index

In [47]:
# The expression values were handed to `data` in the previous cell, so the
# standalone DataFrame name is no longer needed; drop the reference.
del count

In [48]:
# Highly-variable-gene selection with flavor='seurat_v3' expects raw counts,
# so it must run BEFORE normalisation / log-transformation. Here the genes
# are only flagged (subset=False) so that the normalisation below still sees
# every gene when computing per-cell totals.
sc.pp.highly_variable_genes(
    data,
    flavor='seurat_v3',
    n_top_genes=2000,
    subset=False,
)

# Scale every cell to a total of 1e6 and take log(1 + x).
sc.pp.normalize_total(data, target_sum=1e6)
sc.pp.log1p(data)

# Keep only the 2000 flagged genes; .copy() materialises the subset so that
# later in-place steps do not operate on a view.
data = data[:, data.var['highly_variable'].to_numpy()].copy()

  return fn(*args_all, **kw)


In [49]:
# Keep GABAergic cells only, and drop two clusters by name.
is_gabaergic = data.obs['class'] == 'GABAergic'
is_excluded_cluster = data.obs['cluster'].isin(
    ["L2/3 IT VISp Agmat", 'Meis2 Adamts19']
)
keep_cell = (is_gabaergic & ~is_excluded_cluster).to_numpy()

# Indexing an AnnData returns a view; .copy() turns the selection into a
# standalone object so the in-place PCA / neighbours / UMAP steps that
# follow do not write into a view of a view.
data = data[keep_cell].copy()

In [57]:
# Dimensionality reduction: 50 principal components via the ARPACK solver.
sc.tl.pca(
    data,
    n_comps=50,
    svd_solver='arpack',
)

# k-nearest-neighbour graph (k = 15); note that only 9 of the 50 computed
# components are passed on to the graph construction.
sc.pp.neighbors(
    data,
    n_pcs=9,
    n_neighbors=15,
)

# 2-D UMAP layout of the neighbour graph.
sc.tl.umap(
    data,
    spread=1,
    min_dist=0.3,
)

In [51]:
# Flat list of hex colours, written as one tuple per cell-type family and
# flattened in the order given (the order of the entries matters downstream).
palette = [
    hex_color
    for family_colors in (
        # Lamp5
        ("#532cd8", "#da28a1", "#675ee5", "#d632dc", "#4428a7", "#9b34ec", "#a434bc"),
        # Serpinf1, Sncg
        ("#376c78", "#4f97b1", "#3a5166", "#5f8fd4", "#3a5e95", "#868aa6"),
        # Pvalb
        ("#a32621", "#751610", "#a13e2f", "#d6614b", "#ce3926",
         "#ec4d21", "#a03c14", "#ce6c2b", "#914a16", "#b07625"),
        # Sst (note: "#106003" appears four times in a row)
        ("#d48746", "#8a501a", "#6e3901", "#de821d", "#a36219", "#b68f28",
         "#9fa82d", "#7e882b", "#516615", "#638d2f", "#78af41", "#3e6914",
         "#106003", "#106003", "#106003", "#106003",
         "#317921", "#3f9732", "#45b639", "#2d8346", "#3bb06e"),
        # Vip
        ("#757ae8", "#464ba1", "#4f4471", "#6046ce", "#6b28dd", "#9a7bbd",
         "#7e35a1", "#c643e5", "#bd6bc2", "#7b3578", "#8b6a86", "#d843ba",
         "#5e4454", "#7e2e57", "#c26c93", "#cc3b87"),
    )
    for hex_color in family_colors
]

In [94]:
import os

from matplotlib.backends.backend_pdf import PdfPages

# Pin every cluster label to one palette entry, in sorted label order.
# `unique_dates` holds the sorted unique cluster labels (the name is
# historical); it, `color_map` and `colors` are kept as module-level names
# in case later cells rely on them.
unique_dates = np.unique(data.obs['cluster'])
if len(unique_dates) > len(palette):
    raise ValueError(
        f"palette has {len(palette)} colours but there are "
        f"{len(unique_dates)} clusters"
    )
color_map = {date: palette[i] for i, date in enumerate(unique_dates)}
colors = [color_map[date] for date in data.obs['cluster']]

# PdfPages does not create missing directories.
os.makedirs('figure', exist_ok=True)

umap_xy = data.obsm['X_umap']

with PdfPages('figure/umap_plot.pdf') as pdf:
    # --- Page 1: bare UMAP scatter (no axes decorations, no legend) -------
    fig, ax = plt.subplots(figsize=(8, 8))
    sns.scatterplot(
        x=umap_xy[:, 0],
        y=umap_xy[:, 1],
        hue=data.obs['cluster'].to_numpy(),
        # Explicit label order + label->colour mapping, so the colours do
        # not depend on the dtype of obs['cluster'] or on the order in which
        # labels happen to appear in the data.
        hue_order=list(unique_dates),
        palette=color_map,
        linewidth=0,
        s=3,
        legend=False,  # instead of creating a legend and removing it again
        ax=ax,
    )
    ax.grid(False)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel('')
    ax.set_ylabel('')
    fig.tight_layout()

    pdf.savefig(fig, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

    # --- Page 2: scanpy UMAP with the cluster legend ----------------------
    fig, ax = plt.subplots(figsize=(8, 10))
    sc.pl.umap(
        data,
        color='cluster',
        # NOTE(review): scanpy hands out the list entries to the cluster
        # categories in category order -- presumably the same sorted order
        # as `color_map`; verify that both pages agree.
        palette=palette,
        size=5,
        alpha=0.8,
        frameon=False,
        show=False,  # draw onto `ax` without displaying, so it can be saved
        ax=ax,
    )

    fig.tight_layout()
    pdf.savefig(fig, bbox_inches='tight')
    plt.close(fig)