In [1]:
import scanpy as sc
import anndata as ad
import pooch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')


In [2]:
sc.set_figure_params(dpi=1000, facecolor='white')

In [3]:
count = pd.read_csv("/media/ubuntu/sda/Patch-seq/data/trancsriptome/Cell_2020/processed/raw_counts_4270.csv", index_col= 0)
metadata = pd.read_csv("/media/ubuntu/sda/Patch-seq/data/trancsriptome/Cell_2020/processed/metadata_4270.csv", index_col=0)

In [5]:
metadata.index = metadata['transcriptomics_sample_id']

In [6]:
data = sc.AnnData(X=count.T)
data.obs = metadata.copy()
data.var_names = count.index

In [7]:
del count

In [8]:
sc.pp.normalize_total(data, target_sum=1e6)
sc.pp.log1p(data)
sc.pp.highly_variable_genes(
    data,
    flavor='seurat_v3',  
    n_top_genes=2000,     
    subset=True          
)

In [9]:
data.obs_keys

<bound method AnnData.obs_keys of AnnData object with n_obs × n_vars = 4270 × 2000
    obs: 'project', 'cell_specimen_id', 'cell_specimen_name', 'hemisphere', 'structure', 'donor_id', 'donor_name', 'biological_sex', 'age', 'ethnicity', 'medical_conditions', 'full_genotype', 'dendrite_type', 'apical_dendrite_status', 'neuron_reconstruction_type', 'cell_soma_normalized_depth', 'depth_from_pia_um', 'ephys_session_id', 'transcriptomics_sample_id', 'transcriptomics_batch', 'corresponding_AIT2.3.1_ID', 'corresponding_AIT2.3.1_alias'
    var: 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm'
    uns: 'log1p', 'hvg'>

In [10]:
#data = data[data.obs['class'] == 'GABAergic']
data = data[~data.obs['corresponding_AIT2.3.1_alias'].isin(["L2/3 IT VISp Agmat", 'Meis2 Adamts19'])]

In [11]:
sc.tl.pca(data, svd_solver='arpack', n_comps=50)
sc.pp.neighbors(data, n_neighbors=15, n_pcs=9)
sc.tl.umap(data, min_dist=0.3, spread=1)

In [42]:
sc.tl.leiden(data, resolution=0.5)

In [29]:
data_filtered = data[data.obs['leiden'] != "6"]

In [12]:
palette = ["#532cd8", "#da28a1", "#675ee5", "#d632dc", "#4428a7", "#9b34ec", "#a434bc", #Lamp5,
           "#376c78", "#4f97b1", "#3a5166", "#5f8fd4", "#3a5e95", "#868aa6", # Serpinf1, Sncg
           "#a32621", "#751610", "#a13e2f", "#d6614b", "#ce3926", "#ec4d21", "#a03c14", "#ce6c2b", "#914a16", "#b07625", # Pvalb
           "#d48746", "#8a501a", "#6e3901", "#de821d", "#a36219", "#b68f28", "#9fa82d", "#7e882b", "#516615", "#638d2f", "#78af41", "#3e6914", "#106003", "#106003", "#106003", "#106003", "#317921", "#3f9732", "#45b639", "#2d8346", "#3bb06e", #Sst
           "#757ae8", "#464ba1", "#4f4471", "#6046ce", "#6b28dd", "#9a7bbd", "#7e35a1", "#c643e5", "#bd6bc2", "#7b3578", "#8b6a86", "#d843ba", "#5e4454", "#7e2e57", "#c26c93", "#cc3b87" #Vip
]

In [17]:
import pickle
with open("/media/ubuntu/sda/Patch-seq/data/trancsriptome/Nature_2018/color_map.pkl", 'rb') as f:
    color_map = pickle.load(f)

In [30]:
from matplotlib.backends.backend_pdf import PdfPages

colors = [color_map[date] for date in data_filtered.obs['corresponding_AIT2.3.1_alias']]

with PdfPages('figure/umap_plot.pdf') as pdf:
    plt.figure(figsize=(8, 8))
    
    scatter = sns.scatterplot(x = data_filtered.obsm['X_umap'][:, 0],
                    y = data_filtered.obsm['X_umap'][:, 1],
                    hue = data_filtered.obs['corresponding_AIT2.3.1_alias'],
                    palette=palette,
                    linewidth = 0,
                    s = 3)
    
    plt.grid(False)  
    plt.xticks([])  
    plt.yticks([])   
    plt.xlabel('')   
    plt.ylabel('')  
    plt.tight_layout()

    scatter.legend_.remove()  

    pdf.savefig(bbox_inches='tight', pad_inches=0) 
    plt.close()
    
    plt.figure(figsize=(8, 10))
    ax = plt.gca()
    sc.pl.umap(
        data_filtered,
        color='corresponding_AIT2.3.1_alias',
        palette=palette,
        s=5,                    
        alpha=0.8,              
        frameon=False,           
        show=False,             
        ax=ax
    )
    
    plt.tight_layout()
    pdf.savefig(bbox_inches='tight')
    plt.close()

    plt.figure(figsize=(8, 8))
    
    scatter = sns.scatterplot(x = data_filtered.obsm['X_umap'][:, 0],
                    y = data_filtered.obsm['X_umap'][:, 1],
                    hue = data_filtered.obs['leiden'],
                    palette=palette,
                    linewidth = 0,
                    s = 3)
    
    plt.grid(False)  
    plt.xticks([])  
    plt.yticks([])   
    plt.xlabel('')   
    plt.ylabel('')  
    plt.tight_layout()

    scatter.legend_.remove()  

    pdf.savefig(bbox_inches='tight', pad_inches=0) 
    plt.close()

In [43]:
data.write('raw_data_cluster.h5ad')
data_filtered.write("filtered_data_cluster.h5ad")