In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [None]:
# Show the kernel's working directory; the relative .h5ad paths used below resolve against it.
pwd

In [None]:
# Load the AnnData object that the rest of the notebook works on.
adata = sc.read_h5ad('adata_scanvi_neighbours.h5ad')

In [None]:
import scipy.sparse as sp

# Use the raw layer as X and store it in CSR format. The assignment and the
# conversion are done in one step: converting X first and assigning
# layers['raw'] afterwards would simply discard the converted matrix.
adata.X = sp.csr_matrix(adata.layers['raw'])

# Store the binned layer in CSR format as well.
adata.layers['binned_data'] = sp.csr_matrix(adata.layers['binned_data'])

In [None]:
# Drop the 25-bin layer if it is still there. The guard keeps this cell
# re-runnable: the object is written back to the same file it was loaded from,
# so after the first full run the layer no longer exists and a bare `del`
# would raise KeyError.
if 'binned_data_25_bins' in adata.layers:
    del adata.layers['binned_data_25_bins']

In [None]:
# Persist the modified object, gzip-compressed.
# NOTE(review): this is the same path the notebook loaded from, so the input
# file is overwritten in place — confirm that is intended.
adata.write('adata_scanvi_neighbours.h5ad', compression='gzip')

In [None]:
# Overview UMAPs: one panel per annotation column, four panels per row.
overview_columns = ['Level_1_refined', 'Technology', 'Dataset', 'leiden']
sc.pl.umap(
    adata,
    color=overview_columns,
    frameon=False,
    ncols=4,
    wspace=0.75,
)

In [None]:
plt.rcParams['figure.figsize'] = (12, 10)

# Plot the cells in shuffled order (presumably so that no group is
# systematically drawn on top of the others). The generator is seeded so the
# shuffle, and therefore the figure, is the same on every run.
random_indices = np.random.default_rng(0).permutation(adata.n_obs)
sc.pl.umap(
    adata[random_indices, :],
    color=['Level_1_refined', 'Technology'],
    frameon=False,
    legend_fontoutline=3,
    legend_loc='on data',
    legend_fontsize=6,
    size=4,
)

In [None]:
# One UMAP per highlighted cell type. The panels differ only in `groups`, so
# they are produced by a single loop instead of three copy-pasted cells.
# Uses the shuffled cell order (`random_indices`) defined in the previous cell.
for highlighted_type in ('Endocrine Cell', 'Myeloid Cell', 'T Cell'):
    sc.pl.umap(
        adata[random_indices, :],
        color=['Level_1_refined'],
        groups=highlighted_type,
        frameon=False,
        legend_fontoutline=3,
        legend_loc='on data',
        legend_fontsize=6,
        size=4,
    )

In [None]:
adata.obs.groupby(['Dataset', 'Level_1_refined']).size().unstack()

In [None]:
adata.obs.groupby(['Technology', 'Level_1_refined']).size().unstack().T

In [None]:
def subcluster_celltype(adata, cell_type, embed):
    """Sub-cluster the cells of one cell type and write the labels back to ``adata``.

    Cells are selected when their ``Level_1_refined`` label contains
    ``cell_type``. This is a substring / regular-expression match, so a short
    pattern also selects every longer label that contains it. The selected
    cells are copied; on the copy a cosine neighbour graph is built from
    ``embed``, Leiden clustering (resolution 0.5) and a UMAP are computed, and
    the UMAP is plotted coloured by dataset, technology and sub-cluster.

    Parameters
    ----------
    adata
        Annotated data object with an ``obs['Level_1_refined']`` column.
        Modified in place: the column ``obs['<cell_type>_leiden_0.5']`` is
        (re)created, holding the sub-cluster label for the selected cells and
        a missing value for all other cells.
    cell_type
        Pattern matched against ``Level_1_refined``; also the prefix of the
        new obs column name.
    embed
        Name of the representation handed to ``sc.pp.neighbors`` as ``use_rep``.

    Returns
    -------
    The subset copy, carrying the neighbour graph, the clustering and the UMAP.

    Raises
    ------
    ValueError
        If no cell matches ``cell_type``.
    """
    leiden_key = f'{cell_type}_leiden_0.5'

    # na=False: cells without a label count as non-matching instead of
    # producing NaN entries, which cannot be used as a boolean mask.
    cell_mask = (
        adata.obs['Level_1_refined']
        .str.contains(cell_type, na=False)
        .to_numpy(dtype=bool)
    )
    if not cell_mask.any():
        raise ValueError(f"No cells with Level_1_refined matching {cell_type!r}")

    # Work on a real copy, not a view, because the steps below write to it.
    adata_temp = adata[cell_mask].copy()

    # One neighbour per 100 cells, but never fewer than 2: for small subsets
    # the rounded ratio is 0 or 1, which does not give a usable neighbour graph.
    n_neighbors = max(2, round(adata_temp.n_obs / 100))
    sc.pp.neighbors(adata_temp, use_rep=embed, metric='cosine', n_neighbors=n_neighbors)

    # Leiden clustering of the subset.
    sc.tl.leiden(adata_temp, resolution=0.5, flavor="igraph", n_iterations=2, key_added=leiden_key)

    # UMAP of the subset, then plot it.
    sc.tl.umap(adata_temp, min_dist=0.75)
    sc.pl.umap(adata_temp, color=['Dataset', 'Technology', leiden_key], frameon=False, wspace=0.5, size=3)

    # Write the labels back as one categorical column. Code -1 marks "no
    # label" for cells outside the subset. Building the column from codes
    # avoids assigning strings into a float NaN column, which would leave a
    # mixed-type object column behind.
    sub_labels = pd.Categorical(adata_temp.obs[leiden_key])
    codes = np.full(adata.n_obs, -1, dtype=np.intp)
    codes[cell_mask] = sub_labels.codes
    adata.obs[leiden_key] = pd.Categorical.from_codes(codes, categories=sub_labels.categories)
    return adata_temp

In [None]:
# Sub-cluster the cells whose label contains 'Acinar', using the 'scanvi_emb'
# representation. Also adds the column 'Acinar_leiden_0.5' to adata.obs.
acinar = subcluster_celltype(adata, cell_type='Acinar', embed='scanvi_emb')

In [None]:
# Number of cells in each acinar sub-cluster.
acinar.obs.groupby('Acinar_leiden_0.5').size()

In [None]:
# Sub-clusters on the UMAP computed for the acinar subset.
sc.pl.umap(acinar, color='Acinar_leiden_0.5')

In [None]:
# The same labels on the UMAP of the full object; cells outside the subset carry no label.
sc.pl.umap(adata, color='Acinar_leiden_0.5')

In [None]:
# List the distinct Level_1_refined labels.
adata.obs.Level_1_refined.unique().tolist()

In [None]:
# NOTE(review): cell_type is matched as a substring, so 'Malignant' presumably
# also selects cells labelled 'Ductal Cell/Malignant' — confirm that is intended.
malignant = subcluster_celltype(adata, cell_type='Malignant', embed='scanvi_emb')

In [None]:
# Scratch check: replace '/', ' ' and '-' in a label with underscores.
'Intra-pancreatic Neurons'.replace('/', '_').replace(' ', '_').replace('-', '_')

In [None]:
# Scratch check: max(1, ...) clamps a negative rounded value to 1.
max(1, round(-2000))


In [None]:
# Scratch check: round() of a ratio just above 0.5.
round(101/200)

In [None]:
# Load the companion object. Going by the file name it holds all genes — TODO confirm.
adata_all_genes = sc.read_h5ad('adata_scpoli_final_all_genes.h5ad')

In [None]:
# Summary of the object loaded at the top of the notebook, for comparison with the next cell.
adata

In [None]:
# Summary of the companion object.
adata_all_genes

In [None]:
# Keep only the cells present in `adata`, in the same order. Subsetting gives
# an AnnData view; .copy() makes it a standalone object, which is needed
# because the following cells assign obsm / uns / obsp on it.
adata_all_genes = adata_all_genes[adata.obs_names].copy()

In [None]:
# Carry the per-cell multi-dimensional annotations (obsm) over from `adata`.
adata_all_genes.obsm= adata.obsm.copy()

In [None]:
# Carry over the unstructured annotations (uns).
adata_all_genes.uns = adata.uns.copy()

In [None]:
# The binned layer of `adata` is stored under obsm here, not under layers —
# presumably because the two objects do not share the same genes; TODO confirm.
adata_all_genes.obsm['binned_data'] = adata.layers['binned_data'].copy()

In [None]:
# Carry over the pairwise cell-by-cell matrices (obsp).
adata_all_genes.obsp = adata.obsp.copy()

In [None]:
# UMAP overview drawn from the companion object, using the annotations copied over above.
sc.pl.umap(adata_all_genes, color=['Level_1_refined', 'Technology', 'Dataset', 'leiden'], frameon=False, ncols=4, wspace=0.75)

In [None]:
# Same overview from the original object — presumably a visual check that both agree.
sc.pl.umap(adata, color=['Level_1_refined', 'Technology', 'Dataset', 'leiden'], frameon=False, ncols=4, wspace=0.75)

In [None]:
# Store the 'leiden' labels of `adata` on the companion object under the name 'Global_Leiden'.
adata_all_genes.obs['Global_Leiden'] = adata.obs['leiden'].copy()

In [None]:
# Write the companion object to a new gzip-compressed file.
adata_all_genes.write('adata_scanvi_all_genes.h5ad', compression='gzip')

In [None]:
# Show the working directory, i.e. where the relative path above was written.
pwd

In [None]:
# Summary of the object that was just saved.
adata_all_genes

In [None]:
adata.obs.groupby(['Dataset', 'Treatment']).size().unstack()

In [None]:
adata.obs.groupby(['Technology', 'Level_1_refined']).size().unstack()[['Ductal Cell', 'Ductal Cell/Malignant', 'Malignant', 'EMT']]

In [None]:
adata.obs.groupby(['Technology', 'infercnv_score_malignant']).size().unstack() #[['Ductal Cell', 'Ductal Cell/Malignant', 'Malignant', 'EMT']]