In [14]:
import pertpy as pt
import scanpy as sc
import pandas as pd
import numpy as np

In [50]:
adata = pt.dt.dialogue_example()
adata

AnnData object with n_obs × n_vars = 5374 × 6329
    obs: 'nCount_RNA', 'nFeature_RNA', 'cellQ', 'gender', 'location', 'clinical.status', 'cell.subtypes', 'pathology', 'origin', 'subset'
    var: 'name'

In [51]:
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)

In [69]:
dl = pt.tl.Dialogue(sample_id = "Sample",
                   celltype_key = "cell.subtypes",
                   n_counts_key = "nCount_RNA",
                   n_mpcs = 3)

In [70]:
penalties = None
ct_order = None
agg_pca = True
solver = "lp"
normalize= True

In [64]:
if ct_order is not None:
    cell_types = ct_order
else:
    ct_order = cell_types = adata.obs[dl.celltype_key].astype("category").cat.categories

In [65]:
ct_order

Index(['CD8+ IELs', 'CD8+ IL17+', 'CD8+ LP', 'Macrophages', 'TA2'], dtype='object')

In [66]:
mcca_in, ct_subs = dl.load(adata, ct_order=cell_types, agg_pca=agg_pca, normalize=normalize)

In [15]:
np.shape(mcca_in)

(5, 3, 50)

In [16]:
n_samples = mcca_in[0].shape[1]

In [22]:
adata.obsm['X_pca'].shape

(5374, 50)

In [25]:
n_samples

50

In [26]:
adata.obs

Unnamed: 0_level_0,nCount_RNA,nFeature_RNA,cellQ,gender,location,clinical.status,cell.subtypes,pathology,origin,subset
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
N7.EpiA.AAACGCACAATCGC,2176.911552,269,0.037467,Female,Epi,Non-inflamed,TA2,True,A,A
N7.EpiA.AGATATTGATCGGT,4319.159178,660,0.093293,Female,Epi,Non-inflamed,TA2,True,A,A
N7.EpiA.AGTCTACTTCTCTA,7230.356204,1543,0.242912,Female,Epi,Non-inflamed,TA2,True,A,A
N7.EpiA.ATATACGAAGTACC,5868.425665,1051,0.155489,Female,Epi,Non-inflamed,TA2,True,A,A
N7.EpiA.ATCTGTTGTCATTC,3130.076031,432,0.058324,Female,Epi,Non-inflamed,TA2,True,A,A
...,...,...,...,...,...,...,...,...,...,...
N110.LPB.TCTTCGGTCACGCATA,3346.150450,492,0.198183,Female,LP,Inflamed,CD8+ IELs,True,,C
N110.LPB.TTAGGCACAATCCAAC,4341.610208,672,0.268843,Female,LP,Inflamed,CD8+ LP,True,,C
N110.LPB.TTGAACGTCGTACGGC,4410.712825,706,0.289367,Female,LP,Inflamed,CD8+ LP,True,,C
N110.LPB.TTGGCAATCCTCCTAG,2755.624716,373,0.149731,Female,LP,Inflamed,CD8+ LP,True,,C


In [38]:
meta = pd.read_csv("data/all.meta2.txt", sep="\t", index_col=0, skiprows=[1])



In [39]:
meta

Unnamed: 0_level_0,Cluster,nGene,nUMI,Subject,Health,Location,Sample
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
N7.EpiA.AAACATACACACTG,TA 1,328,891,N7,Non-inflamed,Epi,N7.EpiA
N7.EpiA.AAACCGTGCATCAG,TA 1,257,663,N7,Non-inflamed,Epi,N7.EpiA
N7.EpiA.AAACGCACAATCGC,TA 2,300,639,N7,Non-inflamed,Epi,N7.EpiA
N7.EpiA.AAAGATCTAACCGT,Enterocyte Progenitors,250,649,N7,Non-inflamed,Epi,N7.EpiA
N7.EpiA.AAAGATCTAGGCGA,Enterocyte Progenitors,284,769,N7,Non-inflamed,Epi,N7.EpiA
...,...,...,...,...,...,...,...
N110.LPB.TTTGGTTAGGATGGTC,Macrophages,635,1366,N110,Inflamed,LP,N110.LPB
N110.LPB.TTTGGTTCACCTCGTT,Plasma,610,2730,N110,Inflamed,LP,N110.LPB
N110.LPB.TTTGGTTTCGGAAACG,Macrophages,859,1979,N110,Inflamed,LP,N110.LPB
N110.LPB.TTTGTCAGTTGACGTT,Macrophages,965,2696,N110,Inflamed,LP,N110.LPB


In [45]:
meta = meta.loc[adata.obs_names,:]

In [61]:
adata.obs['Sample']= meta['Sample'].astype("category")

index
N7.EpiA.AAACGCACAATCGC        N7.EpiA
N7.EpiA.AGATATTGATCGGT        N7.EpiA
N7.EpiA.AGTCTACTTCTCTA        N7.EpiA
N7.EpiA.ATATACGAAGTACC        N7.EpiA
N7.EpiA.ATCTGTTGTCATTC        N7.EpiA
                               ...   
N110.LPB.TCTTCGGTCACGCATA    N110.LPB
N110.LPB.TTAGGCACAATCCAAC    N110.LPB
N110.LPB.TTGAACGTCGTACGGC    N110.LPB
N110.LPB.TTGGCAATCCTCCTAG    N110.LPB
N110.LPB.TTTACTGCATATGGTC    N110.LPB
Name: Sample, Length: 5374, dtype: object

In [67]:
adata, mcps, ws, ct_subs = dl.calculate_multifactor_PMD(
    adata,
    normalize=True,
    solver="lp"
)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 107 is different from 42)

In [68]:
adata

AnnData object with n_obs × n_vars = 5374 × 6329
    obs: 'nCount_RNA', 'nFeature_RNA', 'cellQ', 'gender', 'location', 'clinical.status', 'cell.subtypes', 'pathology', 'origin', 'subset', 'Sample'
    var: 'name'
    uns: 'pca', 'neighbors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'