In [25]:
import commot as ct
import scanpy as sc
import pandas as pd
import numpy as np
import torch
import anndata as ad

# Mouse brain dataset

In [46]:
df=pd.read_csv("./data/mouse/mouse.csv")
df=df[df['slice_id']=="mouse1_slice201"].copy()
print(df.shape)
genes=torch.load("./data/mouse/genes.pth")
adata=ad.AnnData(X=df[genes].values)
adata.obs["centerx"]=df["centerx"].values
adata.obs["centery"]=df["centery"].values
adata.obsm["spatial"]=np.stack([df["centerx"].values,df["centery"].values],axis=-1)
adata.var_names=genes
print(adata)

(6137, 275)
AnnData object with n_obs × n_vars = 6137 × 254
    obs: 'centerx', 'centery'
    obsm: 'spatial'


In [47]:
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)
df_ligrec=ct.pp.ligand_receptor_database(database='CellChat', species='mouse')

In [48]:
ct.tl.spatial_communication(adata, database_name='user_database', df_ligrec=df_ligrec, dis_thr=200, heteromeric=True)

In [49]:
adata.write_h5ad("./COMMOT/mouse.h5ad")

# AD dataset

In [42]:
df=pd.read_csv("./data/AD/AD.csv")
df=df[df["section"]=="H20.33.001.CX28.MTG.02.007.1.02.03"].copy()
print(df.columns)
genes=torch.load("./data/AD/genes.pth")
adata=ad.AnnData(X=df[genes].values)
adata.obs["centerx"]=df["centerx"].values
adata.obs["centery"]=df["centery"].values
adata.obsm["spatial"]=np.stack([df["centerx"].values,df["centery"].values],axis=-1)
adata.var_names=genes
print(adata)

Index(['PRRT4', 'GRIP2', 'FOXP2', 'PDZD2', 'KIAA1217', 'PALMD', 'LRRC4C',
       'ASIC2', 'NPAS3', 'LUZP2',
       ...
       'ADAMTSL1', 'CHODL', 'SULF1', 'NRG1', 'NXPH2', 'TLL1', 'centerx',
       'centery', 'section', 'subclass'],
      dtype='object', length=144)
AnnData object with n_obs × n_vars = 15225 × 140
    obs: 'centerx', 'centery'
    obsm: 'spatial'


In [43]:
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)

In [44]:
df_ligrec=ct.pp.ligand_receptor_database(database='CellChat', species='human')
print(df_ligrec)

           0              1          2                   3
0      TGFB1  TGFBR1_TGFBR2       TGFb  Secreted Signaling
1      TGFB2  TGFBR1_TGFBR2       TGFb  Secreted Signaling
2      TGFB3  TGFBR1_TGFBR2       TGFb  Secreted Signaling
3      TGFB1  ACVR1B_TGFBR2       TGFb  Secreted Signaling
4      TGFB1  ACVR1C_TGFBR2       TGFb  Secreted Signaling
...      ...            ...        ...                 ...
1194   UTS2B          UTS2R  UROTENSIN  Secreted Signaling
1195   UTS2B          SSTR5  UROTENSIN  Secreted Signaling
1196    BAG6           NCR3        BAG  Secreted Signaling
1197  LGALS9         HAVCR2   GALECTIN  Secreted Signaling
1198  LGALS9           CD44   GALECTIN  Secreted Signaling

[1199 rows x 4 columns]


In [45]:
try:
    ct.tl.spatial_communication(adata, database_name='user_database', df_ligrec=df_ligrec, dis_thr=200, heteromeric=True)
except Exception as e:
    print(e)

single positional indexer is out-of-bounds


# NSCLC dataset

In [51]:
df=pd.read_csv("./data/NSCLC/NSCLC.csv")
df=df[df["section"]=="Lung6"].copy()
print(df.columns)
genes=torch.load("./data/NSCLC/genes.pth")
adata=ad.AnnData(X=df[genes].values)
adata.obs["centerx"]=df['CenterX_global_px'].values
adata.obs["centery"]=df['CenterY_global_px'].values
adata.obsm["spatial"]=np.stack([df['CenterX_global_px'].values,df['CenterY_global_px'].values],axis=-1)
adata.var_names=genes
print(adata)

Index(['fov', 'cell_ID', 'AATK', 'ABL1', 'ABL2', 'ACE', 'ACE2', 'ACKR1',
       'ACKR3', 'ACKR4',
       ...
       'SampleID', 'Area', 'AspectRatio', 'CenterX_local_px',
       'CenterY_local_px', 'CenterX_global_px', 'CenterY_global_px', 'Width',
       'Height', 'section'],
      dtype='object', length=974)
AnnData object with n_obs × n_vars = 89948 × 960
    obs: 'centerx', 'centery'
    obsm: 'spatial'


In [52]:
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)

In [53]:
df_ligrec=ct.pp.ligand_receptor_database(database='CellChat', species='human')
print(df_ligrec)

           0              1          2                   3
0      TGFB1  TGFBR1_TGFBR2       TGFb  Secreted Signaling
1      TGFB2  TGFBR1_TGFBR2       TGFb  Secreted Signaling
2      TGFB3  TGFBR1_TGFBR2       TGFb  Secreted Signaling
3      TGFB1  ACVR1B_TGFBR2       TGFb  Secreted Signaling
4      TGFB1  ACVR1C_TGFBR2       TGFb  Secreted Signaling
...      ...            ...        ...                 ...
1194   UTS2B          UTS2R  UROTENSIN  Secreted Signaling
1195   UTS2B          SSTR5  UROTENSIN  Secreted Signaling
1196    BAG6           NCR3        BAG  Secreted Signaling
1197  LGALS9         HAVCR2   GALECTIN  Secreted Signaling
1198  LGALS9           CD44   GALECTIN  Secreted Signaling

[1199 rows x 4 columns]


In [54]:
ct.tl.spatial_communication(adata, database_name='user_database', df_ligrec=df_ligrec, dis_thr=200, heteromeric=True)

In [55]:
print(adata)
adata.write_h5ad("./COMMOT/NSCLC.h5ad")

AnnData object with n_obs × n_vars = 89948 × 960
    obs: 'centerx', 'centery'
    uns: 'log1p', 'commot-user_database-info'
    obsm: 'spatial', 'commot-user_database-sum-sender', 'commot-user_database-sum-receiver'
    obsp: 'commot-user_database-MIF-ACKR3', 'commot-user_database-MIF-CD74_CXCR4', 'commot-user_database-MIF-CD74_CD44', 'commot-user_database-MIF-CD74_CXCR2', 'commot-user_database-FGF18-FGFR2', 'commot-user_database-FGF18-FGFR1', 'commot-user_database-FGF18-FGFR3', 'commot-user_database-FASLG-FAS', 'commot-user_database-CXCL10-ACKR1', 'commot-user_database-CXCL10-CXCR3', 'commot-user_database-ANGPT1-TEK', 'commot-user_database-CXCL12-ACKR3', 'commot-user_database-CXCL12-CXCR4', 'commot-user_database-PDGFA-PDGFRA', 'commot-user_database-PDGFA-PDGFRB', 'commot-user_database-WNT11-FZD1', 'commot-user_database-WNT11-FZD3', 'commot-user_database-WNT11-FZD7', 'commot-user_database-WNT11-FZD4', 'commot-user_database-WNT11-FZD8', 'commot-user_database-WNT11-FZD5', 'commot-user_d

# BC dataset

In [None]:
df=pd.read_csv("./data/BC/BC.csv")
df=df[df["section"]=="sample1_rep1"].copy()
print(df.columns)
genes=torch.load("./data/BC/genes.pth")
adata=ad.AnnData(X=df[genes].values)
adata.obs["centerx"]=df["centerx"].values
adata.obs["centery"]=df["centery"].values
adata.obsm["spatial"]=np.stack([df["centerx"].values,df["centery"].values],axis=-1)
adata.var_names=genes
print(adata)

In [None]:
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)

In [None]:
df_ligrec=ct.pp.ligand_receptor_database(database='CellChat', species='human')
print(df_ligrec)

In [None]:
ct.tl.spatial_communication(adata, database_name='user_database', df_ligrec=df_ligrec, dis_thr=200, heteromeric=True)

In [None]:
print(adata)
adata.write_h5ad("./COMMOT/BC.h5ad")