# Notebook Summary

This notebook is intended to show the workflow for using the Cell2Sentence (C2S) code base to:
1. Load a pretrained C2S model
2. Generate new cells conditioned on cell type

In [1]:
# Python built-in libraries
import os

# Local imports
import cell2sentence as cs
from cell2sentence.csmodel import CSModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Paths to the pretrained C2S model, plus the directory and name under which the
# model will be re-saved to disk.
# Each value can be overridden with an environment variable so the notebook can run
# outside the original author's cluster account without editing code; the defaults
# are the original hard-coded locations.
cell_type_cond_generation_model_path = os.environ.get(
    "C2S_MODEL_PATH",
    "/home/sr2464/palmer_scratch/C2S_Files_Syed/multicell_pretraining_v2_important_models/pythia-410m-multicell_v2_2024-07-28_14-10-44_checkpoint-7000_cell_type_cond_generation",
)
save_dir = os.environ.get(
    "C2S_SAVE_DIR",
    "/home/sr2464/palmer_scratch/C2S_Files_Syed/c2s_api_testing/csmodel_testing",
)
save_name = os.environ.get(
    "C2S_SAVE_NAME",
    "cell_type_cond_generation_pythia_410M_1",
)

In [3]:
# Build the CSModel wrapper from the pretrained checkpoint path together with the
# save directory / save name configured above.
# NOTE(review): confirm that the keyword `model_path_or_path` matches the CSModel
# constructor signature of the installed cell2sentence version.
csmodel = CSModel(
    save_name=save_name,
    save_dir=save_dir,
    model_path_or_path=cell_type_cond_generation_model_path,
)

Using device: cuda


In [4]:
# Show the model's string representation
print(str(csmodel))

CSModel Object; Path=/home/sr2464/palmer_scratch/C2S_Files_Syed/c2s_api_testing/csmodel_testing/cell_type_cond_generation_pythia_410M_1


In [5]:
# Cell types used as prompts: the C2S model is asked to generate cells of these types
cell_types_list = [
    "neuron",
    "IgA plasma cell",
    "CD8-positive, alpha-beta T cell",
]

In [7]:
# Ask the C2S model to generate cells for the requested cell types.
# The cells below index the result in parallel with cell_types_list.
# n_genes presumably bounds the number of genes per generated sentence - TODO confirm.
generated_cell_sentences = csmodel.generate_cells_conditioned_on_cell_type(
    organism="Homo sapiens",
    n_genes=200,
    cell_types_list=cell_types_list,
)

Reloading model from path on disk: /home/sr2464/palmer_scratch/C2S_Files_Syed/c2s_api_testing/csmodel_testing/cell_type_cond_generation_pythia_410M_1
Generating 3 cells using CSModel...


100%|██████████| 3/3 [00:54<00:00, 18.10s/it]


In [8]:
# Report how many cell sentences were generated
n_generated = len(generated_cell_sentences)
print(n_generated)

3


In [9]:
# Inspect the generated cell sentence for the first requested cell type
idx = 0
cell_type = cell_types_list[idx]
# Some cell type names already end in "cell" (e.g. "IgA plasma cell"); only append
# the word when it is missing so the header never reads "... cell cell".
if cell_type.lower().endswith("cell"):
    cell_label = cell_type
else:
    cell_label = "{} cell".format(cell_type)
print("Generated {}".format(cell_label))
print(generated_cell_sentences[idx])

Generated neuron cell
MALAT1 KCNIP4 ROBO2 NRG1 SNHG14 LSAMP FAM155A MEG3 LRP1B DPP10 GPC5 NRXN3 NRG3 KCND2 LRRTM4 GRIK1 PTPRD ROBO1 GRIK2 NRXN1 KCNQ5 LINGO2 FRMD4A NLGN1 SGCZ DSCAM ZFPM2 ANK2 DCC ADGRL3 RIMS2 CTNNA2 MAGI2 MEG8 PDE4D ZNF804B RALYL OPCML CNTNAP5 CNTNAP2 CDH18 CNTN5 PCDH15 ADGRB3 CSMD3 RBFOX1 IL1RAPL1 FGF14 MACROD2 CTNND2 ANKS1B GALNTL6 RGS6 FGF12 KCNQ3 DST CADM2 LRFN5 NCOA1 LRRC7 PPFIA2 CADPS ERBB4 RORA ADARB2 AHI1 GRIA4 TNRC6A DACH1 SLC35F1 CACNB2 UNC5C RIMS1 KAZN TMEM108 ZNF804A SMYD3 PCDH7 ADGRL2 DOCK3 SORCS1 SYT1 PDE4B SLC8A1 MAST4 RYR2 DGKB NEGR1 FTX GPHN ANK3 GRID2 TAFA1 UNC13C ATRNL1 UNC5D DPP6 GPC6 ATP8A2 TAFA2 RGS7 MEIS2 AIG1 RP11-384F7-1 DACH2 RP11-436D23 UNC79 RP11-563M4-1 PCDH9 AGBL4 PTPRM DNM3 DOCK4 NAV3 PPP2R2B LINC00632 CCSER1 TANC2 DCC TMEM178B ANO4 GPR158 PTPRN2 SNAP25 GRM8 MIR99AHG CACNA1C LHFPL3 CNTN1 SLC35F4 KCNH7 ATP1B1 MAPK10 NBEA ERC2 PAM PPP3CA EPHA6 MYT1L DNM1 MCTP1 STXBP5L NOVA1 ERC1 R3HDM1 ARID1B MLLT3 NTM UBE2E2 CADM1 RP11-307P5-1 PLXDC2 UNC80

In [10]:
# Inspect the generated cell sentence for the second requested cell type
idx = 1
cell_type = cell_types_list[idx]
# Some cell type names already end in "cell" (e.g. "IgA plasma cell"); only append
# the word when it is missing so the header never reads "... cell cell".
if cell_type.lower().endswith("cell"):
    cell_label = cell_type
else:
    cell_label = "{} cell".format(cell_type)
print("Generated {}".format(cell_label))
print(generated_cell_sentences[idx])

Generated IgA plasma cell cell
JCHAIN MALAT1 B2M JUN HSP90AA1 HSPA1A MT-CO1 MT-CO2 RPS4X RPL41 MT-ND4L FOS HSPA6 HSPB1 FTL RPS19 MT-ND1 DNAJB1 MT-CO3 MT-ND2 RPLP1 RPL3 DNAJB9 RPL13 MZB1 RPL10 BTG2 RPL34 TSC22D3 EEF1A1 TPT1 RPS18 RPL30 RPS27 RPS24 RPL15 RPL7A RPS15A RPS3A HSP90B1 RPL35A IER2 MT-CYB CD79A RPS12 RPL32 RPL14 UBC RPL18A RPL36AL RPL11 MT-ND3 RPL6 RPS8 PPP1R15A SSR4 RPL18 CD69 MT-ATP8 RPL8 RPS15 ACTB HSPA1B RPL12 RPS7 RPL29 RPL36 RPS14 RPS6 HLA-B RPS25 RPS2 FAU HLA-C EEF2 EIF1 RPS28 RPL5 FTH1 EIF4A1 UBB UBA52 RPS13 JUNB DNAJA1 RPL39 TMSB4X RPS11 RPL26 DNAAF1 RPL7 RPS3 BTG1 KLF6 IER3 RPL28 RPL35 RPL10A HSP90AB1 RPS9 RPS27A KLF2 RPL22 SSR3 FOSB RPS26 RPL19 RPL37 CYBA JUND TMSB10 RPS23 OAZ1 PSMB1 RPS21 RPS29 RPL37A RPSA SEC61G SRGN PFDN5 ARF1 RPS5 SERP1 PTMA RPL24 COX4I1 PPIA RPS16 CYCS EEF1D RACK1 HLA-E MYL6 DUSP1 HSPA8 TMA7 RPL9 RPL27A RPL21 SELENOK MT-ATP6 HLA-A TMEM59 RPL36A RPL13A DNAJC3 EAF2 EAF1 RPS20 EIF3F ZFP36 ZFAS1 RPLP0 RPL31 PABPC1 RPL4 RPLP2 RPL27 MT-ND4 IFRD1 UBE2