# Setup

## Create/Update envs

```bash
source TissDiss/tissdiss_env.sh
```
## Activate the R conda environment

```bash
conda activate "/home/jupyter/TissDiss/tissdiss_r_env"
```

In [3]:
# Base directory under which every project path in this notebook is built.
hd <- "/home/jupyter"
# Directory of this experiment; becomes the working directory below.
wd <- file.path(hd, "TissDiss/EXP-01244")

# Relative paths used after this point resolve against `wd`.
setwd(wd)

# Source the shared project utilities; suppressMessages() hides any messages
# emitted while the file is sourced.
# NOTE(review): presumably this file attaches Seurat (Read10X_h5,
# CreateSeuratObject, ...) used by later cells -- confirm.
suppressMessages(source(file.path(hd, "TissDiss/tissdiss_r_util.r")))

# Ingest cell/gene matrix into Seurat

In [4]:
#' Build a normalised, clustered Seurat object from per-sample 10x HDF5 files
#'
#' Recursively searches `cr_out` for files whose name ends in
#' `sample_filtered_feature_bc_matrix.h5`, loads each one into its own Seurat
#' object, merges them, adds QC percentages and runs
#' SCTransform -> PCA -> neighbours -> clustering -> UMAP.
#'
#' @param cr_out Directory (or directories) searched recursively for matrices.
#' @param name_index Position of the '/'-separated component of each file's
#'   full path that is used as the replicate name. The default, 9, is the
#'   value that used to be hard-coded; it depends on how deep `cr_out` sits in
#'   the file system, so pass a different value when the layout differs.
#' @param ig_pattern Regular expression selecting the features summed into
#'   `percent.ig`. The previous pattern `"^IG-"` matches no immunoglobulin
#'   gene symbol (e.g. IGHG1, IGKC, IGLC2 carry no hyphen), so the column was
#'   always 0. The default `"^IG[HKL]"` may also match a few non-Ig symbols
#'   such as IGHMBP2; tighten it if that matters.
#' @return A Seurat object with `replicate`, `orig.ident`, `percent.mt` and
#'   `percent.ig` metadata, an SCT assay, PCA, clusters and a UMAP reduction.
process_h5 <- function(cr_out, name_index = 9L, ig_pattern = "^IG[HKL]") {
    stopifnot(
        is.numeric(name_index),
        length(name_index) == 1L,
        !is.na(name_index),
        name_index >= 1
    )

    files <- list.files(
        path = cr_out,
        pattern = "sample_filtered_feature_bc_matrix\\.h5$",
        recursive = TRUE,
        full.names = TRUE
    )
    if (length(files) == 0L) {
        stop(
            "No 'sample_filtered_feature_bc_matrix.h5' files found under: ",
            paste(cr_out, collapse = ", "),
            call. = FALSE
        )
    }

    # Resolve every replicate name before any matrix is read, so a wrong
    # `name_index` fails immediately instead of after a slow load.
    path_parts <- strsplit(files, "/", fixed = TRUE)
    too_short <- lengths(path_parts) < name_index
    if (any(too_short)) {
        stop(
            "`name_index` (", name_index, ") exceeds the number of path ",
            "components in: ", paste(files[too_short], collapse = ", "),
            call. = FALSE
        )
    }
    replicate_names <- vapply(path_parts, `[[`, character(1), name_index)

    so_ls <- lapply(seq_along(files), function(i) {
        name <- replicate_names[[i]]
        mtx <- Read10X_h5(files[[i]])
        so <- CreateSeuratObject(counts = mtx, assay = "RNA", min.cells = 1)
        so@meta.data[["replicate"]] <- name
        # Sample identity is the part of the replicate name before the
        # first underscore.
        so@meta.data[["orig.ident"]] <- unlist(strsplit(name, "_"))[1]
        so
    })

    # `so_ls[2:length(so_ls)]` counts down (2:1) when there is only one
    # object, so a single sample has to bypass the merge.
    if (length(so_ls) > 1L) {
        so <- merge(so_ls[[1]], y = so_ls[-1])
        so <- JoinLayers(so)
    } else {
        so <- so_ls[[1]]
    }

    # str() prints by itself and returns NULL; wrapping it in print() only
    # appended a stray "NULL" to the output.
    str(so)

    so[["percent.mt"]] <- PercentageFeatureSet(so, pattern = "^MT-")
    so[["percent.ig"]] <- PercentageFeatureSet(so, pattern = ig_pattern)

    so <- SCTransform(so, assay = "RNA", verbose = FALSE)
    so <- RunPCA(so, verbose = FALSE)
    so <- FindNeighbors(so, dims = 1:20, verbose = FALSE)
    so <- FindClusters(so, resolution = 1.0)
    so <- RunUMAP(so, dims = 1:20)

    so
}

In [None]:
# Root directory that process_h5() searches recursively for
# sample_filtered_feature_bc_matrix.h5 files.
cr_out <- file.path(hd, "TissDiss/EXP-01244/EXP-01244_cr_outs")

# Load, merge, normalise and cluster every matrix found under `cr_out`;
# the call also prints the structure of the merged object (output below).
so <- process_h5(cr_out)

Formal class 'Seurat' [package "SeuratObject"] with 13 slots
  ..@ assays      :List of 1
  .. ..$ RNA:Formal class 'Assay5' [package "SeuratObject"] with 8 slots
  .. .. .. ..@ layers    :List of 1
  .. .. .. .. ..$ counts:Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
  .. .. .. .. .. .. ..@ i       : int [1:131361199] 11 50 55 60 84 108 119 122 147 153 ...
  .. .. .. .. .. .. ..@ p       : int [1:49436] 0 831 1730 2202 2705 2997 3141 3711 4698 6092 ...
  .. .. .. .. .. .. ..@ Dim     : int [1:2] 17595 49435
  .. .. .. .. .. .. ..@ Dimnames:List of 2
  .. .. .. .. .. .. .. ..$ : NULL
  .. .. .. .. .. .. .. ..$ : NULL
  .. .. .. .. .. .. ..@ x       : num [1:131361199] 1 1 2 1 1 1 2 1 1 1 ...
  .. .. .. .. .. .. ..@ factors : list()
  .. .. .. ..@ cells     :Formal class 'LogMap' [package "SeuratObject"] with 1 slot
  .. .. .. .. .. ..@ .Data: logi [1:49435, 1] TRUE TRUE TRUE TRUE TRUE TRUE ...
  .. .. .. .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. .. .. .. ..$ 

`vst.flavor` is set to 'v2' but could not find glmGamPoi installed.
Please install the glmGamPoi package for much faster estimation.
--------------------------------------------
install.packages('BiocManager')
BiocManager::install('glmGamPoi')
--------------------------------------------
Falling back to native (slower) implementation.




In [None]:
# Print the internal structure of the current `so` object for inspection.
str(so)

In [None]:
# Scratch cell: repeat the per-file ingest steps for the first matrix only,
# to inspect a single un-merged Seurat object.
# NOTE(review): this reassigns the global `so`, replacing whatever object an
# earlier cell stored under that name.
cr_out <- file.path(hd, "TissDiss/EXP-01244/EXP-01244_cr_outs")

# Every per-sample filtered matrix below `cr_out`, as full paths.
files <- list.files(
    cr_out,
    pattern = "sample_filtered_feature_bc_matrix.h5$",
    full.names = TRUE,
    recursive = TRUE
)

# Replicate name = 9th '/'-separated component of the first file's path.
# NOTE(review): the index is tied to the depth of the absolute path.
name <- strsplit(files[[1]], "/")[[1]][[9]]

mtx <- Read10X_h5(files[[1]])
so <- CreateSeuratObject(counts = mtx, assay = "RNA", min.cells = 1)

# Metadata columns here are named `sample_replicate` / `sample`, whereas
# process_h5() writes the same values to `replicate` / `orig.ident`.
so@meta.data[["sample_replicate"]] <- name
so@meta.data[["sample"]] <- strsplit(name, "_")[[1]][1]

str(so)

# so_ls <- lapply(seq_along(files[0:1]), function(i) {
#     name <- unlist(strsplit(files[[i]], '/'))[[10]]
#     mtx <- Read10X_h5(files[[i]])
#     so <- CreateSeuratObject(counts = mtx, assay = "RNA", min.cells = 1)
#     so@meta.data[['sample_replicate']] <- name
#     so@meta.data[['sample']] <- unlist(strsplit(name, '_'))[1]
# })