In [1]:
]activate .

[32m[1m  Activating[22m[39m project at `~/Julia`


In [2]:
using CellScopes
using DataFrames
using CSV
using Plots
using GLM
using StatsPlots
using UMAP
using Statistics
using LinearAlgebra
using SparseArrays

Welcome to use CellScopes.jl!


In [3]:
import CellScopes as cs

In [4]:
# Load the raw 10x count matrix from an HDF5 file.
# NOTE(review): the path is an absolute, user-specific location, and `counts_h5`
# is not referenced by any later cell visible here — confirm it is still needed.
counts_h5 = cs.read_10x_h5("/Users/jingyi/Julia/h5/A2780.h5")

CellScopes.RawCountObject


Genes x Cells = 33538 x 3870
All fields:
- count_mtx
- cell_name
- gene_name


In [5]:
# Load the count data from the "A2780_QC" directory (path is relative to the
# current working directory). This is the object used by the rest of the notebook.
counts_qc = cs.read_10x("A2780_QC")

CellScopes.RawCountObject


Genes x Cells = 17832 x 3449
All fields:
- count_mtx
- cell_name
- gene_name


## Calculate the fraction of MT- genes

In [6]:
# Row indices of every gene whose name begins with the "MT-" prefix.
mt_gene_indices = findall(startswith("MT-"), counts_qc.gene_name)

13-element Vector{Int64}:
 17810
 17811
 17812
 17813
 17814
 17815
 17816
 17817
 17818
 17819
 17820
 17821
 17822

In [7]:
# Per-cell totals: summing over dims=1 collapses the gene rows, leaving one
# value per column (cell).
total_counts = sum(counts_qc.count_mtx; dims=1);
mt_counts = sum(counts_qc.count_mtx[mt_gene_indices, :]; dims=1);
# Element-wise ratio of MT- counts to total counts for each cell (1 x n_cells).
fraction_mt = mt_counts ./ total_counts

1×3449 Matrix{Float64}:
 0.0511259  0.0312758  0.0182593  …  0.0800816  0.0964081  0.035503

## Construct the scRNAObject

In [8]:
# Wrap the loaded raw counts in a CellScopes scRNAObject; all later steps
# operate on this object (or objects derived from it).
A2780 = cs.scRNAObject(counts_qc)

scRNAObject in CellScopes.jl


Genes x Cells = 17832 x 3449
Available data:
- Raw count
- Metadata
All fields:
- rawCount
- normCount
- scaleCount
- metaData
- varGene
- dimReduction
- clustData
- undefinedData


In [9]:
# Attach the per-cell mitochondrial ratio to the metadata table; `vec` flattens
# the 1 x n_cells matrix into a plain vector so it can be stored as a column.
# NOTE(review): the column is named `percent_mito` but holds fractions in the
# 0-1 range (no multiplication by 100 happens here) — confirm the intended unit.
A2780.metaData.percent_mito = vec(fraction_mt);
# Show the updated metadata table.
A2780.metaData

Row,Cell_id,nFeatures,nGenes,percent_mito
Unnamed: 0_level_1,String,Int64,Int64,Float64
1,A2780S_CT_S_AAACCTGCAGGGTATG,19540,3903,0.0511259
2,A2780S_CT_S_AAACCTGGTAGCGTCC,19408,3499,0.0312758
3,A2780S_CT_S_AAACCTGGTATAGGGC,1643,859,0.0182593
4,A2780S_CT_S_AAACCTGGTGAAGGCT,970,661,0.0515464
5,A2780S_CT_S_AAACCTGGTGCCTTGG,580,404,0.0396552
6,A2780S_CT_S_AAACCTGTCAGTTTGG,1162,680,0.0301205
7,A2780S_CT_S_AAACCTGTCATGCAAC,1408,810,0.024858
8,A2780S_CT_S_AAACGGGCAAGGTGTG,14693,3462,0.0811271
9,A2780S_CT_S_AAACGGGCACATCTTT,6551,2232,0.0619753
10,A2780S_CT_S_AAACGGGCAGGCGATA,2156,864,0.405844


In [10]:
# Label each cell by the prefix of its Cell_id: ids starting with "A2780S_CT_R"
# get that label, every other id is labelled "A2780S_CT_S".
A2780.metaData.Identity = [startswith(cell_id, "A2780S_CT_R") ? "A2780S_CT_R" : "A2780S_CT_S" for cell_id in A2780.metaData.Cell_id]

3449-element Vector{String}:
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 "A2780S_CT_S"
 ⋮
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"
 "A2780S_CT_R"

In [11]:
# Display the metadata table, now including the percent_mito and Identity columns.
A2780.metaData

Row,Cell_id,nFeatures,nGenes,percent_mito,Identity
Unnamed: 0_level_1,String,Int64,Int64,Float64,String
1,A2780S_CT_S_AAACCTGCAGGGTATG,19540,3903,0.0511259,A2780S_CT_S
2,A2780S_CT_S_AAACCTGGTAGCGTCC,19408,3499,0.0312758,A2780S_CT_S
3,A2780S_CT_S_AAACCTGGTATAGGGC,1643,859,0.0182593,A2780S_CT_S
4,A2780S_CT_S_AAACCTGGTGAAGGCT,970,661,0.0515464,A2780S_CT_S
5,A2780S_CT_S_AAACCTGGTGCCTTGG,580,404,0.0396552,A2780S_CT_S
6,A2780S_CT_S_AAACCTGTCAGTTTGG,1162,680,0.0301205,A2780S_CT_S
7,A2780S_CT_S_AAACCTGTCATGCAAC,1408,810,0.024858,A2780S_CT_S
8,A2780S_CT_S_AAACGGGCAAGGTGTG,14693,3462,0.0811271,A2780S_CT_S
9,A2780S_CT_S_AAACGGGCACATCTTT,6551,2232,0.0619753,A2780S_CT_S
10,A2780S_CT_S_AAACGGGCAGGCGATA,2156,864,0.405844,A2780S_CT_S


## Normalization

In [12]:
# Normalize the object with a scale factor of 10000.
# NOTE(review): the recorded output shows an "Internal error: stack overflow in
# type inference" message before the result is printed; the call still returned
# an object with "Normalized count" available — confirm the result is valid.
A2780_normalized = cs.normalize_object(A2780; scale_factor = 10000)

Internal error: stack overflow in type inference of _is_fixed(SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, Int64}, SparseArrays.SparseVector{Float64, 

scRNAObject in CellScopes.jl


Genes x Cells = 17832 x 3449
Available data:
- Raw count
- Normalized count
- Metadata
All fields:
- rawCount
- normCount
- scaleCount
- metaData
- varGene
- dimReduction
- clustData
- undefinedData


In [13]:
# Display the summary of the normalized object.
A2780_normalized

scRNAObject in CellScopes.jl


Genes x Cells = 17832 x 3449
Available data:
- Raw count
- Normalized count
- Metadata
All fields:
- rawCount
- normCount
- scaleCount
- metaData
- varGene
- dimReduction
- clustData
- undefinedData


In [14]:
# Inspect the normCount field of the normalized object.
A2780_normalized.normCount

CellScopes.NormCountObject


Genes x Cells = 17832 x 3449
All fields:
- count_mtx
- cell_name
- gene_name
- scale_factor
- norm_method
- pseudocount


In [15]:
# Identify variable genes with the function's default settings; the result
# populates the object's varGene field.
A2780_normalized_featured = cs.find_variable_genes(A2780_normalized)

scRNAObject in CellScopes.jl


Genes x Cells = 17832 x 3449
Available data:
- Raw count
- Normalized count
- Metadata
- Variable genes
All fields:
- rawCount
- normCount
- scaleCount
- metaData
- varGene
- dimReduction
- clustData
- undefinedData


In [16]:
# Inspect the varGene field (a VariableGeneObject with var_gene and vst_data).
A2780_normalized_featured.varGene

CellScopes.VariableGeneObject


All fields:
- var_gene
- vst_data


In [17]:
# Display the per-gene statistics table (mean, variance, variance_expected,
# variance_standardized, gene).
A2780_normalized_featured.varGene.vst_data

Row,mean,variance,variance_expected,variance_standardized,gene
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,String
1,0.434619,61.7296,0.742146,83.1771,TMSB4X
2,0.147289,9.89825,0.186649,53.0313,NTS
3,0.014497,0.696426,0.0160891,43.2857,ANXA1
4,0.0661061,3.13832,0.076376,41.0904,CALCB
5,0.0104378,0.355459,0.0116581,30.4903,H3.Y
6,0.0840823,2.47553,0.0989558,25.0165,GCG
7,0.0258046,0.707861,0.028591,24.7581,HIST1H2BH
8,0.00956799,0.229317,0.0106907,21.4502,OTOR
9,0.0171064,0.314382,0.018934,16.6041,KRT8
10,0.0121774,0.215629,0.0135518,15.9115,KRT18


### Filter featured genes

In [18]:
# Keep only genes whose mean lies in [0.0125, 3.0] and whose standardized
# variance is at least 0.5, replacing the stored statistics table.
# NOTE(review): only `vst_data` is reassigned here; `varGene.var_gene` is left
# untouched — confirm whether downstream steps read `var_gene` instead.
A2780_normalized_featured.varGene.vst_data = filter(A2780_normalized_featured.varGene.vst_data) do gene
    gene.variance_standardized >= 0.5 && 0.0125 <= gene.mean <= 3.0
end

Row,mean,variance,variance_expected,variance_standardized,gene
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,String
1,0.434619,61.7296,0.742146,83.1771,TMSB4X
2,0.147289,9.89825,0.186649,53.0313,NTS
3,0.014497,0.696426,0.0160891,43.2857,ANXA1
4,0.0661061,3.13832,0.076376,41.0904,CALCB
5,0.0840823,2.47553,0.0989558,25.0165,GCG
6,0.0258046,0.707861,0.028591,24.7581,HIST1H2BH
7,0.0171064,0.314382,0.018934,16.6041,KRT8
8,0.028414,0.446408,0.031551,14.1488,GNG11
9,0.028704,0.409559,0.0318816,12.8462,CALCA
10,0.0864019,1.21759,0.101939,11.9444,CTGF


### The following cells could not be run

In [None]:
# Scale the object after variable-gene selection (cell has no recorded execution).
A2780_scaled = cs.scale_object(A2780_normalized_featured) 

In [None]:
# Run PCA on the scaled object using the SVD method, keeping at most 10 output
# dimensions (cell has no recorded execution).
A2780_pca = cs.run_pca(A2780_scaled;  method=:svd, pratio = 1, maxoutdim = 10)

In [None]:
# Cluster the cells from the PCA result with res=0.06 and n_neighbors=100
# (cell has no recorded execution).
A2780_cluster = cs.run_clustering(A2780_pca; res=0.06, n_neighbors=100)

In [None]:
# Compute a UMAP embedding for the clustered object with min_dist=0.4
# (cell has no recorded execution).
A2780_umap = cs.run_umap(A2780_cluster; min_dist=0.4)

In [None]:
# Find marker genes between two specific clusters.
# NOTE(review): the cluster labels "7" and "6" are hard-coded; the clustering
# step above has never been run in this notebook, so confirm these labels exist
# in its result before relying on this cell.
markers = cs.find_markers(A2780_umap; cluster_1 = "7", cluster_2 = "6")

In [None]:
# Find marker genes across all clusters (cell has no recorded execution).
all_markers = cs.find_all_markers(A2780_umap)