In [24]:
# Output directory prefix for the Table_<n>.csv files. It must end with a
# slash because file paths are built by plain string concatenation.
out <- "/home/mdonovan/pancreas_scRNA_map/Table/Final/"

# Table 1

Description of the samples in the study, %PDX1+/NKX6-1+, and UUIDs for all sequencing data

In [1]:
# Running index used to number the output files (Table_<n>.csv).
table_count <- 1

In [2]:
# Load the clinical and molecular metadata sheet.
t1 <- read.csv("../Table/Table S1_Clinical and Molecular Metadata_V03.csv")

In [3]:
# Keep every row whose UDID is not PPC_069.
t1 <- t1[t1$UDID != "PPC_069", ]

In [4]:
# Remove the bulk RNA-seq and bulk ATAC-seq UUID columns.
t1$bulk_RNAseq_UUID   <- NULL
t1$Bulk_ATAC.seq_UUID <- NULL

In [5]:
# Rename the remaining columns by position (14 names).
# NOTE(review): "Ethinicity" and "Cryporeserved" look like misspellings, but
# they end up as headers in the written file, so they are left unchanged.
names(t1) <- c(
    "Subject_UUID",
    "Sex",
    "Ethinicity",
    "iPSCORE_Family",
    "Cell_type",
    "UDID",
    "PDX1_pos",
    "PDX1.NKX61_pos",
    "WGS_UUID",
    "scRNAseq_live_UUID",
    "scRNAseq_Cryporeserved_UUID",
    "scRNAseq_Cryporeserved_Pool_scheme",
    "snATACseq._UUID",
    "snATACseq._Pool_scheme"
)

In [27]:
# Write the table as <out>Table_<table_count>.csv.
write.csv(t1, file = paste0(out, "Table_", table_count, ".csv"))

In [58]:
# Advance the counter for the next table.
table_count <- table_count + 1

# Table 2
Differentially expressed genes from each cell type

In [19]:
# This section produces Table_2.csv.
table_count <- 2

In [7]:
# NOTE(review): `out` is reassigned here to the analysis data directory. The
# write.csv() calls that follow build their path from `out`, so they write
# into this directory as well -- confirm that is intended.
out <- "/projects/PPC/analysis/ppc_pilot/data/aggr_live_then_integrate/"

# Per-cell-type marker tables, one file each, read from `out`.
iPSC_markers        <- read.table(paste0(out, "iPSC_markers.txt"))
repl_markers        <- read.table(paste0(out, "repl_markers.txt"))
ppc_markers         <- read.table(paste0(out, "all.ppc_markers.txt"))
Endocrine_markers   <- read.table(paste0(out, "Endocrine_markers.txt"))
mesenchymal_markers <- read.table(paste0(out, "mesenchymal_markers.txt"))
endothelial_markers <- read.table(paste0(out, "endothelial_markers.txt"))

In [25]:
#' Format a marker-gene table for export.
#'
#' Copies the row names of `df` into a `marker_gene_name` column, resets the
#' row names, adds a `cell_type` column, and keeps only the reported columns.
#'
#' @param df Data frame with the columns `p_val`, `avg_logFC` and `p_val_adj`.
#'   Its row names are used as the marker gene names.
#' @param celltype Label stored in the `cell_type` column of every row.
#' @return A data frame with default row names and the columns `cell_type`,
#'   `marker_gene_name`, `p_val`, `avg_logFC`, `p_val_adj`, in that order.
#'   A zero-row input yields a zero-row result with the same columns.
AnnotateTableMarker <- function(df, celltype){

    df$marker_gene_name <- rownames(df)
    rownames(df) <- NULL
    # rep_len() keeps this valid for a zero-row table: assigning a bare
    # length-one value to a column of a zero-row data frame is an error.
    df$cell_type <- rep_len(celltype, nrow(df))

    df[, c("cell_type", "marker_gene_name", "p_val", "avg_logFC", "p_val_adj")]

}

In [27]:
# Stack the per-cell-type marker tables, each tagged with its display label.
t2 <- do.call(rbind, list(
    AnnotateTableMarker(iPSC_markers, "iPSC"),
    AnnotateTableMarker(repl_markers, "Replicating cells"),
    AnnotateTableMarker(ppc_markers, "PPCs (1, 2, 3, and 4)"),
    AnnotateTableMarker(Endocrine_markers, "Endocrine cells"),
    AnnotateTableMarker(mesenchymal_markers, "Mesenchyme cells"),
    AnnotateTableMarker(endothelial_markers, "Endothelial cells")
))

In [None]:
# Write the combined table as <out>Table_<table_count>.csv.
write.csv(t2, file = paste0(out, "Table_", table_count, ".csv"))

# Table 3
GO enrichment of marker genes from each cell type

In [None]:
# This section produces Table_3.csv.
table_count <- 3

In [21]:
# Per-cell-type GO enrichment tables, one file each, read from `out`.
gowall_ipsc        <- read.table(paste0(out, "gowall_ipsc.txt"))
gowall_repl        <- read.table(paste0(out, "gowall_repl.txt"))
gowall_mesenchyme  <- read.table(paste0(out, "gowall_mesenchyme.txt"))
gowall_endocrine   <- read.table(paste0(out, "gowall_endocrine.txt"))
gowall_endothelial <- read.table(paste0(out, "gowall_endothelial.txt"))
gowall_ppc         <- read.table(paste0(out, "gowall_ppc.txt"))

In [28]:
#' Format a GO-enrichment table for export.
#'
#' Resets the row names of `df`, adds a `cell_type` column, and keeps only
#' the reported columns in a fixed order.
#'
#' @param df Data frame with the columns `category`,
#'   `over_represented_pvalue`, `under_represented_pvalue`, `numDEInCat`,
#'   `numInCat`, `term`, `ontology`, `pvalue` and `bonferroni`.
#' @param celltype Label stored in the `cell_type` column of every row.
#' @return A data frame with default row names whose first column is
#'   `cell_type`, followed by the columns listed above. A zero-row input
#'   yields a zero-row result with the same columns.
AnnotateTableGO <- function(df, celltype){

    rownames(df) <- NULL
    # rep_len() keeps this valid for a zero-row table: assigning a bare
    # length-one value to a column of a zero-row data frame is an error.
    df$cell_type <- rep_len(celltype, nrow(df))

    df[, c("cell_type", "category", "over_represented_pvalue",
           "under_represented_pvalue", "numDEInCat", "numInCat", "term",
           "ontology", "pvalue", "bonferroni")]

}

In [34]:
# Stack the per-cell-type GO tables, each tagged with its display label.
t3 <- do.call(rbind, list(
    AnnotateTableGO(gowall_ipsc, "iPSC"),
    AnnotateTableGO(gowall_repl, "Replicating cells"),
    AnnotateTableGO(gowall_ppc, "PPCs (1, 2, 3, and 4)"),
    AnnotateTableGO(gowall_endocrine, "Endocrine cells"),
    AnnotateTableGO(gowall_mesenchyme, "Mesenchyme cells"),
    AnnotateTableGO(gowall_endothelial, "Endothelial cells")
))

In [None]:
# Write the combined table as <out>Table_<table_count>.csv.
write.csv(t3, file = paste0(out, "Table_", table_count, ".csv"))

# Table 4

PPC genes that linearly change over time

In [None]:
# This section produces Table_4.csv.
table_count <- 4

In [35]:
# Directory holding the input file; the write.csv() call in this section
# also builds its output path from `out`.
out <- "/projects/PPC/analysis/ppc_pilot/data/aggr_live_then_integrate/"
lm_fit_coefs <- read.table(
    paste0(out, "ppc_fit_coefs_frazer_ppc_allexpressedgenes.txt")
)

In [37]:
# Label each row by the sign of its significant linear trend:
#   Type_1: q_value < 0.05 and estimate > 0
#   Type_2: q_value < 0.05 and estimate < 0
# Rows meeting neither condition keep NA.
#
# The column is indexed through which() rather than assigning into a row
# subset of the data frame. The row-subset form raises an error when no row
# matches (zero-row `$<-`) or when the condition contains NA.
lm_fit_coefs$Type <- rep(NA_character_, nrow(lm_fit_coefs))
lm_fit_coefs$Type[which(lm_fit_coefs$q_value < 0.05 & lm_fit_coefs$estimate > 0)] <- "Type_1"
lm_fit_coefs$Type[which(lm_fit_coefs$q_value < 0.05 & lm_fit_coefs$estimate < 0)] <- "Type_2"

In [40]:
# Sort rows by ascending q_value; order() places NA values last.
lm_fit_coefs <- lm_fit_coefs[order(lm_fit_coefs$q_value), ]

In [None]:
# Write the table as <out>Table_<table_count>.csv.
write.csv(lm_fit_coefs, file = paste0(out, "Table_", table_count, ".csv"))

# Table 5
PPC genes that change non-linearly over time (MFuzz membership matrix)

In [None]:
# This section produces Table_5.csv.
table_count <- 5

In [52]:
# Directory holding the input file; the write.csv() call in this section
# also builds its output path from `out`.
out <- "/projects/PPC/analysis/ppc_pilot/data/aggr_live_then_integrate/"
mfuzz <- read.table(paste0(out, "mfuzz_k10_b4_memberships.txt"))

In [55]:
# Label rows by the value of `maxCol`, but only where `real` is TRUE:
#   Type_3: maxCol == 2
#   Type_4: maxCol == 6
# All other rows keep NA.
#
# The column is indexed through which() rather than assigning into a row
# subset of the data frame. The row-subset form raises an error when no row
# matches (zero-row `$<-`) or when the condition contains NA.
mfuzz$Type <- rep(NA_character_, nrow(mfuzz))
mfuzz$Type[which(mfuzz$maxCol == 2 & mfuzz$real == TRUE)] <- "Type_3"
mfuzz$Type[which(mfuzz$maxCol == 6 & mfuzz$real == TRUE)] <- "Type_4"

# Sort by Type (order() places NA rows last), then drop the two helper
# columns, which are needed only for the labelling above.
mfuzz <- mfuzz[order(mfuzz$Type), ]
mfuzz$maxCol <- NULL
mfuzz$real <- NULL

In [59]:
# Rename the columns by position: Cluster_1 ... Cluster_10, then Type.
names(mfuzz) <- c(paste0("Cluster_", seq_len(10)), "Type")

In [None]:
# Write the table as <out>Table_<table_count>.csv.
write.csv(mfuzz, file = paste0(out, "Table_", table_count, ".csv"))

# Table 2

Demuxlet results for batch 1 cryopreserved scRNA-seq

In [122]:
# NOTE(review): this reuses table number 2, so the file name written below
# is the same one the marker-gene table uses -- confirm which section is
# current.
table_count <- 2

In [115]:
# Load the Demuxlet results sheet.
t2 <- read.csv("../Table/Table_S2_Demuxlet_Results_V04.csv")

In [116]:
# Keep every row whose Assay is not snATAC_seq.
t2 <- t2[t2$Assay != "snATAC_seq", ]

In [117]:
# Remove both concordance-rate columns.
t2$Concordance_rate_doublets <- NULL
t2$Concordance_rate_singlets <- NULL

In [118]:
# Remove the Assay column now that the filter has been applied.
t2$Assay <- NULL

In [119]:
# Replace the Subject column with a UDID column appended at the end.
t2$UDID <- t2$Subject
t2$Subject <- NULL

In [120]:
# Reorder the columns by position so the third column comes first.
t2 <- t2[, c(3, 1, 2)]

In [123]:
# Write the table as <out>Table_<table_count>.csv.
write.csv(t2, file = paste0(out, "Table_", table_count, ".csv"))

# Table 3

Demuxlet results for batch 2 cryopreserved scRNA-seq

In [106]:
# NOTE(review): this reuses table number 3, so the file name written below
# is the same one the GO-enrichment table uses -- confirm which section is
# current.
table_count <- 3

In [107]:
# NOTE(review): the section description mentions Demuxlet, but the file
# loaded here is the Scrublet results sheet -- confirm which is intended.
t3 <- read.csv("../Table/Table_S3_Scrublet_Results_V01.csv")

In [108]:
# Rename the columns by position (11 names).
names(t3) <- c(
    "Assay",
    "Assay_UUID",
    "UDID",
    "Batch",
    "Cells",
    "Features",
    "Threshold",
    "Detected_doublet_rate",
    "Estimated_doublet_rate",
    "Expected_doublet_rate",
    "Estimated_doublet_rate.1"
)

In [109]:
# Remove the expected rate and the second estimated-rate column.
t3$Estimated_doublet_rate.1 <- NULL
t3$Expected_doublet_rate <- NULL

In [110]:
# Keep every row whose Assay is not snATAC_seq.
t3 <- t3[t3$Assay != "snATAC_seq", ]

In [111]:
# Remove the columns that are not reported.
t3$Features <- NULL
t3$Cells <- NULL
t3$Assay <- NULL

In [112]:
t3$Estimated_doublet_rate <- NULL

In [113]:
# Write the table as <out>Table_<table_count>.csv.
write.csv(t3, file = paste0(out, "Table_", table_count, ".csv"))