In [112]:
# Directory that receives the final tables. The trailing slash matters:
# output file names are appended to this string without a separator.
out <- "/home/mdonovan/pancreas_scRNA_map/Table/Final/"

# Table 1

Description of the samples in the study, the percentage of PDX1+ and PDX1+/NKX6-1+ cells, and UUIDs for all sequencing data

In [113]:
# Number of the table being assembled; used to build the output file name
table_count <- 1

In [114]:
# Load the clinical and molecular metadata sheet (path is relative to the working directory)
t1 <- read.csv(file = "../Table/Table S1_Clinical and Molecular Metadata_V03.csv")

In [115]:
# Drop sample PPC_069. `%in%` never returns NA, so a row with a missing UDID
# is kept unchanged instead of being replaced by an all-NA row, which is what
# logical subsetting with an NA comparison result would produce.
t1 <- t1[!(t1$UDID %in% "PPC_069"), ]

In [116]:
# Remove the bulk ATAC-seq and bulk RNA-seq UUID columns from the table
t1[["Bulk_ATAC.seq_UUID"]] <- NULL
t1[["bulk_RNAseq_UUID"]] <- NULL

In [117]:
# Assign the published column headers by position.
# Guard first: if the sheet has more columns than names, `colnames<-` silently
# pads the remaining headers with NA, so a changed input layout would be
# mislabelled without any error.
stopifnot(ncol(t1) == 14)
# Header typos fixed: "Ethinicity" -> "Ethnicity", "Cryporeserved" -> "Cryopreserved"
colnames(t1) <- c("Subject_UUID", "Sex", "Ethnicity", "iPSCORE_Family", "Cell_type", "UDID", "PDX1_pos", "PDX1.NKX61_pos", "WGS_UUID",
                  "scRNAseq_live_UUID", "scRNAseq_Cryopreserved_UUID", "scRNAseq_Cryopreserved_Pool_scheme", "snATACseq._UUID",
                  "snATACseq._Pool_scheme")

In [118]:
# Write the table to <out>Table_<table_count>.csv (row names are included by write.csv's default)
write.csv(t1, file = paste0(out, "Table_", table_count, ".csv"))

# Table 2
Differentially expressed genes from each cell type

In [120]:
# Number of the table being assembled; used to build the output file name
table_count <- 2

In [121]:
# Directory holding the per-cell-type marker tables (trailing slash required:
# file names are appended without a separator)
in.f <- "/projects/PPC/analysis/ppc_pilot/data/aggr_live_then_integrate/"

# One marker table per cell type
iPSC_markers        <- read.table(paste0(in.f, "iPSC_markers.txt"))
repl_markers        <- read.table(paste0(in.f, "repl_markers.txt"))
ppc_markers         <- read.table(paste0(in.f, "all.ppc_markers.txt"))
Endocrine_markers   <- read.table(paste0(in.f, "Endocrine_markers.txt"))
mesenchymal_markers <- read.table(paste0(in.f, "mesenchymal_markers.txt"))
endothelial_markers <- read.table(paste0(in.f, "endothelial_markers.txt"))

In [122]:
# Reshape a marker-gene table for export.
#
# Args:
#   df:       data.frame whose row names are the gene names and which contains
#             the columns "p_val", "avg_logFC" and "p_val_adj".
#   celltype: label written to the "cell_type" column of every row.
#
# Returns:
#   A data.frame with default row names and exactly the columns
#   cell_type, marker_gene_name, p_val, avg_logFC, p_val_adj, in that order.
#   A zero-row input yields a zero-row result with the same columns.
AnnotateTableMarker <- function(df, celltype){

    # Gene names are stored as row names; move them into a regular column
    df$marker_gene_name <- rownames(df)
    rownames(df) <- NULL

    # Expand the label explicitly: assigning a bare scalar to a column of a
    # zero-row data.frame is an error ("replacement has 1 row, data has 0")
    df$cell_type <- rep(celltype, length.out = nrow(df))

    df[, c("cell_type", "marker_gene_name", "p_val", "avg_logFC", "p_val_adj")]

}

In [123]:
# Label each marker table with its cell type and stack them into one table
t2 <- do.call(rbind, list(
    AnnotateTableMarker(iPSC_markers,        "iPSC"),
    AnnotateTableMarker(repl_markers,        "Replicating cells"),
    AnnotateTableMarker(ppc_markers,         "PPCs (1, 2, 3, and 4)"),
    AnnotateTableMarker(Endocrine_markers,   "Endocrine cells"),
    AnnotateTableMarker(mesenchymal_markers, "Mesenchyme cells"),
    AnnotateTableMarker(endothelial_markers, "Endothelial cells")
))

In [124]:
# Write the table to <out>Table_<table_count>.csv (row names are included by write.csv's default)
write.csv(t2, file = paste0(out, "Table_", table_count, ".csv"))

# Table 3
GO enrichment of marker genes from each cell type

In [125]:
# Number of the table being assembled; used to build the output file name
table_count <- 3

In [126]:
# Directory holding the per-cell-type GO enrichment tables (trailing slash
# required: file names are appended without a separator)
in.f <- "/projects/PPC/analysis/ppc_pilot/data/aggr_live_then_integrate/"

# One GO enrichment table per cell type
gowall_ipsc        <- read.table(paste0(in.f, "gowall_ipsc.txt"))
gowall_repl        <- read.table(paste0(in.f, "gowall_repl.txt"))
gowall_mesenchyme  <- read.table(paste0(in.f, "gowall_mesenchyme.txt"))
gowall_endocrine   <- read.table(paste0(in.f, "gowall_endocrine.txt"))
gowall_endothelial <- read.table(paste0(in.f, "gowall_endothelial.txt"))
gowall_ppc         <- read.table(paste0(in.f, "gowall_ppc.txt"))

In [127]:
# Reshape a GO enrichment table for export.
#
# Args:
#   df:       data.frame containing the columns "category",
#             "over_represented_pvalue", "under_represented_pvalue",
#             "numDEInCat", "numInCat", "term", "ontology", "pvalue" and
#             "bonferroni".
#   celltype: label written to the "cell_type" column of every row.
#
# Returns:
#   A data.frame with default row names, "cell_type" as the first column and
#   the columns listed above after it; any other input column is dropped.
#   A zero-row input yields a zero-row result with the same columns.
AnnotateTableGO <- function(df, celltype){

    rownames(df) <- NULL

    # Expand the label explicitly: assigning a bare scalar to a column of a
    # zero-row data.frame is an error ("replacement has 1 row, data has 0")
    df$cell_type <- rep(celltype, length.out = nrow(df))

    df[, c("cell_type", "category", "over_represented_pvalue", "under_represented_pvalue", "numDEInCat", "numInCat", "term", "ontology", "pvalue", "bonferroni")]

}

In [128]:
# Label each GO enrichment table with its cell type and stack them into one table
t3 <- do.call(rbind, list(
    AnnotateTableGO(gowall_ipsc,        "iPSC"),
    AnnotateTableGO(gowall_repl,        "Replicating cells"),
    AnnotateTableGO(gowall_ppc,         "PPCs (1, 2, 3, and 4)"),
    AnnotateTableGO(gowall_endocrine,   "Endocrine cells"),
    AnnotateTableGO(gowall_mesenchyme,  "Mesenchyme cells"),
    AnnotateTableGO(gowall_endothelial, "Endothelial cells")
))

In [129]:
# Write the table to <out>Table_<table_count>.csv (row names are included by write.csv's default)
write.csv(t3, file = paste0(out, "Table_", table_count, ".csv"))

# Table 4

PPC genes that linearly change over time

In [130]:
# Number of the table being assembled; used to build the output file name
table_count <- 4

In [131]:
# Input directory (trailing slash required: the file name is appended without a separator)
in.f <- "/projects/PPC/analysis/ppc_pilot/data/aggr_live_then_integrate/"
# Per-gene fit coefficients table
lm_fit_coefs <- read.table(paste0(in.f, "ppc_fit_coefs_frazer_ppc_allexpressedgenes.txt"))

In [132]:
# Classify significant genes (q_value < 0.05) by the sign of the estimate:
# positive -> "Type_1", negative -> "Type_2"; every other gene stays NA.
#
# The assignments index the column through which(), which drops NA and handles
# an empty match. The previous `df[cond, ]$Type <- ...` form raised an error
# when `cond` contained NA (missing q_value or estimate) or selected no rows.
lm_fit_coefs$Type <- rep(NA_character_, nrow(lm_fit_coefs))
lm_fit_coefs$Type[which(lm_fit_coefs$q_value < 0.05 & lm_fit_coefs$estimate > 0)] <- "Type_1"
lm_fit_coefs$Type[which(lm_fit_coefs$q_value < 0.05 & lm_fit_coefs$estimate < 0)] <- "Type_2"

In [133]:
# Sort rows by ascending q_value (order() places NA values last)
lm_fit_coefs <- lm_fit_coefs[order(lm_fit_coefs$q_value), ]

In [134]:
# Reset to default sequential row names so they follow the sorted order
row.names(lm_fit_coefs) <- NULL

In [135]:
# Write the table to <out>Table_<table_count>.csv (row names are included by write.csv's default)
write.csv(lm_fit_coefs, file = paste0(out, "Table_", table_count, ".csv"))

# Table 5
PPC genes that change non-linearly over time (MFuzz membership matrix)

In [136]:
# Number of the table being assembled; used to build the output file name
table_count <- 5

In [137]:
# Input directory (trailing slash required: the file name is appended without a separator)
in.f <- "/projects/PPC/analysis/ppc_pilot/data/aggr_live_then_integrate/"
# MFuzz cluster membership table
mfuzz <- read.table(paste0(in.f, "mfuzz_k10_b4_memberships.txt"))

In [138]:
# Label rows flagged as real (`real == TRUE`) whose maxCol is 2 as "Type_3"
# and those whose maxCol is 6 as "Type_4"; every other row stays NA.
#
# The assignments index the column through which(), which drops NA and handles
# an empty match. The previous `df[cond, ]$Type <- ...` form raised an error
# when `cond` contained NA or selected no rows.
mfuzz$Type <- rep(NA_character_, nrow(mfuzz))
mfuzz$Type[which(mfuzz$maxCol == 2 & mfuzz$real == TRUE)] <- "Type_3"
mfuzz$Type[which(mfuzz$maxCol == 6 & mfuzz$real == TRUE)] <- "Type_4"

# Labelled rows first (Type_3, then Type_4); order() places NA last
mfuzz <- mfuzz[order(mfuzz$Type), ]

# The helper columns are not part of the exported table
mfuzz$maxCol <- NULL
mfuzz$real <- NULL

In [139]:
# Assign the exported column headers by position.
# Guard first: if the table has more columns than names, `colnames<-` silently
# pads the remaining headers with NA, so an unexpected layout would be
# mislabelled without any error.
stopifnot(ncol(mfuzz) == 11)
colnames(mfuzz) <- c("Cluster_1", "Cluster_2", "Cluster_3", "Cluster_4", "Cluster_5",
                     "Cluster_6", "Cluster_7", "Cluster_8", "Cluster_9", "Cluster_10", "Type")

In [140]:
# Write the table to <out>Table_<table_count>.csv (row names are included by write.csv's default)
write.csv(mfuzz, file = paste0(out, "Table_", table_count, ".csv"))

# Table 6

In [141]:
# Number of the table being assembled; used to build the output file name
table_count <- 6

In [142]:
# Input directory (trailing slash required: file names are appended without a separator)
in.f <- "/projects/PPC/analysis/ppc_pilot/data/aggr_live_then_integrate/"

# One GO enrichment table per PPC gene set (up, down, mid.up, mid.down)
go_up       <- read.table(paste0(in.f, "gowall_ppc_up.txt"))
go_down     <- read.table(paste0(in.f, "gowall_ppc_down.txt"))
go_mid.up   <- read.table(paste0(in.f, "gowall_ppc_mid.up.txt"))
go_mid.down <- read.table(paste0(in.f, "gowall_ppc_mid.down.txt"))

In [143]:
# Reshape a GO enrichment table for export, labelled by expression-change type.
#
# Args:
#   df:       data.frame containing the columns "category",
#             "over_represented_pvalue", "under_represented_pvalue",
#             "numDEInCat", "numInCat", "term", "ontology", "pvalue" and
#             "bonferroni".
#   celltype: label written to the "Type_gene_expression_change" column of
#             every row (the parameter name is kept for compatibility).
#
# Returns:
#   A data.frame with default row names, "Type_gene_expression_change" as the
#   first column and the columns listed above after it; any other input
#   column is dropped. A zero-row input yields a zero-row result.
AnnotateTableGO2 <- function(df, celltype){

    rownames(df) <- NULL

    # Expand the label explicitly: assigning a bare scalar to a column of a
    # zero-row data.frame is an error ("replacement has 1 row, data has 0")
    df$Type_gene_expression_change <- rep(celltype, length.out = nrow(df))

    df[, c("Type_gene_expression_change", "category", "over_represented_pvalue", "under_represented_pvalue", "numDEInCat", "numInCat", "term", "ontology", "pvalue", "bonferroni")]

}

In [144]:
# Label each GO enrichment table with its expression-change type and stack them
t6 <- do.call(rbind, list(
    AnnotateTableGO2(go_up,       "Type_1"),
    AnnotateTableGO2(go_down,     "Type_2"),
    AnnotateTableGO2(go_mid.up,   "Type_3"),
    AnnotateTableGO2(go_mid.down, "Type_4")
))

In [145]:
# Write the table to <out>Table_<table_count>.csv (row names are included by write.csv's default)
write.csv(t6, file = paste0(out, "Table_", table_count, ".csv"))

# Table 7

Demuxlet results for batch 1 cryopreserved scRNA-seq

In [146]:
# Number of the table being assembled; used to build the output file name
table_count <- 7

In [147]:
# Load the Demuxlet results sheet (path is relative to the working directory).
# NOTE(review): this reuses the name `t2`, replacing the marker table built for Table 2.
t2 <- read.csv(file = "../Table/Table_S2_Demuxlet_Results_V04.csv")

In [148]:
# Drop the snATAC_seq rows. `%in%` never returns NA, so a row with a missing
# Assay is kept unchanged instead of being replaced by an all-NA row, which is
# what logical subsetting with an NA comparison result would produce.
t2 <- t2[!(t2$Assay %in% "snATAC_seq"), ]

In [149]:
# Remove the singlet and doublet concordance-rate columns
t2[["Concordance_rate_singlets"]] <- NULL
t2[["Concordance_rate_doublets"]] <- NULL

In [150]:
# The Assay column is no longer needed once the snATAC_seq rows are filtered out
t2[["Assay"]] <- NULL

In [151]:
# Rename Subject to UDID: copy the values under the new name, then drop the original
t2[["UDID"]] <- t2$Subject
t2[["Subject"]] <- NULL

In [152]:
# Keep columns 3, 1 and 2, in that order (selection is by position, not by name)
t2 <- t2[, c(3L, 1L, 2L)]

In [153]:
# Write the table to <out>Table_<table_count>.csv (row names are included by write.csv's default)
write.csv(t2, file = paste0(out, "Table_", table_count, ".csv"))

# Table 8

Scrublet doublet-detection results (threshold and detected doublet rate) for scRNA-seq

In [154]:
# Number of the table being assembled; used to build the output file name
table_count <- 8

In [155]:
# Load the Scrublet results sheet (path is relative to the working directory).
# NOTE(review): this reuses the name `t3`, replacing the GO table built for Table 3.
t3 <- read.csv(file = "../Table/Table_S3_Scrublet_Results_V01.csv")

In [156]:
# Assign working column headers by position.
# Guard first: if the sheet has more columns than names, `colnames<-` silently
# pads the remaining headers with NA, so a changed input layout would be
# mislabelled without any error.
stopifnot(ncol(t3) == 11)
colnames(t3) <- c("Assay", "Assay_UUID", "UDID", "Batch", "Cells","Features", "Threshold", "Detected_doublet_rate",
                 "Estimated_doublet_rate", "Expected_doublet_rate", "Estimated_doublet_rate.1")

In [157]:
# Remove the expected doublet rate and the second estimated doublet rate column
t3[["Expected_doublet_rate"]] <- NULL
t3[["Estimated_doublet_rate.1"]] <- NULL

In [158]:
# Drop the snATAC_seq rows. `%in%` never returns NA, so a row with a missing
# Assay is kept unchanged instead of being replaced by an all-NA row, which is
# what logical subsetting with an NA comparison result would produce.
t3 <- t3[!(t3$Assay %in% "snATAC_seq"), ]

In [159]:
# Remove the Assay, Cells and Features columns from the exported table
t3[["Assay"]] <- NULL
t3[["Cells"]] <- NULL
t3[["Features"]] <- NULL

In [160]:
# Remove the remaining estimated doublet rate column
t3[["Estimated_doublet_rate"]] <- NULL

In [161]:
# Write the table to <out>Table_<table_count>.csv (row names are included by write.csv's default)
write.csv(t3, file = paste0(out, "Table_", table_count, ".csv"))