Notebook summary:
- First look at exercise data
- Examines possible values for each column
- Quality control: Check levels of RNA (including mitochondrial RNA) and genes across the 4 test groups to make sure they're relatively similar and aren't contaminated
- Calculates transcriptional noise
- Plots included in the notebook:
    Bar plot: used for quality control and to show p-values
    Volcano plot: plots log fold change between two groups

In [1]:
library(ggplot2)
library(tidyverse)
library(lubridate)
library(Seurat)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.0
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
Attaching SeuratObject

‘SeuratObject’ was built with package ‘Matrix’ 1.6.1.1 but the current
version is 1.6.4; it is recomended that you reinstall ‘SeuratObject’ as
the ABI for ‘Matrix’ may have changed



In [2]:
library(pheatmap)

In [3]:
# Location of the serialized object that the rest of the notebook works on.
# NOTE(review): the path ends in "?download=1", presumably left over from a
# download URL - confirm the file is really stored under this exact name.
filename <- "/gpfs/home/meyin/published_data/parabiosis/pb_combined.rds?download=1"

# Deserialize the object; later cells access its @meta.data slot.
data <- readRDS(file = filename)

In [4]:
# Quality Control

In [5]:
# Per-cell metadata table without the cells whose Celltype is "Doublet".
metadata <- filter(data@meta.data, Celltype != "Doublet")

In [6]:
# ID is the part of hash.ID before the first "-" (e.g. "BC3").
metadata[["ID"]] <- sub("-.*", "", metadata[["hash.ID"]])

In [7]:
# Same ID derivation, applied to the metadata stored inside the object:
# everything from the first "-" of hash.ID onwards is stripped.
data@meta.data[["ID"]] <- sub("-.*", "", data@meta.data[["hash.ID"]])

In [8]:
# AgeCond combines the first character of Age with Type, joined by "_"
# (the values filtered on later are "O_Iso", "O_Het", "Y_Iso", "Y_Het").
metadata <- mutate(
  metadata,
  AgeCond = paste0(substr(Age, 1, 1), "_", Type)
)

In [9]:
# Add the same AgeCond label (first character of Age, "_", Type) to the
# metadata stored inside the object.
data@meta.data <- mutate(
  data@meta.data,
  AgeCond = paste0(substr(Age, 1, 1), "_", Type)
)

In [10]:
# Rows labelled "O_Iso" (the notebook's Old Control group), then a preview.
ocontroldata <- filter(metadata, AgeCond == "O_Iso")
head(ocontroldata)

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,RNA_snn_res.0.15,seurat_clusters,RNA_snn_res.0.25,RNA_snn_res.0.5,LMO_maxID,LMO_secondID,⋯,nGene,nUMI,percent.mito,res.0.2,SCT_snn_res.0.5,SCT_snn_res.0.6,SCT_snn_res.0.55,SCT_snn_res.0.52,SCT_snn_res.0.53,ID
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,⋯,<int>,<dbl>,<dbl>,<chr>,<fct>,<fct>,<fct>,<fct>,<fct>,<chr>
20_AAACGAACAACCAACT-1_1,GEX1,7542,2698,4.62742,1,7,4,4,BC3-Old-Iso-20.50-6A-B1-35260-CTCTAGAC,BC4-Old-Iso-20.50-6B-B1-16224-ACCAATGC,⋯,,,,,5,5,5,4,7,BC3
20_AAACGAACAGGTTCAT-1_1,GEX1,15424,3461,3.060166,0,8,7,8,BC4-Old-Iso-20.50-6B-B1-16224-ACCAATGC,BC6-Old-Iso-20.50-9B-B1-29863-CGAACAAG,⋯,,,,,0,7,0,0,8,BC4
20_AAACGAACATGGAACG-1_1,GEX1,11212,3462,4.584374,5,6,4,6,BC3-Old-Iso-20.50-6A-B1-35260-CTCTAGAC,BC4-Old-Iso-20.50-6B-B1-16224-ACCAATGC,⋯,,,,,7,8,7,4,6,BC3
20_AAACGAAGTTGCGGCT-1_1,GEX1,10125,3303,3.348148,4,9,5,5,BC6-Old-Iso-20.50-9B-B1-29863-CGAACAAG,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,⋯,,,,,8,10,8,7,9,BC6
20_AAACGCTAGCCTAACT-1_1,GEX1,4324,1829,3.075856,3,3,3,3,BC3-Old-Iso-20.50-6A-B1-35260-CTCTAGAC,BC6-Old-Iso-20.50-9B-B1-29863-CGAACAAG,⋯,,,,,1,2,1,1,3,BC3
20_AAACGCTAGCGACCCT-1_1,GEX1,13439,3710,5.305454,1,6,4,4,BC4-Old-Iso-20.50-6B-B1-16224-ACCAATGC,BC5-Old-Iso-20.50-9A-B1-24388-AGTTGCGT,⋯,,,,,7,8,7,4,6,BC4


In [11]:
# Rows labelled "O_Het" (the notebook's Old Exercise group), then a preview.
oexercisedata <- filter(metadata, AgeCond == "O_Het")
head(oexercisedata)

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,RNA_snn_res.0.15,seurat_clusters,RNA_snn_res.0.25,RNA_snn_res.0.5,LMO_maxID,LMO_secondID,⋯,nGene,nUMI,percent.mito,res.0.2,SCT_snn_res.0.5,SCT_snn_res.0.6,SCT_snn_res.0.55,SCT_snn_res.0.52,SCT_snn_res.0.53,ID
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,⋯,<int>,<dbl>,<dbl>,<chr>,<fct>,<fct>,<fct>,<fct>,<fct>,<chr>
20_AAACGAATCGTTCATT-1_1,GEX1,7902,2773,3.809162,3,3,3,3,BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG,BC6-Old-Iso-20.50-9B-B1-29863-CGAACAAG,⋯,,,,,1,2,1,1,3,BC1
20_AAAGAACCACTGGATT-1_1,GEX1,7086,2607,4.106689,3,3,3,3,BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG,BC6-Old-Iso-20.50-9B-B1-29863-CGAACAAG,⋯,,,,,1,2,1,1,3,BC1
20_AAAGAACCATACTGAC-1_1,GEX1,6149,2400,7.188161,1,4,1,1,BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG,BC5-Old-Iso-20.50-9A-B1-24388-AGTTGCGT,⋯,,,,,4,4,4,5,4,BC1
20_AAAGTCCAGTTGTCAC-1_1,GEX1,6349,1991,2.819342,0,8,0,0,BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG,BC6-Old-Iso-20.50-9B-B1-29863-CGAACAAG,⋯,,,,,0,7,0,0,8,BC1
20_AACAAGAAGAACTCCT-1_1,GEX1,7780,2448,4.023136,0,1,0,0,BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,⋯,,,,,0,1,0,0,1,BC1
20_AACCAACGTCCCAAAT-1_1,GEX1,17656,4983,2.61101,10,19,10,13,BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,⋯,,,,,18,19,19,13,19,BC1


In [12]:
# Rows labelled "Y_Iso" (the notebook's Young Control group), then a preview.
ycontroldata <- filter(metadata, AgeCond == "Y_Iso")
head(ycontroldata)

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,RNA_snn_res.0.15,seurat_clusters,RNA_snn_res.0.25,RNA_snn_res.0.5,LMO_maxID,LMO_secondID,⋯,nGene,nUMI,percent.mito,res.0.2,SCT_snn_res.0.5,SCT_snn_res.0.6,SCT_snn_res.0.55,SCT_snn_res.0.52,SCT_snn_res.0.53,ID
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,⋯,<int>,<dbl>,<dbl>,<chr>,<fct>,<fct>,<fct>,<fct>,<fct>,<chr>
20_AAACCCAGTACCGTGC-1_2,GEX2,3131,1434,4.535292,,3,,2,BC11-Young-Iso-5.67-10A-B2-70700-GAGTCGAT,BC10-Old-Iso-20.53-7B-B2-21000-ATTCGCAC,⋯,,,,,1,2,1,1,3,BC11
20_AAACCCATCCCGGTAG-1_2,GEX2,10213,3255,3.446588,,6,,4,BC12-Young-Iso-5.67-10B-B2-70100-AAGGCTAG,BC9-Old-Iso-20.53-7A-B2-33700-AAGTACGC,⋯,,,,,7,8,7,4,6,BC12
20_AAACGAAAGCTCTGTA-1_2,GEX2,3182,1497,3.048397,,2,,3,BC12-Young-Iso-5.67-10B-B2-70100-AAGGCTAG,BC10-Old-Iso-20.53-7B-B2-21000-ATTCGCAC,⋯,,,,,3,3,3,3,2,BC12
20_AAACGCTCAACTCCAA-1_2,GEX2,5224,2231,4.364472,,0,,1,BC12-Young-Iso-5.67-10B-B2-70100-AAGGCTAG,BC10-Old-Iso-20.53-7B-B2-21000-ATTCGCAC,⋯,,,,,2,0,5,2,0,BC12
20_AAACGCTTCGAGCCTG-1_2,GEX2,5157,2078,5.080473,,4,,1,BC12-Young-Iso-5.67-10B-B2-70100-AAGGCTAG,BC9-Old-Iso-20.53-7A-B2-33700-AAGTACGC,⋯,,,,,4,4,4,5,4,BC12
20_AAAGGATAGTAAGCAT-1_2,GEX2,2550,1234,0.745098,,8,,0,BC12-Young-Iso-5.67-10B-B2-70100-AAGGCTAG,BC10-Old-Iso-20.53-7B-B2-21000-ATTCGCAC,⋯,,,,,0,7,0,0,8,BC12


In [13]:
# Rows labelled "Y_Het" (the notebook's Young Exercise group), then a preview.
yexercisedata <- filter(metadata, AgeCond == "Y_Het")
head(yexercisedata)

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,percent.mt,RNA_snn_res.0.15,seurat_clusters,RNA_snn_res.0.25,RNA_snn_res.0.5,LMO_maxID,LMO_secondID,⋯,nGene,nUMI,percent.mito,res.0.2,SCT_snn_res.0.5,SCT_snn_res.0.6,SCT_snn_res.0.55,SCT_snn_res.0.52,SCT_snn_res.0.53,ID
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<chr>,<fct>,<chr>,<chr>,<chr>,<chr>,⋯,<int>,<dbl>,<dbl>,<chr>,<fct>,<fct>,<fct>,<fct>,<fct>,<chr>
20_AAACCCATCGGTAGGA-1_1,GEX1,5461,2259,3.9186962,3,3,3,3,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG,⋯,,,,,1,2,1,1,3,BC2
20_AAACGCTTCGCAGTCG-1_1,GEX1,5762,2182,5.9007289,1,4,1,1,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,BC4-Old-Iso-20.50-6B-B1-16224-ACCAATGC,⋯,,,,,4,4,4,5,4,BC2
20_AAAGGATAGGAAACGA-1_1,GEX1,12454,3794,4.0308335,4,9,5,5,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG,⋯,,,,,8,10,8,7,9,BC2
20_AAATGGATCACAATGC-1_1,GEX1,6266,2143,2.0108522,0,1,0,0,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,BC4-Old-Iso-20.50-6B-B1-16224-ACCAATGC,⋯,,,,,0,1,0,0,1,BC2
20_AACCAACTCTCCTGTG-1_1,GEX1,3006,1403,0.7984032,0,8,0,0,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,BC5-Old-Iso-20.50-9A-B1-24388-AGTTGCGT,⋯,,,,,0,7,0,0,8,BC2
20_AACCACAAGCATGCAG-1_1,GEX1,14970,3999,5.6112224,1,4,1,1,BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC,BC6-Old-Iso-20.50-9B-B1-29863-CGAACAAG,⋯,,,,,4,4,4,5,4,BC2


In [14]:
# Look at differences in transcriptional noise in different cell types + see how they change with age

In [15]:
# Keep only the cells whose hash.ID differs from the literal string "NA".
# NOTE(review): this compares against the text "NA", not a missing value -
# confirm that unassigned cells are stored as the string "NA" rather than NA.
data <- subset(data, subset = hash.ID != "NA")

In [16]:
# Recompute ID on the subsetted object: the part of hash.ID before the
# first "-".
data@meta.data[["ID"]] <- sub("-.*", "", data@meta.data[["hash.ID"]])

In [17]:
# One object per distinct value of the ID metadata column.
seurat_list <- SplitObject(object = data, split.by = "ID")

In [18]:
# Pull the "data" slot of the assay out of every per-individual object.
initial_matrices <- lapply(seurat_list, GetAssayData, slot = "data")

In [19]:
# Find transcriptional noise across all individuals and compare.
# The accumulator for the per-individual noise values starts out empty.
noiselist <- NULL

# Full hashtag label of every individual. The dash-separated fields start
# with the short ID, the age group and the condition.
hashidlist <- c(
  "BC2-Young-Het-5.10-1B-B1-14300-TCAATGGC",
  "BC3-Old-Iso-20.50-6A-B1-35260-CTCTAGAC",
  "BC4-Old-Iso-20.50-6B-B1-16224-ACCAATGC",
  "BC6-Old-Iso-20.50-9B-B1-29863-CGAACAAG",
  "BC1-Old-Het-20.50-1A-B1-25000-TGTGATGG",
  "BC5-Old-Iso-20.50-9A-B1-24388-AGTTGCGT",
  "BC7-Old-Het-20.53-2A-B2-51000-GTACCTGT",
  "BC13-Old-Het-20.53-5A-B2-50000-CAGTTAGG",
  "BC11-Young-Iso-5.67-10A-B2-70700-GAGTCGAT",
  "BC9-Old-Iso-20.53-7A-B2-33700-AAGTACGC",
  "BC12-Young-Iso-5.67-10B-B2-70100-AAGGCTAG",
  "BC8-Young-Het-5.13-2B-B2-30500-GAAGCTTG",
  "BC14-Young-Het-5.13-5B-B2-50100-AACCGAAC",
  "BC10-Old-Iso-20.53-7B-B2-21000-ATTCGCAC",
  "BC23-Young-Iso-4.60-15A-B4-28000-ATCTACGG",
  "BC22-Young-Het-5.40-14B-B4-38000-CGATTAGC",
  "BC21-Old-Het-20.80-14A-B4-24000-GAGAGACT",
  "BC24-Young-Iso-4.60-15B-B4-22800-TGTACCAG"
)

# Split each label once and read the leading fields by position.
hash_fields <- strsplit(hashidlist, "-", fixed = TRUE)
hash_field <- function(position) {
  vapply(hash_fields, `[`, character(1), position)
}

# Short IDs ("BC2", "BC3", ...) in the same order as hashidlist.
idlist <- hash_field(1)

# Lower-case "<age> <condition>" label per individual, e.g. "young het".
experimentalgroup <- tolower(paste(hash_field(2), hash_field(3)))

# One colour per experimental group, expanded to one entry per individual.
group_colours <- c(
  "young het" = "#556F44",
  "old iso" = "#78CDD7",
  "old het" = "#235789",
  "young iso" = "#A7A650"
)
cols <- unname(group_colours[experimentalgroup])

# Coefficient of variation (sd / mean) of nCount_SCT for every individual in
# seurat_list. Iterating over the list itself replaces the hard-coded 1:15,
# which skips individuals when the list is longer and fails when it is
# shorter. The vector is rebuilt from scratch, so re-running the cell no
# longer appends a second copy of the values.
noiselist <- unname(vapply(
  seurat_list,
  function(individual) {
    counts <- individual$nCount_SCT
    sd(counts) / mean(counts)
  },
  numeric(1)
))


In [20]:
# Coefficient of variation of a numeric vector.
#
# gene: numeric vector of values.
# Returns the standard deviation divided by the mean (NaN when both are 0,
# NA for a single value because sd() is NA there).
calculateNoise <- function(gene) {
  spread <- sd(gene)
  centre <- mean(gene)
  spread / centre
}

In [21]:
# For each matrix in initial_matrices, apply calculateNoise along MARGIN = 2,
# i.e. once per column of the matrix.
# NOTE(review): the resulting values are named by cell barcode (see the
# preview of noiseDistributions_asDF), so this yields one value per cell
# computed across genes, not one value per gene - confirm which is intended
# (MARGIN = 1 would iterate over rows instead).
noiseDistributions <- lapply(initial_matrices, function(expr_matrix) {
  apply(expr_matrix, 2, calculateNoise)
})

In [22]:
# Convert noiseDistributions into one long data frame: one row per value,
# with the name of the list element it came from in the ID column.
frame_for_individual <- function(position) {
  data.frame(
    ID = names(noiseDistributions)[position],
    Value = noiseDistributions[[position]]
  )
}
per_individual_frames <- lapply(
  seq_along(noiseDistributions),
  frame_for_individual
)
noiseDistributions_asDF <- do.call(rbind, per_individual_frames)

In [23]:
# Preview the first rows of the long-format noise table (ID and Value columns).
head(noiseDistributions_asDF)

Unnamed: 0_level_0,ID,Value
Unnamed: 0_level_1,<chr>,<dbl>
20_AAACCCATCGGTAGGA-1_1,BC2,3.076132
20_AAACGCTTCGCAGTCG-1_1,BC2,3.149995
20_AAAGGATAGGAAACGA-1_1,BC2,2.995256
20_AAATGGATCACAATGC-1_1,BC2,3.241494
20_AACCAACTCTCCTGTG-1_1,BC2,4.094403
20_AACCACAAGCATGCAG-1_1,BC2,3.335202


In [24]:
# Median noise value per ID, ignoring missing values. group_by() orders the
# result by ID, not by order of appearance.
noise_by_individual <- group_by(noiseDistributions_asDF, ID)
medians <- summarise(
  noise_by_individual,
  MedianValue = median(Value, na.rm = TRUE)
)
medians

ID,MedianValue
<chr>,<dbl>
BC1,3.234427
BC10,3.277864
BC11,3.255911
BC12,3.204199
BC13,3.229412
BC14,3.173003
BC2,3.129944
BC21,3.262629
BC22,3.152871
BC23,3.177993


In [25]:
# Label every individual with its experimental group.
# `medians` is ordered by ID (BC1, BC10, BC11, ...), whereas
# `experimentalgroup` follows the order of `idlist` (BC2, BC3, BC4, ...).
# Assigning the vector positionally would attach the wrong group to most
# individuals, so each ID is looked up in `idlist` first. IDs that are not in
# `idlist` get NA instead of a silently wrong label.
medians <- medians %>%
  mutate(ExperimentalGroup = experimentalgroup[match(ID, idlist)])

In [26]:
# Translate a "/"-separated string of Entrez gene IDs into a "/"-separated
# string of gene symbols.
#
# ids: character string (or vector of strings) of Entrez IDs joined by "/".
# Returns a single string with the mapped symbols joined by "/"; IDs with
# several symbols use the first one (multiVals = "first").
#
# The annotation packages are referenced by namespace because the notebook
# never attaches AnnotationDbi or org.Mm.eg.db with library().
idstoname <- function(ids) {
    ids <- unlist(strsplit(ids, "/"))
    genenames <- AnnotationDbi::mapIds(
        org.Mm.eg.db::org.Mm.eg.db,
        keys = ids,
        column = "SYMBOL",
        keytype = "ENTREZID",
        multiVals = "first"
    )
    paste(genenames, collapse = "/")
}

In [27]:
# Reduce one table cell to a single value.
#
# cell: a vector (possibly empty or NULL).
# Returns the first element of `cell`, or 0 when `cell` has no elements.
# A cell holding a single NA is returned as NA.
clean_cell <- function(cell) {
  if (length(cell) > 0) {
    cell[1]
  } else {
    0
  }
}

In [28]:
# Empty accumulators for the differential-expression results.

# Gene-name vectors (start as NULL, which is what c() returns).
differentially_expressed_genes_old <- NULL
differentially_expressed_genes_young <- NULL
significant_genes_celltype_specific_OLD <- NULL
significant_genes_celltype_specific_YOUNG <- NULL

# Tables that collect the per-cell-type DE results.
list_of_cell_specific_de_results_OLD <- data.frame()
list_of_cell_specific_de_results_YOUNG <- data.frame()

In [29]:
# Distinct Celltype values present in the object's metadata.
celltypes <- unique(data@meta.data[["Celltype"]])

In [30]:
# Differential expression between the old Het and old Iso individuals.

# Short IDs of the individuals in each old group, and the group labels.
isoid <- c("BC3", "BC4", "BC6", "BC5", "BC9", "BC10")
hetid <- c("BC1", "BC7", "BC13", "BC21")
isoname <- "OldIso"
hetname <- "OldHet"
prefix <- "Old"

# Cells of any other individual get NA and take no part in the comparison.
metadata$group <- ifelse(metadata$ID %in% isoid, isoname,
                         ifelse(metadata$ID %in% hetid, hetname, NA))

# Attach the label to the object by cell barcode. `metadata` was taken before
# `data` was subset on hash.ID and has the doublets removed, so overwriting
# data@meta.data with it could leave the metadata rows out of step with the
# cells in the object. Cells that are absent from `metadata` get NA.
data <- AddMetaData(
  data,
  metadata = setNames(
    metadata$group[match(colnames(data), rownames(metadata))],
    colnames(data)
  ),
  col.name = "group"
)

# get list of important genes for old
# logfc.threshold = 0 keeps every tested gene so the thresholds below are
# the only filter.
old_de_results <- FindMarkers(
  data,
  ident.1 = hetname,
  ident.2 = isoname,
  group.by = "group",
  logfc.threshold = 0
)
old_de_results <- old_de_results %>%
  mutate(
    significant = ifelse(
      p_val_adj < 0.005 & abs(avg_log2FC) > 0.25, "yes", "no"
    )
  )

old_significant_data <- old_de_results %>% filter(significant == "yes")

old_significant_genes <- rownames(old_significant_data)

In [None]:
# Differential expression between the young Het and young Iso individuals.

# Short IDs of the individuals in each young group, and the group labels.
isoid <- c("BC11", "BC12", "BC23", "BC24")
hetid <- c("BC2", "BC8", "BC14", "BC22")

isoname <- "YoungIso"
hetname <- "YoungHet"
prefix <- "Young"

# Cells of any other individual get NA and take no part in the comparison.
metadata$group <- ifelse(metadata$ID %in% isoid, isoname,
                         ifelse(metadata$ID %in% hetid, hetname, NA))

# Attach the label to the object by cell barcode. `metadata` was taken before
# `data` was subset on hash.ID and has the doublets removed, so overwriting
# data@meta.data with it could leave the metadata rows out of step with the
# cells in the object. Cells that are absent from `metadata` get NA.
data <- AddMetaData(
  data,
  metadata = setNames(
    metadata$group[match(colnames(data), rownames(metadata))],
    colnames(data)
  ),
  col.name = "group"
)

# logfc.threshold = 0 keeps every tested gene so the thresholds below are
# the only filter.
young_de_results <- FindMarkers(
  data,
  ident.1 = hetname,
  ident.2 = isoname,
  group.by = "group",
  logfc.threshold = 0
)
young_de_results <- young_de_results %>%
  mutate(
    significant = ifelse(
      p_val_adj < 0.005 & abs(avg_log2FC) > 0.25, "yes", "no"
    )
  )

young_significant_data <- young_de_results %>% filter(significant == "yes")

young_significant_genes <- rownames(young_significant_data)


In [None]:
# Genes significant in the old or the young comparison. union() keeps each
# gene once, so a gene significant in both comparisons is not counted twice
# when the length of this vector is reported.
all_significant_genes <- union(old_significant_genes, young_significant_genes)

In [None]:
# Report how many entries all_significant_genes holds.
length(all_significant_genes)

In [None]:
# Cell-type-specific differential expression for the OLD individuals:
# iterate through the cell types, run Het vs Iso within each one, and collect
# the results for the genes in all_significant_genes.

# The preceding cell leaves isoname / hetname and the "group" labels set to
# the young comparison, so the old grouping is re-established here; otherwise
# the table named OLD would hold young results.
isoid <- c("BC3", "BC4", "BC6", "BC5", "BC9", "BC10")
hetid <- c("BC1", "BC7", "BC13", "BC21")
isoname <- "OldIso"
hetname <- "OldHet"

metadata$group <- ifelse(metadata$ID %in% isoid, isoname,
                         ifelse(metadata$ID %in% hetid, hetname, NA))
# Labels are attached by cell barcode; cells absent from `metadata` get NA.
data <- AddMetaData(
  data,
  metadata = setNames(
    metadata$group[match(colnames(data), rownames(metadata))],
    colnames(data)
  ),
  col.name = "group"
)

# NOTE(review): celltypes[-11] drops the 11th cell type by position; confirm
# which cell type that is, since the order of unique() depends on the data.
celltypes_to_test <- as.character(celltypes[-11])

# Results are collected per cell type and combined once at the end, so
# re-running this cell replaces the tables instead of appending duplicates.
old_results_by_celltype <- vector("list", length(celltypes_to_test))
old_significant_by_celltype <- vector("list", length(celltypes_to_test))

for (i in seq_along(celltypes_to_test)) {
    celltype <- celltypes_to_test[[i]]

    print(celltype)
    prefix <- paste(celltype, "Old")

    de_results <- FindMarkers(
        subset(data, subset = Celltype == celltype),
        ident.1 = hetname,
        ident.2 = isoname,
        group.by = "group",
        logfc.threshold = 0
    )
    de_results <- de_results %>%
        mutate(significant = ifelse(p_val_adj < 0.005 & abs(avg_log2FC) > 0.25, "yes", "no"))
    significantdata <- de_results %>% filter(significant == "yes")

    significantgenes <- rownames(significantdata)

    # Keep only the genes that were significant in the whole-dataset
    # comparisons, with the gene name as a regular column.
    df_with_genes <- de_results %>%
        rownames_to_column(var = "gene") %>%
        filter(gene %in% all_significant_genes)

    df_with_genes["Cell_Type"] <- celltype

    old_results_by_celltype[[i]] <- df_with_genes
    old_significant_by_celltype[[i]] <- significantgenes
}

list_of_cell_specific_de_results_OLD <- bind_rows(old_results_by_celltype)
significant_genes_celltype_specific_OLD <- unlist(old_significant_by_celltype, use.names = FALSE)

In [None]:
# Reshape the old per-cell-type results into a gene x cell-type table of
# avg_log2FC values for the heatmap.
wide_data <- list_of_cell_specific_de_results_OLD %>%
  select(gene, Cell_Type, avg_log2FC) %>%
  pivot_wider(names_from = Cell_Type, values_from = avg_log2FC)

# If a gene/cell-type pair occurs more than once, pivot_wider() produces list
# cells; clean_cell keeps the first value and turns empty cells into 0.
wide_data <- as.data.frame(lapply(wide_data, function(col) sapply(col, clean_cell)))

wide_data_df <- as.data.frame(wide_data)

rownames(wide_data_df) <- wide_data_df$gene

wide_data_df <- wide_data_df %>%
  select(-gene)

# A gene with no result in a cell type is left as NA by pivot_wider(), and
# clean_cell does not change a length-1 NA. Fill those with 0, the same value
# clean_cell uses for empty cells, so that the clustering in the heatmap is
# not given missing values.
wide_data_df[is.na(wide_data_df)] <- 0

In [None]:
# Show the number of rows and columns of the old heatmap table.
dim(wide_data_df)

In [None]:
# Row-scaled heatmap of the old gene x cell-type table, clustering rows and
# columns with euclidean distance and complete linkage.
old_heatmap_input <- as.matrix(wide_data_df)
pheatmap(
  old_heatmap_input,
  clustering_method = "complete",
  clustering_distance_cols = "euclidean",
  clustering_distance_rows = "euclidean",
  scale = "row",
  fontsize_col = 10,
  fontsize_row = 10
)

In [None]:
# Same heatmap as above, but without clustering the rows.
# The call previously went to base heatmap(), which has none of the
# clustering_* / fontsize_* arguments (they were forwarded to the plotting
# layer and ignored with warnings), and Rowv = FALSE there still clusters the
# rows. pheatmap() understands all of these options; cluster_rows = FALSE
# keeps the rows in table order.
# NOTE(review): assumes "no row clustering" was the intent of Rowv=FALSE -
# confirm.
pheatmap(as.matrix(wide_data_df),
         scale = "row",
         cluster_rows = FALSE,
         clustering_distance_cols = "euclidean",
         clustering_method = "complete",
         fontsize_row = 10,
         fontsize_col = 10
)

In [None]:
# Switch the grouping to the young comparison before the young cell-type loop.

# Short IDs of the individuals in each young group, and the group labels.
isoid <- c("BC11", "BC12", "BC23", "BC24")
hetid <- c("BC2", "BC8", "BC14", "BC22")

isoname <- "YoungIso"
hetname <- "YoungHet"
prefix <- "Young"

# Cells of any other individual get NA and take no part in the comparison.
metadata$group <- ifelse(metadata$ID %in% isoid, isoname,
                         ifelse(metadata$ID %in% hetid, hetname, NA))

# Attach the label to the object by cell barcode. `metadata` was taken before
# `data` was subset on hash.ID and has the doublets removed, so overwriting
# data@meta.data with it could leave the metadata rows out of step with the
# cells in the object. Cells that are absent from `metadata` get NA.
data <- AddMetaData(
  data,
  metadata = setNames(
    metadata$group[match(colnames(data), rownames(metadata))],
    colnames(data)
  ),
  col.name = "group"
)

In [None]:
# Cell-type-specific differential expression for the YOUNG individuals:
# iterate through the cell types, run Het vs Iso within each one (using the
# hetname / isoname / group labels set in the preceding cell), and collect the
# results for the genes in all_significant_genes.

# NOTE(review): celltypes[-11] drops the 11th cell type by position; confirm
# which cell type that is, since the order of unique() depends on the data.
celltypes_to_test <- as.character(celltypes[-11])

# Results are collected per cell type and combined once at the end, so
# re-running this cell replaces the tables instead of appending duplicates.
young_results_by_celltype <- vector("list", length(celltypes_to_test))
young_significant_by_celltype <- vector("list", length(celltypes_to_test))

for (i in seq_along(celltypes_to_test)) {
    celltype <- celltypes_to_test[[i]]

    print(celltype)
    prefix <- paste(celltype, "Young")

    de_results <- FindMarkers(
        subset(data, subset = Celltype == celltype),
        ident.1 = hetname,
        ident.2 = isoname,
        group.by = "group",
        logfc.threshold = 0
    )
    de_results <- de_results %>%
        mutate(significant = ifelse(p_val_adj < 0.005 & abs(avg_log2FC) > 0.25, "yes", "no"))
    significantdata <- de_results %>% filter(significant == "yes")

    significantgenes <- rownames(significantdata)

    # Keep only the genes that were significant in the whole-dataset
    # comparisons, with the gene name as a regular column.
    df_with_genes <- de_results %>%
        rownames_to_column(var = "gene") %>%
        filter(gene %in% all_significant_genes)

    # nrow() gives the single number the condition needs; dim() returns two
    # values, which is not a valid `if` condition.
    if (nrow(df_with_genes) > 0) {
        df_with_genes["Cell_Type"] <- celltype
        young_results_by_celltype[[i]] <- df_with_genes
    } else {
        # print() treats a second argument as `digits`, so the message is
        # built with paste() first.
        print(paste(celltype, "doesn't have any significant genes"))
    }

    young_significant_by_celltype[[i]] <- significantgenes
}

# Cell types that contributed nothing leave a NULL entry, which bind_rows()
# and unlist() skip.
list_of_cell_specific_de_results_YOUNG <- bind_rows(young_results_by_celltype)
significant_genes_celltype_specific_YOUNG <- unlist(young_significant_by_celltype, use.names = FALSE)

In [None]:
# Reshape the young per-cell-type results into a gene x cell-type table of
# avg_log2FC values for the heatmap.
wide_data_YOUNG <- list_of_cell_specific_de_results_YOUNG %>%
  select(gene, Cell_Type, avg_log2FC) %>%
  pivot_wider(names_from = Cell_Type, values_from = avg_log2FC)

# If a gene/cell-type pair occurs more than once, pivot_wider() produces list
# cells; clean_cell keeps the first value and turns empty cells into 0.
wide_data_YOUNG <- as.data.frame(lapply(wide_data_YOUNG, function(col) sapply(col, clean_cell)))

wide_data_df_YOUNG <- as.data.frame(wide_data_YOUNG)

rownames(wide_data_df_YOUNG) <- wide_data_df_YOUNG$gene

wide_data_df_YOUNG <- wide_data_df_YOUNG %>%
  select(-gene)

# A gene with no result in a cell type is left as NA by pivot_wider(), and
# clean_cell does not change a length-1 NA. Fill those with 0, the same value
# clean_cell uses for empty cells, so that the clustering in the heatmap is
# not given missing values.
wide_data_df_YOUNG[is.na(wide_data_df_YOUNG)] <- 0

In [None]:
# Row-scaled heatmap of the young gene x cell-type table, clustering rows and
# columns with euclidean distance and complete linkage.
young_heatmap_input <- as.matrix(wide_data_df_YOUNG)
pheatmap(
  young_heatmap_input,
  clustering_method = "complete",
  clustering_distance_cols = "euclidean",
  clustering_distance_rows = "euclidean",
  scale = "row",
  fontsize_col = 10,
  fontsize_row = 10
)

In [None]:
# Young gene x cell-type table as a matrix, with rows and columns put in
# alphabetical order of their names.
mat <- as.matrix(wide_data_df_YOUNG)

# drop = FALSE keeps the result a matrix even when only one gene or one cell
# type is present; without it the result collapses to a plain vector.
mat_sorted <- mat[order(rownames(mat)), order(colnames(mat)), drop = FALSE]
