In [None]:
# Load necessary libraries
library(dplyr)
library(Matrix)
library(data.table)
library(Seurat)
library(ggplot2)
library(RColorBrewer)
library(cowplot)
library(tidyverse)
library(tidyr)
library(viridis)
library(harmony)
library(MASS) #
library(scales)
library(AnnotationHub)	
library(org.Hs.eg.db)  
library(clusterProfiler)
library(biomaRt)
library(ggthemes)
library(msigdbr)
library(conflicted)
library(stringr)


In [None]:

# ---- Cell-type composition per chip and area ----

# Read the serialized input object.
# NOTE(review): './' names a directory rather than an .rds file; presumably a
# placeholder for the real input path. Confirm before running.
data <- readRDS('./')

# Tally the rows of `data` for every (Chips, area_celltype) pair, then split
# the combined label: `celltype` keeps the text before the first "-" and
# `area` keeps the text after the last "-".
count_mtx <- data %>%
  group_by(Chips, area_celltype) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  mutate(
    celltype = gsub("-.*", "", area_celltype),
    area = gsub(".*-", "", area_celltype)
  )

# Sum of the counts for each (Chips, area) pair; this is the denominator used
# for the percentages below.
area_total_count <- ungroup(
  summarise(
    group_by(count_mtx, Chips, area),
    total_count = sum(count)
  )
)

# Attach the per-area totals and derive the remaining columns:
#   percentage          share of the label within its (Chips, area) total
#   Class               `Chips` cut off right after its first "AD" or "Con"
#                       (values containing neither are left unchanged)
#   area_celltype_class "<Class>/<area_celltype>" key used for later grouping
count_mtx <- count_mtx %>%
  left_join(area_total_count, by = c("Chips", "area")) %>%
  mutate(
    percentage = count / total_count * 100,
    Class = gsub("(AD|Con).*", "\\1", Chips),
    area_celltype_class = paste0(Class, "/", area_celltype)
  )

# Replace outliers in `x` with NA using quantile-based fences.
#
# A value counts as an outlier when it lies below
#   quantile(x, probs[1]) - k * IQR(x)
# or above
#   quantile(x, probs[2]) + k * IQR(x).
#
# NOTE: with the default `probs` the upper fence starts at the 80th
# percentile while the margin is still derived from the 25%-75% IQR, so the
# filter is more lenient on the high side than the conventional 1.5 * IQR
# rule. Pass `probs = c(.25, .75)` to get the conventional fences.
#
# Arguments:
#   x      Vector of values to filter.
#   na.rm  Forwarded to quantile() and IQR(); when TRUE, missing values are
#          ignored while computing the fences.
#   ...    Further arguments forwarded to quantile() only.
#   probs  Length-2 numeric vector: the lower and upper quantile
#          probabilities the fences are anchored on.
#   k      Single non-negative number multiplying the IQR to get the margin.
#
# Returns `x` (same length and order) with every outlier set to NA; values
# that were already NA stay NA.
remove_outliers <- function(x, na.rm = TRUE, ..., probs = c(.25, .80),
                            k = 1.5) {
  stopifnot(
    is.numeric(probs), length(probs) == 2L, !anyNA(probs),
    is.numeric(k), length(k) == 1L, !is.na(k), k >= 0
  )

  anchors <- unname(quantile(x, probs = probs, na.rm = na.rm, ...))
  margin <- k * IQR(x, na.rm = na.rm)

  # which() drops NA comparisons, so missing values in `x` are never indexed.
  outlying <- which(x < (anchors[1] - margin) | x > (anchors[2] + margin))
  x[outlying] <- NA
  x
}

# For every area_celltype_class, blank out outlying percentages with
# remove_outliers() and average the values that remain.
average_counts <- summarise(
  group_by(count_mtx, area_celltype_class),
  mean = mean(remove_outliers(percentage), na.rm = TRUE)
)

