# Relating regulon activity to Xi reactivation

### Run with conda env: R_scRNAseq

In [1]:
suppressPackageStartupMessages({
    library(Seurat)
    library(dplyr)
    library(readr)
    library(ggplot2)
    library(tidyr)
    library(viridis)
})

"package 'Seurat' was built under R version 3.6.2"

In [2]:
# Make the project root the working directory; the relative input and output
# paths used in this notebook are resolved against it.
setwd(dir = "/staging/leuven/stg_00041/Adrian/TALON_JANISZEWSKI_XCR2/")

### I/O

In [8]:
# Relative paths of the SCENIC-derived input tables, collected in one list:
#   regulon - per-cell regulon activity matrix (one row per cell)
#   targets - regulon -> target gene table with the target's chromosome
#   reg_chr - number of targets per regulon and chromosome
io <- list(
  regulon = "SCENIC/auc_mtx_5.csv",
  targets = "SCENIC/regulon_target_chr.csv",
  reg_chr = "SCENIC/reg_chr_counts.csv"
)

In [5]:
# Load the normalised allele-specific table (one row per gene and cell).
# The file's first column has no header, so readr names it `X1`.
allelic <- read_csv(
  "allele_specific/pre_processing/AJ_0065.9c_allelic_norm_complete.csv"
)

"Missing column names filled in: 'X1' [1]"Parsed with column specification:
cols(
  X1 = col_double(),
  gene = col_character(),
  RNA_id = col_character(),
  X = col_double(),
  X129 = col_double(),
  Cast = col_double(),
  sumReads = col_double(),
  ratioPercent = col_double(),
  ratioLog = col_double(),
  Pseudotime = col_double(),
  seurat_clusters_rename = col_double(),
  Timepoint = col_character(),
  Xi = col_character(),
  Chr = col_character(),
  total_reads = col_double(),
  mus_norm = col_double(),
  cast_norm = col_double(),
  ratio_norm = col_double(),
  sum_norm = col_double()
)


In [7]:
# Rename the cell identifier to `Cell` so it matches the key column of the
# regulon matrix, and give the cluster column its shorter name.
allelic <- dplyr::rename(
  allelic,
  Cell = RNA_id,
  cluster_replace = seurat_clusters_rename
)

## Import regulons and targets

In [9]:
# Read the three SCENIC tables from the paths registered in `io`.
regulon <- read_csv(io[["regulon"]])  # cells x regulon activity
targets <- read_csv(io[["targets"]])  # regulon / target gene / chromosome
reg_chr <- read_csv(io[["reg_chr"]])  # target counts per regulon and chromosome

Parsed with column specification:
cols(
  .default = col_double(),
  Cell = col_character()
)
See spec(...) for full column specifications.
"Missing column names filled in: 'X1' [1]"Parsed with column specification:
cols(
  X1 = col_double(),
  `Unnamed: 0` = col_double(),
  Regulon = col_character(),
  Targets = col_character(),
  Chr = col_character()
)
"Missing column names filled in: 'X1' [1]"Parsed with column specification:
cols(
  X1 = col_double(),
  Regulon = col_character(),
  Chr = col_character(),
  n_targets = col_double()
)


In [10]:
# Harmonise regulon identifiers across the three tables.
# Column names of the activity matrix: drop every literal dot first, then the
# "(+)" suffix.
names(regulon) <- names(regulon) %>%
  gsub("\\.", "", .) %>%
  gsub("\\(\\+\\)", "", .)

# The long-format tables only need the "(+)" suffix removed.
targets$Regulon <- targets$Regulon %>% gsub("\\(\\+\\)", "", .)
reg_chr$Regulon <- reg_chr$Regulon %>% gsub("\\(\\+\\)", "", .)

# Preview the cleaned tables.
head(regulon)
head(targets)
head(reg_chr)

Cell,2410141K09Rik,Akr1a1,Alx1,Alx3,Alx4,Arid3a,Arnt,Atf3,Atf4,...,Zfp148,Zfp362,Zfp410,Zfp42,Zfp467,Zfp667,Zic3,Zic5,Zmiz1,Zscan10
Day_9_Xi_Mus_33,0.04189857,0.1412515,0,0,0,0.17659295,0.0,0.07668644,0.09558015,...,0.0,0,0.0,0.06540736,0.07630722,0.04782211,0.0,0,0.075012198,0.08569761
Day_10_Xi_Mus_25,0.17300972,0.1295921,0,0,0,0.0563064,0.0802993,0.02993759,0.16795411,...,0.01319862,0,0.0,0.11827622,0.0,0.11238005,0.0,0,0.007973111,0.1557526
Day_10_Xi_Mus_32,0.1066065,0.1295921,0,0,0,0.10619694,0.06599512,0.05396919,0.18447566,...,0.0,0,0.0,0.08585769,0.0,0.12837342,0.0,0,0.031736208,0.12836103
Day_9_Xi_Mus_27,0.11266982,0.1293932,0,0,0,0.13793736,0.03021848,0.07551412,0.14347473,...,0.03371463,0,0.0,0.09190361,0.0,0.03700477,0.0,0,0.039008209,0.11342516
Day_9_Xi_Mus_36,0.13542941,0.1290948,0,0,0,0.11177643,0.0,0.04981346,0.12447627,...,0.0,0,0.1712373,0.08658987,0.07376799,0.06449128,0.0,0,0.023464786,0.16054179
Day_10_Xi_Mus_30,0.24870989,0.1293932,0,0,0,0.02614772,0.0,0.03021372,0.15539862,...,0.029002,0,0.0,0.18275565,0.05202173,0.08431324,0.1364037,0,0.009582028,0.21361607


X1,Unnamed: 0,Regulon,Targets,Chr
0,0,2410141K09Rik,Cdk4,chr10
1,1,2410141K09Rik,Gtf2h1,chr7
2,2,2410141K09Rik,Sae1,chr7
3,3,2410141K09Rik,Qtrt1,chr9
4,4,2410141K09Rik,Apex1,chr14
5,5,2410141K09Rik,Pop1,chr15


X1,Regulon,Chr,n_targets
0,2410141K09Rik,chr1,5
1,2410141K09Rik,chr10,8
2,2410141K09Rik,chr11,8
3,2410141K09Rik,chr13,1
4,2410141K09Rik,chr14,2
5,2410141K09Rik,chr15,3


In [11]:
# Keep only the X-linked rows of the allelic table.
allelic2 <- filter(allelic, Chr == "chrX")

# One row per cell: `Xi` is the mean of `ratio_norm` over that cell's chrX
# rows. na.omit() discards every row that has an NA in *any* column before
# averaging. Pseudotime is part of the grouping key so it is carried along.
allele_X <- summarize(
  group_by(na.omit(allelic2), Cell, Pseudotime),
  Xi = mean(ratio_norm)
)

In [12]:
# Attach each cell's chrX summary (Pseudotime, Xi) to its regulon activities.
# Cells without a match in `allele_X` are kept, with NA in the added columns.
regulon_xcr <- regulon %>%
  left_join(allele_X, by = "Cell")

In [13]:
# Move the cell IDs (first column) into the row names. data.frame() applies
# its default name checking here, so non-syntactic column names are altered
# (e.g. a name starting with a digit gets an "X" prefix).
regulon_xcr <- data.frame(regulon_xcr[, -1], row.names = regulon_xcr$Cell)

# Discretise the mean chrX ratio into classes:
#   0: Xi < 0.15      1: 0.15 < Xi < 0.85      2: Xi > 0.85
# Class 2 is removed, leaving a binary 0/1 response, and the continuous Xi
# column is dropped.
# NOTE(review): a value exactly equal to 0.15 or 0.85, or an NA Xi, matches no
# branch, becomes NA and is then silently removed by the filter as well.
regulon_xcr_class <- regulon_xcr %>%
  mutate(
    xcr = case_when(
      Xi < 0.15 ~ 0,
      Xi > 0.85 ~ 2,
      Xi > 0.15 & Xi < 0.85 ~ 1
    )
  ) %>%
  filter(xcr != 2) %>%
  select(-Xi)

In [14]:
# Univariate logistic regressions of the binary class `xcr` on each remaining
# column of `regulon_xcr_class` (every regulon, plus Pseudotime).
#
# Result: `reg_xcr_logit_summary`, a list named by predictor. Each element is
# the slope row of summary(glm)$coefficients:
#   Estimate, Std. Error, z value, Pr(>|z|)
#
# The predictors are selected by name (everything except the response) rather
# than by counting columns, so the loop no longer depends on `xcr` being the
# last column and also works when there is only a single predictor.
reg_xcr_logit_predictors <- setdiff(names(regulon_xcr_class), "xcr")

# Preallocate the result list instead of growing it inside the loop.
reg_xcr_logit_summary <- vector("list", length(reg_xcr_logit_predictors))
names(reg_xcr_logit_summary) <- reg_xcr_logit_predictors

for (i in seq_along(reg_xcr_logit_predictors)) {
  model <- glm(
    regulon_xcr_class$xcr ~ regulon_xcr_class[[reg_xcr_logit_predictors[i]]],
    family = binomial(link = "logit")
  )

  coeffs <- summary(model)$coefficients

  if (nrow(coeffs) >= 2) {
    # Row 1 is the intercept, row 2 the predictor's slope.
    reg_xcr_logit_summary[[i]] <- coeffs[2, ]
  } else {
    # summary.glm() omits aliased coefficients, e.g. when the predictor is
    # constant across the retained cells. Record NAs with the usual names
    # instead of failing with "subscript out of bounds".
    reg_xcr_logit_summary[[i]] <- setNames(
      rep(NA_real_, ncol(coeffs)),
      colnames(coeffs)
    )
  }
}

"glm.fit: fitted probabilities numerically 0 or 1 occurred"

In [15]:
# Assemble the per-predictor coefficient vectors into a table with one COLUMN
# per predictor and four rows, in this order:
#   Estimate, Std. Error, z value, Pr(>|z|)
#
# The table is built column-wise explicitly. bind_rows() on a list of named
# vectors only gave this layout in older dplyr releases; newer ones bind each
# named vector as a row, which would transpose the table and break the
# reshaping that follows. check.names = FALSE keeps the list names untouched;
# as_tibble() keeps the result a tibble (row names are dropped).
reg_xcr_logit_summary_df <- as_tibble(
  data.frame(reg_xcr_logit_summary, check.names = FALSE)
)

In [16]:
# Display the coefficient table: one column per predictor, four rows
# (estimate, standard error, z value, p value).
reg_xcr_logit_summary_df

X2410141K09Rik,Akr1a1,Alx1,Alx3,Alx4,Arid3a,Arnt,Atf3,Atf4,Atf6,...,Zfp362,Zfp410,Zfp42,Zfp467,Zfp667,Zic3,Zic5,Zmiz1,Zscan10,Pseudotime
58.77898,-9.1614427261,-52.16469998,-102.99906297,-113.0967039,-83.61748,-60.32326,-134.2494,12.45506,-28.48668,...,-48.6182203,-22.42376271,87.71982,-27.44046,-5.21188499,20.06137,10.73066,-119.7181,74.84703,0.2615402
6.359557,2.6816911937,21.24364471,50.7054944,79.7335027,9.372254,10.96408,14.2113,2.457449,5.276522,...,54.0425834,9.11854032,9.031521,5.836103,2.63747514,2.394141,1.125264,15.25423,7.924068,0.03087445
9.242623,-3.4162929526,-2.4555438,-2.03131957,-1.4184339,-8.92181,-5.501902,-9.446668,5.068288,-5.398761,...,-0.899628,-2.4591395,9.71263,-4.701847,-1.97608876,8.379359,9.536127,-7.84819,9.445531,8.471091
2.405282e-20,0.0006347992,0.01406716,0.04222259,0.1560641,4.587269e-19,3.757165e-08,3.497891e-21,4.01409e-07,6.710288e-08,...,0.3683183,0.01392705,2.663744e-22,2.578188e-06,0.04814473,5.3216860000000003e-17,1.482657e-21,4.220847e-15,3.536053e-21,2.431048e-17


In [17]:
# Reshape the 4 x N coefficient table into one row per predictor with columns
# regulon, estimate, p, se, z, sorted by decreasing slope estimate.
# Note that the "regulon" column also contains the Pseudotime model.
#
# gather() stacks the table column by column, so every predictor contributes
# four consecutive values in the row order of the input; the labels below are
# recycled once per predictor to tag them.
reg_xcr_logit_stat_labels <- c("estimate", "se", "z", "p")

# Guard against a transposed or incomplete input, which would otherwise be
# mislabelled without any error.
stopifnot(
  nrow(reg_xcr_logit_summary_df) == length(reg_xcr_logit_stat_labels)
)

reg_xcr_logit_summary_df_ordered <- reg_xcr_logit_summary_df %>%
  gather(key = "regulon", value = "estimate") %>%
  # Number of repeats follows the number of fitted predictors instead of a
  # hard-coded 312.
  mutate(
    id = rep(reg_xcr_logit_stat_labels, ncol(reg_xcr_logit_summary_df))
  ) %>%
  spread(id, estimate) %>%
  arrange(desc(estimate))

In [19]:
# Show the predictors with the largest positive slope estimates (first 6 rows).
head(reg_xcr_logit_summary_df_ordered)

regulon,estimate,p,se,z
Hcfc1,97.31749,6.280124e-28,8.883296,10.955111
Trp53,87.87226,1.355077e-20,9.444767,9.303805
Zfp42,87.71982,2.663744e-22,9.031521,9.71263
Ezh2,85.28313,5.1393479999999995e-20,9.309315,9.161053
Bclaf1,76.31879,9.704595e-27,7.129658,10.704411
Rest,76.05423,1.073904e-21,7.947539,9.569532


In [26]:
# Scatter plot of Tcf7l2 regulon activity against the per-cell mean chrX
# ratio, coloured by pseudotime, with a smoothed trend line. The dashed lines
# mark the 0.15 and 0.85 cut-offs used to classify cells.
Tcf7l2 <- ggplot(
  data = regulon_xcr,
  mapping = aes(x = Tcf7l2, y = Xi, color = Pseudotime)
) +
  geom_point() +
  geom_smooth(color = "firebrick3") +
  geom_hline(yintercept = 0.15, linetype = "dashed") +
  geom_hline(yintercept = 0.85, linetype = "dashed") +
  # Zoom the y axis to [0, 1] without discarding data from the smoother.
  coord_cartesian(ylim = c(0, 1)) +
  labs(
    x = "Regulon Activity",
    y = "Mean Xi/(Xi+Xa) ratio",
    title = paste0("Regulon: ", "Tcf7l2")
  ) +
  theme_classic()

# Render the plot in the notebook.
Tcf7l2

`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."

ERROR: Error in value[[3L]](cond): could not open file '/tmp/RtmpN9jwWs/file8b3655822b6.png'


plot without title

In [34]:
# Write the Tcf7l2 plot to an 8 x 3 inch PDF.
pdf(
  file = "allele_specific/XCR/Regulon_XCR_logit_Tcf7l2.pdf",
  width = 8,
  height = 3,
  useDingbats = FALSE
)
print(Tcf7l2)
dev.off()

"Cannot open temporary file '/tmp/RtmpN9jwWs/pdf8b3bc7fa1f' for compression (reason: No space left on device); compression has been turned off for this device"

In [27]:
# One activity-vs-Xi scatter plot per top-ranked regulon (at most 20).
#
# The ranked table also contains the Pseudotime model; it is excluded here
# because Pseudotime is not a regulon and is needed as an id column below.
top_20_logit <- head(
  setdiff(reg_xcr_logit_summary_df_ordered$regulon, "Pseudotime"),
  20
)

# Long format: one row per (cell, regulon) with that regulon's activity.
# Columns are picked by name with base subsetting, which avoids passing a
# bare external character vector to select().
regulon_xcr_top <- regulon_xcr[
  , c(top_20_logit, "Xi", "Pseudotime"),
  drop = FALSE
] %>%
  gather(regulon, activity, -Xi, -Pseudotime)

# Iterate over the regulons actually selected (not a fixed 1:20), so fewer
# than 20 available regulons cannot produce empty NA-titled plots.
plots_logit <- vector("list", length(top_20_logit))

for (i in seq_along(top_20_logit)) {
  plots_logit[[i]] <- regulon_xcr_top %>%
    filter(regulon == top_20_logit[i]) %>%
    ggplot(., aes(activity, Xi, color = Pseudotime)) +
    geom_point() +
    geom_smooth(color = "firebrick3") +
    geom_hline(yintercept = 0.15) +
    # Zoom the y axis to [0, 1] without discarding data from the smoother.
    coord_cartesian(ylim = c(0, 1)) +
    ylab("Mean Xi/(Xi+Xa) ratio") +
    xlab("Regulon Activity") +
    ggtitle(paste0("Regulon: ", top_20_logit[i]))
}

In [28]:
# Write the per-regulon plots to a single tall PDF, two plots per row and
# without legends.
# NOTE(review): the file name says "top50", but `plots_logit` is built from
# the top 20 regulons; the name is kept as-is in case other steps rely on it.
pdf(
  file = "allele_specific/XCR/reg_xcr_logit_top50.pdf",
  width = 8,
  height = 30,
  useDingbats = FALSE
)
CombinePlots(plots = plots_logit, ncol = 2, legend = "none")
dev.off()

`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 rows containing missing values (geom_point)."`geom_smooth()` using method = 'loess' and formula 'y ~ x'
"Removed 75 r