In [1]:
import rpy2
from rpy2.robjects import pandas2ri
pandas2ri.activate()

In [2]:
%load_ext rpy2.ipython

In [4]:
%%R
# Load required libraries
library(ChIPseeker)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(clusterProfiler)
library(ggplot2)

# Set your working directory to where your files are located
setwd("/beegfs/scratch/ric.broccoli/kubacki.michal/SRF_ChipSeq/custom_pipeline")

Error in library(ChIPseeker) : there is no package called ‘ChIPseeker’


RInterpreterError: Failed to parse and evaluate line '# Load required libraries\nlibrary(ChIPseeker)\nlibrary(TxDb.Mmusculus.UCSC.mm10.knownGene)\nlibrary(clusterProfiler)\nlibrary(ggplot2)\n\n# Set your working directory to where your files are located\nsetwd("/beegfs/scratch/ric.broccoli/kubacki.michal/SRF_ChipSeq/custom_pipeline")\n'.
R error message: 'Error in library(ChIPseeker) : there is no package called ‘ChIPseeker’'

In [None]:
%%R
# Define sample groups
endogenous_samples <- c("NeuM2", "NeuM3", "NSCM1", "NSCM2", "NSCM3", "IgM")
exogenous_samples <- c("NeuV1", "NeuV2", "NeuV3", "NSCv1", "NSCv2", "NSCv3")

In [None]:
%%R
# Read in your narrowPeak files
peak_files <- list.files(path = "results/peaks", pattern = "*_peaks.narrowPeak$", full.names = TRUE)
peak_list <- lapply(peak_files, readPeakFile)
names(peak_list) <- gsub("_peaks.narrowPeak", "", basename(peak_files))

In [None]:
%%R
# Filter peak list to include only the specified samples
peak_list <- peak_list[c(endogenous_samples, exogenous_samples)]

In [None]:
%%R
# Create a txdb object
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene

In [None]:
%%R
# Plot the peak distribution relative to TSS
promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
tagMatrixList <- lapply(peak_list, getTagMatrix, windows=promoter)


In [None]:
%%R
# Separate endogenous and exogenous samples
tagMatrixList_endo <- tagMatrixList[endogenous_samples]
tagMatrixList_exo <- tagMatrixList[exogenous_samples]

In [None]:
%%R
# Plot average profiles
pdf("ChIP_seq_avg_profiles.pdf", width=12, height=8)
plotAvgProf(tagMatrixList_endo, xlim=c(-3000, 3000), conf=0.95, resample=500, 
            facet="row", title="Endogenous Samples")
plotAvgProf(tagMatrixList_exo, xlim=c(-3000, 3000), conf=0.95, resample=500, 
            facet="row", title="Exogenous Samples")
dev.off()

In [None]:
%%R
# Annotate peaks
peakAnnoList <- lapply(peak_list, annotatePeak, TxDb=txdb, 
                       tssRegion=c(-3000, 3000), verbose=FALSE)

In [None]:
%%R
# Plot genomic annotation
pdf("ChIP_seq_genomic_annotation.pdf", width=12, height=8)
plotAnnoBar(peakAnnoList[endogenous_samples])
plotAnnoBar(peakAnnoList[exogenous_samples])
dev.off()

In [None]:
%%R
# Plot distribution of peaks over chromosomes
pdf("ChIP_seq_peak_distribution.pdf", width=12, height=8)
plotDistribution(peakAnnoList[endogenous_samples], 
                 title="Endogenous Samples - Peak Distribution")
plotDistribution(peakAnnoList[exogenous_samples], 
                 title="Exogenous Samples - Peak Distribution")
dev.off()

In [None]:
%%R
# Compare peak numbers
peak_numbers <- sapply(peak_list, length)
peak_numbers_df <- data.frame(
  Sample = names(peak_numbers),
  PeakCount = peak_numbers,
  Condition = ifelse(names(peak_numbers) %in% endogenous_samples, "Endogenous", "Exogenous"),
  CellType = ifelse(grepl("Neu", names(peak_numbers)), "Neuron", "NSC")
)

In [None]:
%%R
ggplot(peak_numbers_df, aes(x = Sample, y = PeakCount, fill = CellType)) +
  geom_bar(stat = "identity") +
  facet_wrap(~ Condition, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Peak Counts Across Samples", y = "Number of Peaks")
ggsave("peak_counts_comparison.pdf", width = 12, height = 6)