/
MitoBreakDeletionsAndOrlovRepeats.Rmd
125 lines (113 loc) · 3.47 KB
/
MitoBreakDeletionsAndOrlovRepeats.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
---
title: "Exploratory MITOBREAK and Orlov's data analysis"
output:
workflowr::wflow_html:
toc: true
toc_float: yes
theme: journal
highlight: textmate
code_folding: hide
df_print: paged
---
```{r setup, echo=FALSE, include=FALSE}
knitr::opts_chunk$set(
autodep = TRUE,
cache = FALSE,
cache.lazy = FALSE,
dev = c("png", "pdf"),
echo = TRUE,
error = FALSE,
fig.align = "center",
fig.width = 8,
fig.asp = 0.618,
message = FALSE,
warning = FALSE
)
# Load tidyverse infrastructure packages
suppressPackageStartupMessages({
library(tidyverse)
library(here)
})
# Set paths
src_dir <- here('code')
raw_dir <- here('1_Raw')
data_dir <- here('2_Derived')
plots_dir <- here("3_Results")
# set seed
reseed <- 42
set.seed(seed = reseed)
```
## 1: READ MITOBREAK AND KEEP ONLY MAJOR ARC DELETIONS:
```{r load-mitobreak}
breaks <- read.table(
here(
raw_dir,
"MitoBreakDB_12122019.csv"),
sep = ',',
header = TRUE)
breaks$X5..breakpoint <-
as.numeric(as.character(breaks$X5..breakpoint))
summary(breaks$X5..breakpoint)
breaks$X3..breakpoint <-
as.numeric(as.character(breaks$X3..breakpoint))
summary(breaks$X3..breakpoint)
breaks <- breaks[!is.na(breaks$X3..breakpoint) &
!is.na(breaks$X5..breakpoint),]
```
```{r plt-mitobreak-hist}
par(mfrow = c(2, 1))
hist(breaks$X5..breakpoint, breaks = seq(0, 16600, 100))
hist(breaks$X3..breakpoint, breaks = seq(0, 16600, 100))
nrow(breaks)
breaks = breaks[breaks$Deletion.of.replication.origins == 'None', ]
nrow(breaks)
breaks = breaks[breaks$Location.of.the.deleted.region == 'Inside the major arc', ]
nrow(breaks)
summary(breaks$X5..breakpoint)
summary(breaks$X3..breakpoint)
hist(breaks$X5..breakpoint, breaks = seq(0, 16600, 100))
hist(breaks$X3..breakpoint, breaks = seq(0, 16600, 100))
# OH: 110-441
# OL: 5721-5781
for (i in 1:nrow(breaks))
{
if (breaks$X5..breakpoint[i] < 110) {
breaks$X5..breakpoint[i] = breaks$X5..breakpoint[i] + 16569
}
if (breaks$X3..breakpoint[i] < 110) {
breaks$X3..breakpoint[i] = breaks$X3..breakpoint[i] + 16569
}
}
summary(breaks$X5..breakpoint)
summary(breaks$X3..breakpoint)
nrow(breaks)
breaks = breaks[breaks$X5..breakpoint > 5781 &
breaks$X3..breakpoint > 5781, ]
nrow(breaks)
summary(breaks$X5..breakpoint)
summary(breaks$X3..breakpoint)
hist(breaks$X5..breakpoint, breaks = seq(0, 16700, 100))
hist(breaks$X3..breakpoint, breaks = seq(0, 16700, 100))
```
## 2: read Orlovs's direct perfect repeats
```{r load-orlovs-data}
Rep <- read.table(
here(raw_dir, "Homo_sapiens.input.out4out.SecondPart"),
header = TRUE,
sep = '\t'
) # 767
Rep <- Rep[Rep$RepName == 'Direct_repeat', ] # 330
Rep$RepStart <- as.numeric(as.character(Rep$RepStart))
Rep$RepEnd <- as.numeric(as.character(Rep$RepEnd))
Rep <- Rep[Rep$RepStart > 5781 &
Rep$RepStart < 16569 &
Rep$RepEnd > 5781 & Rep$RepEnd < 16569, ] # 171
```
## 3: plot repeats and breakpoints
```{r plt-breakpoints}
par(mfrow=c(1,1))
plot(breaks$X5..breakpoint,breaks$X3..breakpoint,xlim = c(5781,16569), ylim=c(16569,5781), col = rgb(0.5,0.5,0.5,0.2), pch = 16, xlab = '5\'breakpoint', ylab = '3\'breakpoint')
par(new= TRUE)
plot(Rep$RepEnd,Rep$RepStart, xlim = c(5781,16569), ylim=c(16569,5781), col = rgb(1,0.1,0.1,0.5), pch = 16, xlab = '5\'breakpoint', ylab = '3\'breakpoint')
legend(12000, 8000, c('deletions','repeats'), col=c(rgb(0.5,0.5,0.5,0.5),rgb(1,0.1,0.1,0.5)), pch = 16)
```