/
calculateMSD.R
132 lines (123 loc) · 5.61 KB
/
calculateMSD.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#' Calculate Mean Squared Displacement (MSD)
#'
#' Calculation of the MSD of multiple tracks.
#' There are two methods for everaging MSD data from multiple tracks:
#' ensemble = for each time lag average all squared displacements from all tracks
#' time-averaged = find MSD for each track and then generate the average MSD from these curves
#' The MSD curves will be identical if all tracks are the same length, and diverge if not.
#' Standard deviation will be large for ensemble and smaller for time-averaged data.
#' Input is a data frame of tracks imported using readTrackMateXML()
#'
#' @param df data frame must include at a minimum - trace (track ID), x, y and t (in real coords)
#' @param method string. Either "ensemble" or "timeaveraged" (default)
#' @param N numeric variable for MSD. dt should be up to 1/N of number of data points (4 recommended)
#' @param short numeric variable for the shortest number of points we will analyse. Note, this uses the number of frames from start, not number of points in track, i.e. a track with <short points and many gaps will remain
#' @return list of three data frames
#' @examples
#' xmlPath <- system.file("extdata", "ExampleTrackMateData.xml", package="TrackMateR")
#' tmObj <- readTrackMateXML(XMLpath = xmlPath)
#' tmObj <- correctTrackMateData(tmObj, xyscalar = 0.04)
#' tmDF <- tmObj[[1]]
#' msdObj <- calculateMSD(df = tmDF, method = "ensemble", N = 3, short = 8)
#' @export
calculateMSD <- function(df, method = "timeaveraged", N = 4, short = 0) {
x <- y <- NULL
if(!inherits(df, "data.frame")) {
cat("Function requires a data frame.\n")
return(NULL)
}
# make a list of all traces (this is the filtered list of traces from TrackMate XML)
traceList <- unique(df$trace)
tList <- list()
# for each trace find the frame offset
for (i in traceList) {
a <- df %>%
filter(trace == i) %>%
select(frame, x)
frame0 <- a$frame[1]
a <- a[-1,]
tList[[i]] <- a$frame - frame0
}
# filter out short traces
if(short > 0) {
traceList_temp <- subset(traceList,unlist(lapply(tList, max)) >= short)
tList_temp <- subset(tList,unlist(lapply(tList, max)) >= short)
if(length(traceList_temp) == 0) {
cat("No traces are long enough to calculate MSD. Reverting to short = 0.\n")
} else {
traceList <- traceList_temp
tList <- tList_temp
}
}
# find the maximum frame offset of all traces
tListmax <- max(unlist(lapply(tList, max)))
# create matrices for x and y coords one row for each frame, each trace in a column
displacementXMat <- matrix(data = NA, nrow = tListmax, ncol = length(traceList))
colnames(displacementXMat) <- traceList
rownames(displacementXMat) <- 1:tListmax
displacementYMat <- displacementXMat
# now we repeat the process to extract the x y coords for each frame offset, store in matrices.
for (i in traceList){
a <- df %>%
filter(trace == i) %>%
select(x, y, frame)
frame0 <- a$frame[1]
a <- a[-1,]
a$frame <- a$frame - frame0
displacementXMat[a$frame,i] <- a$x
displacementYMat[a$frame,i] <- a$y
}
# find the 90th centile longest trace (in terms of frame offset)
tListmax2 <- quantile(unlist(lapply(tList, max)), probs=.9)
# dt should be up to 1/4 of number of data points (default)
numberOfdeltaT = floor(tListmax2/N)
if(numberOfdeltaT == 0) {
cat("The number of frames is too few to calculate MSD.\n")
return(NULL)
}
# make matrix to store the averaged msd summary
msd <- matrix(data = NA, nrow = numberOfdeltaT, ncol = 5 )
colnames(msd) <- c("mean", "sd", "n", "size", "t")
tstep <- df$t[match(1,df$frame)]
# make matrix to store the msd curve for each track
trackmsd <- matrix(data = NA, nrow = numberOfdeltaT, ncol = length(traceList) )
colnames(trackmsd) <- traceList
for(deltaT in 1 : numberOfdeltaT){
# calculate displacement between points for x and y
deltaXCoords <- displacementXMat[(1 + deltaT) : tListmax,] - displacementXMat[1 : (tListmax-deltaT),]
deltaYCoords <- displacementYMat[(1 + deltaT) : tListmax,] - displacementYMat[1 : (tListmax-deltaT),]
# calculate squared displacement
squaredDisplacement <- deltaXCoords**2 + deltaYCoords**2
# we collect the MSD for each track (column)
if(length(dim(squaredDisplacement)) < 2) {
next
}
eachSquaredDisplacement <- colMeans(squaredDisplacement, na.rm = TRUE)
# summary statistics for each deltaT
if(method == "ensemble") {
# store the ensemble data
msd[deltaT,1] <- mean(squaredDisplacement, na.rm = TRUE) # average
msd[deltaT,2] <- sd(squaredDisplacement, na.rm = TRUE) # standard deviation
msd[deltaT,3] <- sum(!is.na(squaredDisplacement)) # n
} else {
# store the time-averaged data
msd[deltaT,1] <- mean(eachSquaredDisplacement, na.rm = TRUE) # average
msd[deltaT,2] <- sd(eachSquaredDisplacement, na.rm = TRUE) # standard deviation
msd[deltaT,3] <- sum(!is.na(eachSquaredDisplacement)) # n
}
# place the MSD for this deltaT and for every track into the matrix
trackmsd[deltaT,] <- eachSquaredDisplacement
}
# send msd curves for each track to compute the diffusive behaviour
alphas <- calculateAlpha(trackmsd, tstep)
# send x y displacements (1 frame lag) to calculate CVE
deltaXCoords <- displacementXMat[2 : tListmax,] - displacementXMat[1 : (tListmax-1),]
deltaYCoords <- displacementYMat[2 : tListmax,] - displacementYMat[1 : (tListmax-1),]
cves <- calculateCVE(deltaXCoords, deltaYCoords, traceList, tstep)
# format msd matrix into data frame
msd <- as.data.frame(msd)
msd$size <- c(1 : numberOfdeltaT)
msd$t <- msd$size * tstep
msdList <- list(msd,alphas,cves)
return(msdList)
}