Permalink
Browse files

SS adds modified Jaccard distance.

  • Loading branch information...
Sohrab Salehi Sohrab Salehi
Sohrab Salehi authored and Sohrab Salehi committed Dec 23, 2016
1 parent ea67e09 commit 19fb2f6aa7c9554f4414c20942df739830c89342
Showing with 27 additions and 23 deletions.
  1. +13 −13 .Rhistory
  2. +6 −6 .Rproj.user/1240FD37/pcs/windowlayoutstate.pper
  3. +8 −4 R/decay.R
@@ -1,16 +1,3 @@
data <- readRDS(file)
fileName <- file_path_sans_ext(basename(file))
fileName <- gsub('xenograft.real.intersected.curated.', 'tnbc.xenograft.', fileName)
fileName <- gsub('.f0.gl0', '', fileName)
assign(fileName, data)
#save(fileName, file = paste0('./data/', fileName, '.rda'))
devtools::use_data(fileName)
}
rm(list=ls())
library(tools)
baseDir <- '/Users/sohrab/Google Drive/Masters/Thesis/scripts/improve/real.data/ddcrp.all/'
files <- list.files(baseDir, pattern = 'xenograft.real.intersected.curated.*.dat', full.names = T)
for (file in files) {
# file = files[[1]]
data <- readRDS(file)
fileName <- file_path_sans_ext(basename(file))
@@ -510,3 +497,16 @@ bulkDat$mutation_id <- paste0('mut', bulkDat$mutation_id)
seed <- name.utils.get.seed(i)
write.xlsx(bulkDat, file=file.path(targetDir, paste0('_additional_file_4_inputs_simulated', '.xlsx')), sheetName=paste0('seed_', seed, '_allele_counts'), append = T)
}
devtools::document()
devtools::load_all()
ddCLone
ddClone
library(ddClone)
library(ddClone)
library(ddclone)
ddClone
ddclone
modified.jaccard.dist
devtools::document()
devtools::load_all()
library(ddclone)
@@ -1,14 +1,14 @@
{
"left" : {
"panelheight" : 1162,
"splitterpos" : 799,
"panelheight" : 713,
"splitterpos" : 490,
"topwindowstate" : "NORMAL",
"windowheight" : 1200
"windowheight" : 751
},
"right" : {
"panelheight" : 1162,
"splitterpos" : 250,
"panelheight" : 713,
"splitterpos" : 153,
"topwindowstate" : "NORMAL",
"windowheight" : 1200
"windowheight" : 751
}
}
@@ -107,18 +107,22 @@ identity.s <- function(simulatedData, options=NULL) {
}
# FN-adjusted size of a binary vector: every 1 counts fully and every
# remaining entry contributes FN.rate.
# NOTE(review): presumably the expected number of true 1s when each observed 0
# is a false negative with probability FN.rate -- confirm against the model.
#
# X       binary (0/1 or logical) vector
# FN.rate weight applied to each entry of X that is not 1
mean.size <- function(X, FN.rate) {
  n.ones <- sum(X)
  n.rest <- length(X) - n.ones
  n.ones + n.rest * FN.rate
}
# Modified (false-negative aware) Jaccard distance between two binary vectors.
#
# For binary vectors X and Y,
# define a as the number of positions where exactly one of X_i or Y_i equals 1,
# define b as the number of positions where both X_i and Y_i equal 1.
# The FN-adjusted quantities are
#   XplusY      = mean.size(X, FN.rate) + mean.size(Y, FN.rate)
#   XintersectY = a * FN.rate + b
# and the modified Jaccard distance is
#   (XplusY - 2 * XintersectY) / (XplusY - XintersectY)
#
# X, Y    binary (0/1 or logical) vectors; assumed to have equal length
#         (TODO confirm -- the length is not checked here)
# FN.rate estimated false negative rate
#
# Returns a single numeric value. There is no guard against a zero
# denominator: e.g. all-zero X and Y with FN.rate = 0 gives 0/0 = NaN.
modified.jaccard.dist.vector <- function(X, Y, FN.rate) {
  # X = mat[, i]; Y = mat[, j]; FN.rate = .1
  a <- sum(xor(X, Y))
  b <- sum(X & Y)
  XplusY <- mean.size(X, FN.rate) + mean.size(Y, FN.rate)
  # A position where only one vector has a 1 counts as shared with weight FN.rate.
  XintersectY <- a * FN.rate + b
  (XplusY - 2*XintersectY) / (XplusY - XintersectY)
}
#' A non-symmetric Jaccard distance with respect to FN and FP rates
#' @param sDat a list containing an element \code{filteredMutMatrix}, the genotype (or cell) by mutation matrix
#' @param Options a list containing an element \code{FN.rate}, the estimated false negative rate; \code{FN.rate} is mostly contributed to by adoRate

0 comments on commit 19fb2f6

Please sign in to comment.