Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
166 lines (117 sloc) 6.48 KB
library (tolerance)
library (plyr)
library (EnvStats)
# SET filename, folder, runs_to_keep
filename = "C:\\Users\\ngpc4\\Documents\\FusionB\\all_combined.csv"
folder="C:\\Users\\ngpc4\\Documents\\FusionB\\RunControlSpecification\\"
df <- read.csv (filename, header=TRUE)
runs_to_keep <- readLines ("C:\\Users\\ngpc4\\Documents\\FusionB\\RunControlSpecification\\RunControl_Runs.txt")
df <- df[df$Run %in% runs_to_keep,]
df[,"UniqueID"] = paste (df[,"Run"],df[,"Well"])
# SET the wells to exclude (remove more problematic wells)
df_problem <- read.csv ("C:\\Users\\ngpc4\\Documents\\FusionB\\wells_to_exclude.csv", header=TRUE)
problem_samples <- paste (df_problem[,"Run"], df_problem[,"Well"])
df <- df[!(df$UniqueID %in% problem_samples), ]
confidence_levels <- c(0.99, 0.95, 0.90)
coverage_levels <- c(0.900, 0.950, 0.990)
nonparametric_coverage_levels <- c (0.90, 0.95)
# Positive controls
positive_control = df[df$Content == 'Pos Ctrl-1' | df$Content == 'Pos Ctrl-2' | df$Content == 'Pos Ctrl-3' | df$Content == 'Pos Ctrl-4', ]
positive_control = positive_control[positive_control$Fluor == 'Texas Red' | positive_control$Fluor == 'Cy5' | positive_control$Fluor == 'FAM',]
#positve_control2 = df2[df2$Content != 'NTC-1' | df2$Content == 'NTC-2',]
levels(droplevels(positive_controls$Well))
pc_TexRed = positive_control[positive_control$Fluor == 'Texas Red',]
pc_Cy5 = positive_control[positive_control$Fluor == 'Cy5', ]
pc_Fam = positive_control[positive_control$Fluor == 'FAM', ]
negative_control = df[df$Content == 'NTC-1' , ]
negative_control = negative_control[negative_control$Fluor == 'HEX',]
all_controls <- merge (positive_control, negative_control, all=TRUE)
levels(droplevels(all_controls$Well))
levels(droplevels(all_controls$Fluor))
#source("http://goo.gl/UUyEzD")
# SET path to R functions
source ("C:\\Users\\ngpc4\\Documents\\Biostatistics\\R_functions.R.txt")
png(filename= paste (folder, "TexRed_outlier.png", sep =""))
pc_TexRed_noOutliers <- outlierKD (pc_TexRed, Cq)
dev.off()
png(filename=paste (folder, "Cy5_outlier.png", sep =""))
pc_Cy5_noOutliers <- outlierKD (pc_Cy5, Cq)
dev.off()
png(filename= paste (folder, "FAM_outlier.png", sep ="") )
pc_Fam_noOutliers <- outlierKD (pc_Fam, Cq)
dev.off()
png(filename= paste (folder, "HEX_outlier.png", sep ="") )
negative_control_noOutliers <- outlierKD (negative_control, Cq)
dev.off()
# Cy5 not included, creating error
all_controls_no_outliers <- merge (pc_TexRed_noOutliers, pc_Fam_noOutliers, all=TRUE)
all_controls_no_outliers <- merge (all_controls_no_outliers, negative_control_noOutliers, all=TRUE)
#outlier_removed <- ddply (all_controls, c('Fluor'), .fun = outlierKD, var=Cq ) - does'nt work, throws errors
shapiro.test (pc_TexRed_noOutliers$Cq)
shapiro.test (pc_Cy5_noOutliers$Cq)
shapiro.test (pc_Fam_noOutliers$Cq)
shapiro.test (negative_control_noOutliers$Cq)
#alpha_num = 1 – confidence level
#normtol.int (ct_values, alpha = alpha_num, P = coverage_level, side=2)
# test
# nptol.int (df$Ct, alpha=alpha_num, P= coverage_level, side=2)
# DEFINE FUNCTIONS, looking for column $Ct, Fluor, Target, and Sample
normtol_function <- function (df, alpha_num, coverage_level) {
cleaned_df <- df[!is.na(df$Cq),]
return (normtol.int (cleaned_df$Cq, alpha=alpha_num, P= coverage_level, side=2) )
}
nptol_function <- function (df, alpha_num, coverage_level) {
cleaned_df <- df[!is.na(df$Cq),]
result <- (nptol.int (cleaned_df$Cq, alpha=alpha_num, P= coverage_level, side=2) )
#lower_bound <- result[result$2-sided.lower,]
#upper_bound <- result[result$2-sided.upper,]
return (round(result,2))
}
summary_function <- function (df) {
cleaned_df <- df[!is.na(df$Cq),]
count <- nrow (cleaned_df)
mean <- mean (cleaned_df$Cq)
median <- median (cleaned_df$Cq)
sd <- sd (cleaned_df$Cq)
cv <- sd/mean *100
results <- data.frame (matrix (ncol=13,nrow=1))
percentiles <- quantile(cleaned_df$Cq, probs = c(0.01, 0.05, 0.1, 0.90, .95, .99))
min <- min (cleaned_df$Cq)
max <- max (cleaned_df$Cq)
colnames (results) <- c("Number of observations", "Mean", "Median", "Standard Deviation", "% CV", "1rst percentile", "5th percentile", "10th percentile", "90th %", "95%", "99%", "Min", "Max")
results[1,] <- round (c(count, mean, median, sd, cv, percentiles, min, max),2)
return (results)
}
# PREDICTION INTERVALS
# initialize
current_normal <- data.frame (matrix (ncol=8, nrow=0))
x <- c ("Fluor", "Target", "Sample", "alpha", "P", "x.bar", "2-sided.lower", "2-sided.upper")
colnames (current_normal) <- x
current_nonparametric <- data.frame (matrix (ncol=5, nrow=0))
y <- c ("Fluor", "alpha", "P", "2-sided.lower", "2-sided.upper")
#colnames (current_nonparametric) <- y
confidence_levels <- c(0.99, 0.95, 0.90)
coverage_levels <- c(0.900, 0.950, 0.990)
nonparametric_coverage_levels <- c (0.90, 0.95, 0.99)
# assume these have been cleaned
#all_controls <- merge (pc_TexRed, pc_Cy5, all=TRUE)
#all_controls <- merge (all_controls, pc_Fam, all=TRUE)
#all_controls <- merge (all_controls, negative_control, all=TRUE)
for (confidence_level in confidence_levels) {
alpha_num = 1 - confidence_level
for (coverage_level in nonparametric_coverage_levels) {
#current_nonparametric <- merge (current_nonparametric, c(nptol_function (pc_TexRed,alpha_num, coverage_level ), "Texas Red"), all=TRUE)
#current_nonparametric <- merge (current_nonparametric, c(nptol_function (pc_Cy5, alpha_num, coverage_level), "Cy5"), all=TRUE)
#current_nonparametric <- merge (current_nonparametric, c(nptol_function (pc_Fam, alpha_num, coverage_level), "Fam"), all=TRUE)
#current_nonparametric <- merge (current_nonparametric, c(nptol_function (negative_control, alpha_num, coverage_level), "Neg"), all=TRUE)
nonparametric <- ddply (all_controls_no_outliers, c('Fluor'), .fun = nptol_function, alpha=alpha_num, coverage_level=coverage_level )
if (all (is.na (current_nonparametric))) {
current_nonparametric <- nonparametric
} else {
current_nonparametric <- merge (current_nonparametric, nonparametric, all=TRUE)
}
} # end coverage_level
} # end confidence_level
write.csv(current_nonparametric, file = paste (folder, "Confidence_Intervals.csv", sep=""),row.names=TRUE)
summary_statistics <- ddply (all_controls_no_outliers, c('Fluor'),.fun=summary_function)
write.csv(summary_statistics, file = paste (folder,"Control_Summary_Statistics.csv",sep=""),row.names=TRUE)