updates to several functionsi

ococrook · May 15, 2024 · 524a22a · 524a22a
1 parent b042bd3
commit 524a22a
Show file tree

Hide file tree

Showing 6 changed files with 56 additions and 21 deletions.
diff --git a/R/ReX-utils.R b/R/ReX-utils.R
@@ -210,6 +210,9 @@ uptakePredict <- function(params){
     )
   })
 
+  # put 0s for prolines
+  out[, is.na(params@summary@Rex.resolution[,2])] <- 0
+
   out_long <- DataFrame(Residue = rep(pe$Residues, each = length(timepoints)),
                         timepoints = rep(timepoints, times = length(pe$Residues)),
                         Uptake = as.vector(out))
@@ -393,9 +396,13 @@ marginalEffect <- function(params,
 
   }
 
+  # put 0s for prolines
+  out_sub <- out[,Residues]
+  out_sub[, is.na(params@summary@Rex.resolution[, 2])] <- 0  
+
   out_long[[j]]  <- DataFrame(Residue = rep(Residues, each = length(timepoints)),
                               timepoints = rep(timepoints, times = length(Residues)),
-                              Uptake = as.vector(out[,Residues]),
+                              Uptake = as.vector(out_sub),
                               mcmcIter = rep(i, each = length(Residues) * length(timepoints)))
 
 

diff --git a/R/Rex-conformationalSignatures.R b/R/Rex-conformationalSignatures.R
@@ -10,6 +10,8 @@
 ##' @param pca_params The parameters to use for the PCA. 
 ##' Default is list(scale = FALSE, center = TRUE)
 ##' 
+##' 
+##' 
 ##' @return A list containing the PCA object and the TRE values in wide format
 ##' 
 ##' @examples
@@ -47,21 +49,24 @@ UnsupervisedCSA <- function(RexDifferentialList,
 
   timepoints <- as.numeric(gsub(".*?([0-9]+).*", "\\1", colnames(TRE[[1]])))
 
+  residue_list <- lapply(RexDifferentialList, function(x) x@Rex.estimates$Resdiues)
+
   df_TRE <- DataFrame(TRE = do.call(rbind, TRE),
                       probs = do.call(rbind, probs),
-                      Residues = rep(seq.int(nrow(TRE[[1]])),
-                                     times = length(states)),
-                      states = rep(states,
-                                   each = nrow(TRE[[1]])))
+                      Residues = unlist(residue_list),
+                      states = rep(states, times = sapply(residue_list, length)))
 
-  values_from <- grep(paste0(quantity, "_", whichTimepoint), colnames(df_TRE))
+  values_from <- grep(paste0(quantity, "_", whichTimepoint), colnames(df_TRE))[1]
 
   TRE_states_wide <- data.frame(df_TRE) %>% 
     pivot_wider(names_from = states,
                 values_from = all_of(values_from),
                 id_cols = "Residues")
 
-  pca_states <- prcomp(t(TRE_states_wide[, -1]),
+  quant_df <- t(TRE_states_wide[, -1])
+  quant_df_full <- quant_df[,colSums(is.na(quant_df)) == 0 ]
+
+  pca_states <- prcomp(quant_df_full,
                        scale = pca_params$scale,
                        center = pca_params$center)
 
@@ -287,16 +292,16 @@ supervisedCSA <- function(RexDifferentialList,
 
   timepoints <- as.numeric(gsub(".*?([0-9]+).*", "\\1", colnames(TRE[[1]])))
 
+  residue_list <- lapply(RexDifferentialList, function(x) x@Rex.estimates$Resdiues)
+
   df_TRE <- DataFrame(TRE = do.call(rbind, TRE),
                       probs = do.call(rbind, probs),
-                      Residues = rep(seq.int(nrow(TRE[[1]])),
-                                     times = length(states)),
-                      states = rep(states,
-                                   each = nrow(TRE[[1]])))
+                      Residues = unlist(residue_list),
+                      states = rep(states, times = sapply(residue_list, length)))
 
   df_opls <- cbind(df_TRE, labels[df_TRE$states, ])
 
-  values_from <- grep(paste0(quantity, "_", whichTimepoint), colnames(df_opls))
+  values_from <- grep(paste0(quantity, "_", whichTimepoint), colnames(df_opls))[1]
 
   opls_states_wide <- data.frame(df_opls) %>% 
     pivot_wider(names_from = states,
@@ -305,27 +310,43 @@ supervisedCSA <- function(RexDifferentialList,
 
   if (type == "catagorical") {
 
-    col_subset <-  seq.int(nrow(labels))[labels[,whichlabel] != "Unknown"] + 1
+    col_subset <-  seq.int(nrow(labels))[labels[, whichlabel] != "Unknown"] + 1
     df_reduced <- opls_states_wide[,
                                    col_subset]
     annotations <- labels[labels[, whichlabel] != "Unknown", whichlabel]
   } else {
 
-    col_subset <-  seq.int(nrow(labels))[!is.na(as.numeric(labels[,whichlabel]))] + 1
+    col_subset <-  seq.int(nrow(labels))[!is.na(as.numeric(labels[, whichlabel]))] + 1
     df_reduced <- opls_states_wide[,
                                    col_subset]
     annotations <- as.numeric(labels[!is.na(as.numeric(labels[, whichlabel])),
                                      whichlabel])
 
   }
 
+  cross_val <- min(7, ncol(df_reduced)/2)
+
   if (type == "catagorical") {
-    states.plsda <- opls(x = t(df_reduced),
+
+    df_reduced <- data.frame(df_reduced)
+    rownames(df_reduced) <- opls_states_wide$Residues
+    df <- t(df_reduced)
+    df_na_remove <- df[, colSums(is.na(df)) == 0]
+
+    states.plsda <- opls(x = df_na_remove,
                          y =  factor(as.numeric(annotations)),
+                         crossvalI = cross_val,
                          orthoI = orthoI)
   } else {
-    states.plsda <- opls(x = t(df_reduced),
+
+    df_reduced <- data.frame(df_reduced)
+    rownames(df_reduced) <- opls_states_wide$Residues
+    df <- t(df_reduced)
+    df_na_remove <- df[, colSums(is.na(df)) == 0]
+
+    states.plsda <- opls(x = df_na_remove,
                          y =  as.numeric(annotations),
+                         crossvalI = cross_val,
                          orthoI = orthoI)
   }
 
@@ -525,9 +546,9 @@ plotLoadingSCSA <- function(states.plsda,
 
   df_loadings <- data.frame(loadings1 = states.plsda@loadingMN[,1], 
                             loadings2 = states.plsda@orthoLoadingMN[,1],
-                            residues = rownames(states.plsda@loadingMN))
+                            residues = as.numeric(rownames(states.plsda@loadingMN)))
 
-  df_loadings$residues <- as.numeric(sub(".", "", df_loadings$residues))
+  #df_loadings$residues <- as.numeric(sub(".", "", df_loadings$residues))
 
   if (whichLoading == "predictive") {
     df_loadings$loadings1 <- states.plsda@loadingMN[,1]

diff --git a/R/Rex-differential.R b/R/Rex-differential.R
@@ -82,7 +82,9 @@ processDifferential <- function(HdxData,
     pilong <- params@summary@posteriorEstimates$pilong
     qlong <- params@summary@posteriorEstimates$qlong
     dlong <- params@summary@posteriorEstimates$dlong
-    sigma <- params@summary@Rex.globals$sigma[whichChain]
+    sigma <- params@summary@Rex.globals$sigma[1]
+
+    names(blong) <- names(pilong) <- names(qlong) <- names(dlong) <- Residues
 
     err <- error_prediction(
         res = HdxData,

diff --git a/R/Rex-error.R b/R/Rex-error.R
@@ -25,7 +25,11 @@ error_prediction <- function(res, blong, pilong, qlong, dlong, phi) {
 
     min_index <- min(res$Start)
     max_index <- max(res$End)
-    colnames(out_res) <- seq(min_index, max_index)
+
+    colnames(out_res) <- names(blong)
+
+    # subset only to columns observed in this data
+    out_res <- out_res[, as.character(seq(min_index, max_index))]
 
 
     diff_coupling <- matrix(NA, nrow = length(unique(res$Sequence)), ncol = numtimepoints)

diff --git a/R/Rex-function.R b/R/Rex-function.R
@@ -346,6 +346,7 @@ RexProcess <- function(HdxData,
         rownames(.diagnoistics) <- c("sigma")
     }
 
+    names(blong) <- names(pilong) <- names(qlong) <- names(dlong) <- seq.int(R)
 
     # make error predictions
     err <- error_prediction(

diff --git a/R/rex-plotting.R b/R/rex-plotting.R
@@ -435,7 +435,7 @@ plotResidueResolution <- function(rex_params,
     geom_point(alpha = 0.9, size = 2) + 
     theme_bw() + 
     geom_line() + 
-    ylim(0, max(abs(df_butterfly$ARE))*1.1) +
+    ylim(0, max(abs(df_butterfly$ARE), na.rm = TRUE )*1.1) +
     ylab("ARE") + 
     facet_wrap(.~timepoints, nrow = nrow) +
     scale_alpha_continuous(range = c(0,1)) +