In [1]:
# Install whatever is missing, then attach the packages used by this notebook.
# `graph` and `Rgraphviz` are distributed through Bioconductor, not CRAN, so a
# plain install.packages() cannot provide them. BiocManager::install() resolves
# both CRAN and Bioconductor packages, so it is used for every missing package.
local({
    required_pkgs <- c("pcalg", "graph", "ggm", "Rgraphviz", "data.table")
    is_installed <- vapply(required_pkgs, requireNamespace, logical(1),
                           quietly = TRUE)
    missing_pkgs <- required_pkgs[!is_installed]
    if (length(missing_pkgs) > 0) {
        if (!requireNamespace("BiocManager", quietly = TRUE)) {
            install.packages("BiocManager")
        }
        BiocManager::install(missing_pkgs, update = FALSE, ask = FALSE)
    }
})

# library() (unlike require()) stops with an error if a package is unavailable.
library(pcalg)
library(graph)
library(ggm)
library(Rgraphviz)
library(data.table)

# Fixed seed so the random sampling and jitter below are reproducible.
set.seed(42)

Loading required package: pcalg

Loading required package: graph

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
    table, tapply, union, unique, unsplit, which.max, which.min


Loading required package: ggm

Loading required package: Rgraphviz

Loading required package: grid

Loading required package: data.table



In [3]:
# Write a short diagnostic report about an adjacency matrix.
#
# Arguments:
#   adj_mat     Square numeric adjacency matrix (may be 0 x 0).
#   file_prefix Path prefix; the report goes to "<file_prefix>_matrix_info.txt".
#
# The report lists dimensions, row/column names, NA/Inf presence, the distinct
# values, the number of zero and one entries, and the number of node pairs
# connected in both directions. NA cells are ignored by the counts (their
# presence is reported separately).
#
# Returns NULL invisibly; called for its side effect.
inspect_pc_matrix <- function(adj_mat, file_prefix) {
    con <- file(paste0(file_prefix, "_matrix_info.txt"), "w")
    # Close the connection even if one of the writes below fails.
    on.exit(close(con), add = TRUE)

    cat("PC Matrix Dimensions:", dim(adj_mat)[1], "x", dim(adj_mat)[2], "\n", file = con)
    cat("PC Matrix Column Names:", paste(colnames(adj_mat), collapse = ", "), "\n", file = con)
    cat("PC Matrix Row Names:", paste(rownames(adj_mat), collapse = ", "), "\n", file = con)

    cat("Contains NA values:", any(is.na(adj_mat)), "\n", file = con)
    cat("Contains Inf values:", any(is.infinite(adj_mat)), "\n", file = con)

    unique_vals <- unique(as.vector(adj_mat))
    cat("Unique values in matrix:", paste(unique_vals, collapse = ", "), "\n", file = con)

    cat("Number of zeros:", sum(adj_mat == 0, na.rm = TRUE), "\n", file = con)
    cat("Number of ones:", sum(adj_mat == 1, na.rm = TRUE), "\n", file = con)

    # A pair (i, j), i < j, is bidirectional when both [i, j] and [j, i] are
    # non-zero. Restricting to the upper triangle counts every pair once and
    # also works for an empty matrix.
    nonzero <- !is.na(adj_mat) & adj_mat != 0
    bidir_count <- sum(nonzero & t(nonzero) & upper.tri(adj_mat))
    cat("Number of bidirectional edges:", bidir_count, "\n", file = con)

    invisible(NULL)
}

In [5]:
# Read the TEP CSV files found under `data_path` and stack them into one
# numeric matrix.
#
# "d00.csv" is always read and labelled FaultBinary = 0. Each of "d01.csv" ..
# "d21.csv" is read only if it exists and is labelled FaultBinary = 1. From
# every file only the 41 XMEAS columns, the 11 XMV columns and the label are
# kept, in that order.
#
# Arguments:
#   data_path Directory containing the CSV files.
#
# Returns the row-bound tables converted with as.matrix().
load_tep_data <- function(data_path) {
    # Column names in the form they have after read.csv():
    # "XMEAS.1." .. "XMEAS.41." followed by "XMV.1." .. "XMV.11.".
    selected_vars <- c(
        sprintf("XMEAS.%d.", 1:41),
        sprintf("XMV.%d.", 1:11)
    )
    keep_cols <- c(selected_vars, "FaultBinary")

    # Read one CSV, attach the class label and keep only the wanted columns.
    read_labelled <- function(csv_file, fault_label) {
        tbl <- as.data.table(read.csv(csv_file))
        tbl$FaultBinary <- fault_label
        tbl[, keep_cols, with = FALSE]
    }

    normal_table <- read_labelled(file.path(data_path, "d00.csv"), 0)

    # Fault files are optional: missing ones are silently skipped.
    fault_files <- file.path(data_path, sprintf("d%02d.csv", 1:21))
    fault_files <- fault_files[file.exists(fault_files)]
    fault_tables <- lapply(fault_files, read_labelled, fault_label = 1)

    combined_data <- rbindlist(c(list(normal_table), fault_tables))
    as.matrix(combined_data)
}

In [7]:
# Standardise every column of `data_matrix` and make the result safe for
# correlation-based tests.
#
# Steps: scale() each column, replace every non-finite entry (NA, NaN, +/-Inf,
# e.g. from a constant column) with 0, then add independent N(0, 1e-10) noise
# to every cell.
#
# Arguments:
#   data_matrix Numeric matrix.
#
# Returns a matrix with the same dimensions as the input. Consumes
# nrow * ncol draws from the random number generator.
preprocess_data <- function(data_matrix) {
    standardized <- scale(data_matrix)

    # is.finite() is FALSE for NA, NaN and both infinities.
    standardized[!is.finite(standardized)] <- 0

    # Tiny jitter, laid out column-major to match the matrix.
    jitter_values <- rnorm(prod(dim(standardized)), mean = 0, sd = 1e-10)
    jitter_matrix <- matrix(jitter_values, nrow = nrow(standardized))

    standardized + jitter_matrix
}

# Draw an equally sized random sample of normal and fault rows.
#
# Arguments:
#   data_matrix Matrix with a "FaultBinary" column (0 = normal, 1 = fault).
#   sample_size Target total number of rows; half are taken from each class.
#               The per-class size is capped by the smaller class and rounded
#               down when `sample_size` is odd.
#
# Returns a matrix holding the sampled normal rows followed by the sampled
# fault rows (always a matrix, even for zero or one row).
create_balanced_dataset <- function(data_matrix, sample_size = 2000) {
    normal_idx <- which(data_matrix[, "FaultBinary"] == 0)
    fault_idx <- which(data_matrix[, "FaultBinary"] == 1)

    n_samples <- max(0, floor(min(sample_size / 2,
                                  length(normal_idx),
                                  length(fault_idx))))

    # sample(x, n) treats a length-one numeric x as 1:x, which would pick
    # arbitrary rows when a class has a single member. Sampling positions with
    # sample.int() avoids that and draws the same values otherwise.
    pick <- function(idx, k) idx[sample.int(length(idx), k)]

    sampled_normal <- pick(normal_idx, n_samples)
    sampled_fault <- pick(fault_idx, n_samples)

    data_matrix[c(sampled_normal, sampled_fault), , drop = FALSE]
}

In [9]:
# DOT node declarations ("node1", "node2", ...), one per matrix column.
# Labels are the column names with double quotes stripped; the columns listed
# in `fault_idx` get a bold HTML-like label.
.safe_plot_node_lines <- function(adj_mat, fault_idx) {
    n_nodes <- ncol(adj_mat)
    if (n_nodes == 0L) {
        return(character(0))
    }
    labels <- colnames(adj_mat)
    if (is.null(labels)) {
        labels <- rep("", n_nodes)
    }
    labels <- gsub("\"", "", labels)
    ids <- paste0("node", seq_len(n_nodes))

    node_lines <- paste0("  ", ids, " [label=\"", labels, "\"];")
    is_fault <- seq_len(n_nodes) %in% fault_idx
    if (any(is_fault)) {
        node_lines[is_fault] <- paste0("  ", ids[is_fault], " [label=<<b>",
                                       labels[is_fault], "</b>>];")
    }
    node_lines
}

# (row, col) positions of the non-zero cells, in row-major order.
.safe_plot_nonzero_cells <- function(adj_mat) {
    cells <- unname(which(adj_mat != 0, arr.ind = TRUE))
    cells[order(cells[, 1], cells[, 2]), , drop = FALSE]
}

# DOT edges for a 0/1 "from row to column" matrix. A one-way entry becomes
# i -> j; a reciprocal pair is emitted once (i < j) as a two-headed edge.
.safe_plot_directed_edge_lines <- function(adj_mat) {
    cells <- .safe_plot_nonzero_cells(adj_mat)
    if (nrow(cells) == 0L) {
        return(character(0))
    }
    from <- cells[, 1]
    to <- cells[, 2]
    reciprocal <- adj_mat[cells[, 2:1, drop = FALSE]] != 0
    reciprocal[is.na(reciprocal)] <- FALSE

    attrs <- ifelse(reciprocal,
                    " [dir=both, arrowhead=normal, arrowtail=normal]", "")
    edge_lines <- paste0("  node", from, " -> node", to, attrs, ";")
    # Reciprocal pairs are kept only in the i < j orientation (this also drops
    # self-loops, as before).
    edge_lines[!reciprocal | from < to]
}

# DOT edges for a pcalg PAG matrix, where amat[i, j] is the edge mark at node j
# (0 = no edge, 1 = circle, 2 = arrowhead, 3 = tail). Each adjacent pair is
# emitted once with the matching Graphviz arrow shape at each end.
.safe_plot_pag_edge_lines <- function(adj_mat) {
    adjacent <- (adj_mat != 0 | t(adj_mat) != 0) & upper.tri(adj_mat)
    pairs <- unname(which(adjacent, arr.ind = TRUE))
    if (nrow(pairs) == 0L) {
        return(character(0))
    }
    pairs <- pairs[order(pairs[, 1], pairs[, 2]), , drop = FALSE]

    arrow_for <- function(mark) {
        shape <- c("odot", "normal", "none")[match(mark, 1:3)]
        shape[is.na(shape)] <- "none"
        shape
    }
    head_shape <- arrow_for(adj_mat[pairs])
    tail_shape <- arrow_for(adj_mat[pairs[, 2:1, drop = FALSE]])

    paste0("  node", pairs[, 1], " -> node", pairs[, 2],
           " [dir=both, arrowhead=", head_shape,
           ", arrowtail=", tail_shape, "];")
}

# Render a causal graph to "<plots_dir>/<graph_type>_analysis.pdf", trying
# progressively simpler methods.
#
# Arguments:
#   graph_obj    Result object: for "rfci" its @amat slot is used, for "pc"
#                its @graph slot.
#   graph_type   Either "rfci" or "pc".
#   plots_dir    Directory for the DOT file, PDF and text fallback.
#   matrices_dir Directory for the adjacency-matrix CSV files.
#   data         Unused; kept so existing callers keep working.
#
# Procedure:
#   1. Write the adjacency matrix to CSV (plus a raw copy for "rfci").
#   2. Build a DOT file by hand and render it with the `dot` executable. If
#      `dot` rejects the file, a minimal DOT file with at most five edges is
#      written and rendered instead.
#   3. If step 2 fails for any reason, plot with the object's plot() method.
#   4. If that fails too, dump the matrix to "<graph_type>_analysis.txt".
#
# Returns TRUE if a PDF was produced by step 2 or 3, FALSE otherwise.
safe_plot <- function(graph_obj, graph_type, plots_dir, matrices_dir, data) {
    if (!(length(graph_type) == 1L && graph_type %in% c("rfci", "pc"))) {
        stop("graph_type must be \"rfci\" or \"pc\"", call. = FALSE)
    }

    file_id <- sprintf("%s_analysis", graph_type)
    file_path <- file.path(plots_dir, file_id)
    pdf_file <- paste0(file_path, ".pdf")
    dot_file <- file.path(plots_dir, paste0(file_id, "_debug.dot"))

    if (graph_type == "rfci") {
        adj_mat <- graph_obj@amat
        write.csv(adj_mat, file.path(matrices_dir, sprintf("%s_raw_amat.csv", graph_type)))
    } else {
        adj_mat <- as(graph_obj@graph, "matrix")
    }
    write.csv(adj_mat, file.path(matrices_dir, sprintf("%s_amat.csv", graph_type)))

    # May be empty when the matrix has no "FaultBinary" column; every use
    # below tolerates that.
    fault_idx <- which(colnames(adj_mat) == "FaultBinary")

    # ---- Attempt 1: hand-written DOT rendered by the `dot` executable ----
    # The block evaluates to TRUE/FALSE instead of calling return(), so that a
    # failed `dot` run really does fall through to the standard plot.
    dot_ok <- tryCatch({
        cat(sprintf("Trying manual DOT approach for %s...\n", graph_type))
        cat("Creating debug DOT file:", dot_file, "\n")

        if (graph_type == "pc") {
            inspect_pc_matrix(adj_mat, file.path(plots_dir, file_id))
            # NOTE(review): reciprocal entries of a CPDAG usually denote an
            # undirected edge; they are still drawn two-headed, as before.
            edge_lines <- .safe_plot_directed_edge_lines(adj_mat)
        } else {
            edge_lines <- .safe_plot_pag_edge_lines(adj_mat)
        }
        node_lines <- .safe_plot_node_lines(adj_mat, fault_idx)

        dot_lines <- c(
            "digraph G {",
            "  rankdir=TB;",
            "  node [shape=circle, style=filled, fillcolor=white, fontname=Arial];",
            "  edge [arrowsize=0.8, fontname=Arial];",
            node_lines,
            edge_lines,
            "}"
        )
        writeLines(dot_lines, dot_file)

        cat("DOT file content (first 10 lines):\n")
        writeLines(head(dot_lines, 10))

        cat("Validating DOT file syntax...\n")
        check_status <- system2("dot", c("-Tplain", shQuote(dot_file)),
                                stdout = FALSE, stderr = FALSE)

        if (check_status != 0) {
            cat("DOT syntax validation failed. Creating minimalist DOT file.\n")

            # Keep only the first five off-diagonal edges (row-major order).
            cells <- .safe_plot_nonzero_cells(adj_mat)
            cells <- cells[cells[, 1] != cells[, 2], , drop = FALSE]
            cells <- cells[seq_len(min(5L, nrow(cells))), , drop = FALSE]
            minimal_edges <- if (nrow(cells) > 0L) {
                paste0("  node", cells[, 1], " -> node", cells[, 2], ";")
            } else {
                character(0)
            }

            writeLines(c("digraph G {",
                         "  node [shape=circle];",
                         node_lines,
                         minimal_edges,
                         "}"),
                       dot_file)
        }

        dot_cmd <- paste("dot -Tpdf", shQuote(dot_file), "-o", shQuote(pdf_file))
        cat("Executing DOT command:", dot_cmd, "\n")
        system_result <- system(dot_cmd)

        if (system_result == 0) {
            cat("Successfully created PDF using manual DOT approach\n")
            TRUE
        } else {
            cat("Manual DOT approach failed with exit code:", system_result, "\n")
            cat("Trying to generate Graphviz version info for debugging:\n")
            system("dot -V")
            cat("Falling back to standard plotting\n")
            FALSE
        }
    }, error = function(e) {
        cat(sprintf("Manual DOT approach failed for %s: %s\n", graph_type, e$message))
        FALSE
    })
    if (isTRUE(dot_ok)) {
        return(TRUE)
    }

    # ---- Attempt 2: the object's own plot() method ----
    render_standard_plot <- function() {
        pdf(pdf_file, width = 15, height = 15)
        device_id <- dev.cur()
        # Close the PDF device whether or not plotting succeeds.
        on.exit(dev.off(device_id), add = TRUE)
        par(mar = c(2, 2, 4, 2), oma = c(1, 1, 1, 1), cex = 1.2)

        plot_title <- sprintf("%s Causal Graph Analysis", toupper(graph_type))
        if (length(fault_idx) > 0) {
            # Larger font for the fault node.
            font_sizes <- rep(16, ncol(adj_mat))
            font_sizes[fault_idx] <- 20
            plot_target <- if (graph_type == "rfci") graph_obj else graph_obj@graph
            plot(plot_target,
                 main = plot_title,
                 nodeAttrs = list(fontsize = font_sizes))
        } else {
            plot(graph_obj, main = plot_title)
        }
        TRUE
    }

    plot_ok <- tryCatch({
        cat(sprintf("Using standard plot for %s...\n", graph_type))
        render_standard_plot()
    }, error = function(e) {
        cat(sprintf("Standard plot failed for %s: %s\n", graph_type, e$message))
        FALSE
    })
    if (isTRUE(plot_ok)) {
        return(TRUE)
    }

    # ---- Attempt 3: plain-text dump of the adjacency matrix ----
    cat(sprintf("Both plotting methods failed for %s. Creating text representation.\n", graph_type))
    write.table(
        sprintf("Adjacency matrix for %s analysis", graph_type),
        file = paste0(file_path, ".txt"),
        row.names = FALSE, col.names = FALSE, quote = FALSE
    )
    write.table(
        adj_mat,
        file = paste0(file_path, ".txt"),
        append = TRUE,
        row.names = TRUE, col.names = TRUE
    )
    return(FALSE)
}

In [11]:
# Run the RFCI and PC causal-discovery algorithms on the TEP data and save
# plots, adjacency matrices and summary tables.
#
# Arguments:
#   data_path      Directory with the input CSV files (see load_tep_data()).
#   save_path_base Only its parent directory is used: outputs are written to
#                  "<dirname(save_path_base)>/plots" and ".../matrices".
#
# Written files (besides those produced by safe_plot()):
#   matrices/causal_summary.csv    Edge counts per method.
#   matrices/fault_connections.csv Variables adjacent to FaultBinary per
#                                  method; the shorter column is NA-padded.
#
# Returns a list with the fitted objects (`rfci`, `pc`) and their adjacency
# matrices (`rfci_adj`, `pc_adj`).
run_causal_analysis <- function(data_path, save_path_base) {
    output_root <- dirname(save_path_base)
    plots_dir <- file.path(output_root, "plots")
    matrices_dir <- file.path(output_root, "matrices")
    for (out_dir in c(plots_dir, matrices_dir)) {
        if (!dir.exists(out_dir)) {
            dir.create(out_dir, recursive = TRUE)
        }
    }

    cat("Loading and preprocessing data...\n")
    full_data <- load_tep_data(data_path)
    balanced_data <- create_balanced_dataset(full_data)
    processed_data <- preprocess_data(balanced_data)

    n <- nrow(processed_data)
    var_names <- colnames(balanced_data)

    # Sufficient statistics for the Gaussian conditional-independence test.
    suffStat <- list(C = cor(processed_data), n = n)

    cat("\nPerforming RFCI algorithm...\n")
    rfci_result <- rfci(suffStat,
                        indepTest = gaussCItest,
                        alpha = 0.05,
                        labels = var_names)

    cat("\nPerforming PC algorithm...\n")
    pc_result <- pc(suffStat,
                    indepTest = gaussCItest,
                    alpha = 0.05,
                    labels = var_names)

    cat("\nPlotting RFCI graph...\n")
    safe_plot(rfci_result, "rfci", plots_dir, matrices_dir, balanced_data)

    cat("\nPlotting PC graph...\n")
    safe_plot(pc_result, "pc", plots_dir, matrices_dir, balanced_data)

    rfci_adj <- rfci_result@amat
    pc_adj <- as(pc_result@graph, "matrix")

    fault_idx <- which(colnames(rfci_adj) == "FaultBinary")

    if (length(fault_idx) > 0) {
        # Number of adjacent node pairs. Counting each unordered pair once is
        # correct both for the PC matrix (a directed edge fills one cell, an
        # undirected edge two) and for the RFCI matrix (always two cells).
        count_edges <- function(amat) {
            adjacent <- amat != 0 | t(amat) != 0
            sum(adjacent[upper.tri(adjacent)], na.rm = TRUE)
        }

        # Names of the variables sharing an edge with FaultBinary.
        fault_neighbours <- function(amat) {
            colnames(amat)[which(amat[, fault_idx] != 0 | amat[fault_idx, ] != 0)]
        }
        rfci_fault_connections <- fault_neighbours(rfci_adj)
        pc_fault_connections <- fault_neighbours(pc_adj)

        # Direction counts.
        # PC matrix: cell [i, j] != 0 means an edge from i to j.
        # RFCI matrix: cell [i, j] is the mark at node j (2 = arrowhead), so
        # an arrowhead in the fault column points into FaultBinary and an
        # arrowhead in the fault row points away from it. Circle marks are
        # counted in neither direction.
        summary_df <- data.frame(
            Method = c("RFCI", "PC"),
            Total_Edges = c(count_edges(rfci_adj), count_edges(pc_adj)),
            Fault_Connected_Variables = c(length(rfci_fault_connections),
                                          length(pc_fault_connections)),
            Incoming_Fault_Edges = c(sum(rfci_adj[, fault_idx] == 2),
                                     sum(pc_adj[, fault_idx] != 0)),
            Outgoing_Fault_Edges = c(sum(rfci_adj[fault_idx, ] == 2),
                                     sum(pc_adj[fault_idx, ] != 0))
        )

        write.csv(summary_df, file.path(matrices_dir, "causal_summary.csv"), row.names = FALSE)

        # The two methods generally find different numbers of neighbours;
        # pad the shorter list so both fit in one table.
        n_rows <- max(length(rfci_fault_connections), length(pc_fault_connections))
        pad_to <- function(x) c(x, rep(NA_character_, n_rows - length(x)))
        connections_df <- data.frame(
            RFCI_Fault_Connections = pad_to(rfci_fault_connections),
            PC_Fault_Connections = pad_to(pc_fault_connections),
            stringsAsFactors = FALSE
        )
        write.csv(connections_df, file.path(matrices_dir, "fault_connections.csv"))
    } else {
        cat("Warning: FaultBinary column not found in adjacency matrices\n")
    }

    cat("\nAnalysis complete. Results saved to", matrices_dir, "and", plots_dir, "\n")

    return(list(
        rfci = rfci_result,
        pc = pc_result,
        rfci_adj = rfci_adj,
        pc_adj = pc_adj
    ))
}

In [13]:
# Directory holding the TEP CSV files (d00.csv plus any of d01.csv..d21.csv).
data_path <- "Downloads/data_tep/"
# Only the parent directory of this path is used: outputs land in
# "Downloads/plots" and "Downloads/matrices".
save_path_base <- "Downloads/shap"

# Run the whole pipeline and keep the fitted objects and adjacency matrices.
results <- run_causal_analysis(
    data_path = data_path,
    save_path_base = save_path_base
)

Loading and preprocessing data...

Performing RFCI algorithm...

Performing PC algorithm...

Plotting RFCI graph...
Trying manual DOT approach for rfci...
Creating debug DOT file: Downloads/plots/rfci_analysis_debug.dot 
DOT file content (first 10 lines):
Validating DOT file syntax...
Executing DOT command: dot -Tpdf 'Downloads/plots/rfci_analysis_debug.dot' -o 'Downloads/plots/rfci_analysis.pdf' 
Successfully created PDF using manual DOT approach

Plotting PC graph...
Trying manual DOT approach for pc...
Creating debug DOT file: Downloads/plots/pc_analysis_debug.dot 
DOT file content (first 10 lines):
Validating DOT file syntax...
Executing DOT command: dot -Tpdf 'Downloads/plots/pc_analysis_debug.dot' -o 'Downloads/plots/pc_analysis.pdf' 
Successfully created PDF using manual DOT approach

Analysis complete. Results saved to Downloads/matrices and Downloads/plots 
