In [1]:
if (!require("pcalg")) install.packages("pcalg")
if (!require("graph")) install.packages("graph")
if (!require("ggm")) install.packages("ggm")
if (!require("Rgraphviz")) BiocManager::install("Rgraphviz")
if (!require("fastICA")) install.packages("fastICA")
if (!require("data.table")) install.packages("data.table")
if (!require("anchorFCI")) install.packages("anchorFCI") 
if (!require("FCI.Utils")) install.packages("FCI.Utils")  

library(pcalg)
library(graph)
library(ggm)
library(Rgraphviz)
library(fastICA)
library(data.table)
library(anchorFCI)
library(FCI.Utils)  

Loading required package: pcalg

Loading required package: graph

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
    table, tapply, union, unique, unsplit, which.max, which.min


Loading required package: ggm

Loading required package: Rgraphviz

Loading required package: grid

Loading required package: fastICA

Loading required package: data.table

Loading required package: anchorFCI

Loading required package: FCI.Utils

“ABI version mismatch: 
lme4 was built with Matrix ABI version 1

In [2]:
# Fix the RNG seed so the random steps in this notebook (the noise added in
# preprocess_data and the row sampling in create_balanced_dataset) are
# reproducible from run to run.
set.seed(42)

# Load the TEP CSV files and stack them into one numeric matrix.
#
# Reads "d00.csv" (labelled FaultBinary = 0) and every existing
# "d01.csv" ... "d21.csv" (labelled FaultBinary = 1) from `data_path`, keeps
# the first `n_vars` entries of the fixed variable list below plus the
# FaultBinary column, and row-binds everything.
#
# Args:
#   data_path: directory containing the d00.csv ... d21.csv files.
#   n_vars:    how many variables to keep, a whole number between 1 and the
#              length of the variable list (15).
#
# Returns:
#   A matrix with n_vars + 1 columns; the last column is "FaultBinary".
#
# Errors if `n_vars` is out of range, or if d00.csv is missing/unreadable or a
# selected column is absent from a file.
load_tep_data <- function(data_path, n_vars) {
    candidate_vars <- c("XMV.11.", "XMEAS.17.", "XMEAS.20.", "XMV.10.",
                        "XMEAS.18.", "XMEAS.5.", "XMEAS.24.", "XMEAS.9.",
                        "XMEAS.21.", "XMEAS.8.", "XMEAS.39.", "XMEAS.1.",
                        "XMEAS.37.", "XMEAS.6.", "XMEAS.14.")

    # Guard the index: 1:n_vars would silently select the first variable for
    # n_vars = 0 and produce NA column names for n_vars > 15.
    if (!is.numeric(n_vars) || length(n_vars) != 1L || is.na(n_vars) ||
        n_vars != floor(n_vars) ||
        n_vars < 1 || n_vars > length(candidate_vars)) {
        stop(sprintf("n_vars must be a whole number between 1 and %d",
                     length(candidate_vars)),
             call. = FALSE)
    }
    selected_vars <- candidate_vars[seq_len(n_vars)]
    keep_cols <- c(selected_vars, "FaultBinary")

    # Read one file, attach its class label and keep only the wanted columns.
    # read.csv is kept deliberately: the dotted names above presumably come
    # from its column-name sanitising -- TODO confirm against the CSV headers.
    read_labelled <- function(path, label) {
        dt <- as.data.table(read.csv(path))
        dt$FaultBinary <- label
        dt[, keep_cols, with = FALSE]
    }

    # Fault files are optional; any that do not exist are skipped.
    fault_files <- file.path(data_path, sprintf("d%02d.csv", 1:21))
    fault_files <- fault_files[file.exists(fault_files)]

    tables <- c(
        list(read_labelled(file.path(data_path, "d00.csv"), 0)),
        lapply(fault_files, read_labelled, label = 1)
    )

    as.matrix(rbindlist(tables))
}

# Standardise the columns of `data_matrix` and make the result safe for
# correlation-based tests.
#
# Steps: column-wise scale(); replace NA/NaN and infinite entries with 0 (a
# constant column scales to NaN); add independent N(0, 1e-10) noise to every
# cell. Consumes prod(dim) draws from the RNG.
#
# Returns the scaled matrix (same dimensions and column names as the input).
preprocess_data <- function(data_matrix) {
    z <- scale(data_matrix)

    # Non-finite results of scaling are zeroed rather than dropped.
    z[is.na(z) | is.infinite(z)] <- 0

    # Tiny jitter, one draw per cell, laid out column-major like the data.
    jitter <- rnorm(prod(dim(z)), 0, 1e-10)
    z + matrix(jitter, nrow = nrow(z))
}

# Draw a class-balanced random subset of rows.
#
# Args:
#   data_matrix: matrix with a "FaultBinary" column holding 0 (normal) or
#                1 (fault).
#   sample_size: target total number of rows; half are drawn from each class.
#
# Returns:
#   A matrix whose first rows are the sampled normal rows followed by the same
#   number of sampled fault rows. The per-class count is
#   floor(min(sample_size / 2, #normal, #fault)), so fewer rows are returned
#   when either class is too small. Sampling is without replacement.
create_balanced_dataset <- function(data_matrix, sample_size = 2000) {
    labels <- data_matrix[, "FaultBinary"]
    normal_idx <- which(labels == 0)
    fault_idx <- which(labels == 1)

    # floor() makes the truncation for odd sample_size explicit.
    n_samples <- floor(min(sample_size / 2,
                           length(normal_idx),
                           length(fault_idx)))

    # Index through sample.int instead of calling sample(idx, n): sample()
    # treats a length-one numeric `idx` as 1:idx, which would pick arbitrary
    # rows (possibly of the other class) when a class has a single row.
    draw <- function(idx) idx[sample.int(length(idx), n_samples)]

    # Normal rows are drawn first, then fault rows; drop = FALSE keeps the
    # result a matrix.
    data_matrix[c(draw(normal_idx), draw(fault_idx)), , drop = FALSE]
}

In [3]:
# Run four constraint-based structure searches on the same data.
#
# Args:
#   data:  numeric matrix with column names; it is passed through
#          preprocess_data() before the correlation matrix is computed.
#   alpha: significance level handed to every algorithm.
#
# Returns:
#   list(fci, rfci, pc, anchorfci) holding the objects returned by pcalg's
#   fci(), rfci(), pc() and a second fci() call respectively.
#
# NOTE(review): the "anchorfci" entry is produced by fci() with a wrapper that
# forwards to gaussCItest using only C and n, i.e. the same test and the same
# sufficient statistics as the first FCI run. No anchorFCI routine is called
# in this function -- confirm whether that is intended.
perform_causal_discovery <- function(data, alpha = 0.01) {
    var_names <- colnames(data)
    processed_data <- preprocess_data(data)

    # Sufficient statistics for the Gaussian CI test; the correlation matrix
    # is computed once and shared by both statistic lists.
    corr <- cor(processed_data)
    suffStat <- list(C = corr, n = nrow(processed_data))
    suffStat_anchor <- c(suffStat, list(data = processed_data))

    # CI test used for the "AnchorFCI" run: drops the extra `data` entry and
    # delegates to gaussCItest.
    anchor_ci_test <- function(x, y, S, suffStat) {
        gaussCItest(x, y, S, suffStat[c("C", "n")])
    }

    cat("\nPerforming FCI algorithm...\n")
    fci_result <- fci(suffStat,
                      indepTest = gaussCItest,
                      alpha = alpha,
                      labels = var_names)

    cat("\nPerforming RFCI algorithm...\n")
    rfci_result <- rfci(suffStat,
                        indepTest = gaussCItest,
                        alpha = alpha,
                        labels = var_names)

    cat("Performing PC algorithm...\n")
    pc_result <- pc(suffStat,
                    indepTest = gaussCItest,
                    alpha = alpha,
                    labels = var_names)

    cat("\nPerforming AnchorFCI algorithm...\n")
    anchorfci_result <- fci(suffStat_anchor,
                            indepTest = anchor_ci_test,
                            alpha = alpha,
                            labels = var_names)

    list(
        fci = fci_result,
        rfci = rfci_result,
        pc = pc_result,
        anchorfci = anchorfci_result
    )
}

# Plot the four learned graphs to a PDF and summarise their edges.
#
# Args:
#   results:   list with elements fci, rfci, pc, anchorfci as returned by
#              perform_causal_discovery().
#   data:      the matrix the graphs were learned from; only ncol(data) - 1
#              is used, for the plot titles and the CSV file name.
#   save_path: path of the PDF to write. The summary CSV path is derived from
#              it by replacing a trailing ".pdf" with
#              "_summary_<k>vars.csv".
#
# Returns:
#   list(edge_summary, fault_connections) where edge_summary is a data.frame
#   with one row per method (Method, Total_Edges, Fault_Connected_Variables)
#   and fault_connections is a named list of the variable names adjacent to
#   "FaultBinary" in each graph.
#
# Side effects: writes the PDF and the summary CSV.
analyze_results <- function(results, data, save_path) {
    n_plot_vars <- ncol(data) - 1

    # All plotting happens inside this helper so the PDF device is closed by
    # on.exit even when one of the plot calls fails.
    draw_graphs <- function() {
        pdf(save_path, width = 20, height = 20)
        dev_id <- dev.cur()
        on.exit(dev.off(dev_id), add = TRUE)

        par(mfrow = c(2, 2), mar = c(2, 2, 6, 2))

        attrs <- list(
            node = list(
                fontsize = 14,
                width = 1.5,
                height = 1.5
            ),
            edge = list(
                len = 2.5
            ),
            graph = list(
                rankdir = "TB",
                margin = "1,1"
            )
        )

        cat("\nPlotting FCI graph...\n")
        plot(results$fci, attrs = attrs)
        title(sprintf("FCI Causal Graph - %d variables", n_plot_vars), line = 1)

        cat("\nPlotting RFCI graph...\n")
        plot(results$rfci, attrs = attrs)
        title(sprintf("RFCI Causal Graph - %d variables", n_plot_vars), line = 1)

        cat("Plotting PC graph...\n")
        plot(results$pc, main = sprintf("PC Causal Graph - %d variables", n_plot_vars))

        cat("\nPlotting AnchorFCI graph...\n")
        plot(results$anchorfci, attrs = attrs)
        title(sprintf("AnchorFCI Causal Graph - %d variables", n_plot_vars), line = 1)

        par(mfrow = c(1, 1))
        invisible(NULL)
    }
    draw_graphs()

    # Adjacency matrices, in the order the methods are reported.
    adj <- list(
        FCI = as(results$fci@amat, "matrix"),
        RFCI = as(results$rfci@amat, "matrix"),
        PC = as(results$pc@graph, "matrix"),
        AnchorFCI = as(results$anchorfci@amat, "matrix")
    )

    fault_idx <- which(colnames(adj$RFCI) == "FaultBinary")
    if (length(fault_idx) != 1L) {
        stop("Expected exactly one node labelled \"FaultBinary\" in the graphs",
             call. = FALSE)
    }

    # Names of the nodes that share an edge (in either direction) with the
    # fault node.
    fault_neighbours <- function(a) {
        colnames(a)[which(a[, fault_idx] != 0 | a[fault_idx, ] != 0)]
    }

    # Number of adjacent node pairs. Counting pairs instead of halving the
    # number of non-zero cells matters for the PC graph, where a directed
    # edge occupies a single cell and would otherwise count as half an edge.
    count_edges <- function(a) {
        linked <- a != 0
        linked <- linked | t(linked)
        as.numeric(sum(linked[upper.tri(linked)]))
    }

    fault_connections <- lapply(adj, fault_neighbours)

    # unname()/USE.NAMES = FALSE keep the default 1..4 row names so the CSV
    # layout stays the same.
    edge_summary <- data.frame(
        Method = names(adj),
        Total_Edges = vapply(adj, count_edges, numeric(1), USE.NAMES = FALSE),
        Fault_Connected_Variables = unname(lengths(fault_connections))
    )

    write.csv(edge_summary,
              gsub("\\.pdf$", sprintf("_summary_%dvars.csv", n_plot_vars), save_path))

    list(
        edge_summary = edge_summary,
        fault_connections = fault_connections
    )
}

In [4]:
# Run the whole pipeline once per variable-set size (7, 10, 12, 15).
#
# For each size: load the data, draw a balanced subset, learn the graphs, and
# write the plots/summary to "<save_path_base>_<n>vars.pdf".
#
# Args:
#   data_path:      directory handed to load_tep_data().
#   save_path_base: prefix of the output PDF paths.
#
# Returns:
#   A list named "vars_<n>", each element holding
#   list(causal_results, comparison) for that size.
main <- function(data_path, save_path_base) {
    var_sets <- c(7, 10, 12, 15)

    # One complete analysis for a given number of variables.
    run_one <- function(n_vars) {
        cat(sprintf("\nProcessing analysis with %d variables...\n", n_vars))

        balanced_data <- create_balanced_dataset(load_tep_data(data_path, n_vars))
        results <- perform_causal_discovery(balanced_data)

        save_path <- sprintf("%s_%dvars.pdf", save_path_base, n_vars)
        comparison <- analyze_results(results, balanced_data, save_path)

        list(
            causal_results = results,
            comparison = comparison
        )
    }

    # lapply visits the sizes in order, so RNG use and output order match a
    # plain sequential loop.
    results_all <- lapply(var_sets, run_one)
    names(results_all) <- sprintf("vars_%d", var_sets)
    results_all
}

# Run the full analysis. Both paths are relative, so they resolve against the
# current working directory. main() writes one "<save_path_base>_<n>vars.pdf"
# per variable-set size, and analyze_results() writes a summary CSV next to
# each PDF.
results <- main(
    data_path = "Downloads/data_tep/",
    save_path_base = "Downloads/tep_causal_shap"
)


Processing analysis with 7 variables...

Performing FCI algorithm...

Performing RFCI algorithm...
Performing PC algorithm...

Performing AnchorFCI algorithm...

Plotting FCI graph...

Plotting RFCI graph...
Plotting PC graph...

Plotting AnchorFCI graph...

Processing analysis with 10 variables...

Performing FCI algorithm...

Performing RFCI algorithm...
Performing PC algorithm...

Performing AnchorFCI algorithm...

Plotting FCI graph...

Plotting RFCI graph...
Plotting PC graph...

Plotting AnchorFCI graph...

Processing analysis with 12 variables...

Performing FCI algorithm...

Performing RFCI algorithm...
Performing PC algorithm...

Performing AnchorFCI algorithm...

Plotting FCI graph...

Plotting RFCI graph...
Plotting PC graph...

Plotting AnchorFCI graph...

Processing analysis with 15 variables...

Performing FCI algorithm...

Performing RFCI algorithm...
Performing PC algorithm...

Performing AnchorFCI algorithm...

Plotting FCI graph...

Plotting RFCI graph...
Plotting PC