## Validation - AUCG and BN-UCGAIN

In this notebook, we evaluate the prediction accuracy of two-step models proposed in:

```
@article{ochodek2016functional,
  title={Functional size approximation based on use-case names},
  author={Ochodek, Miros{\l}aw},
  journal={Information and Software Technology},
  volume={80},
  pages={73--88},
  year={2016},
  publisher={Elsevier}
}
```

The code and the data comes from this publication (augumented with the data for the project P27 which was not considered in that publication).

## Imports

In [1]:
# Install required packages with install.packages(X)
require(RWeka)
require(partykit)
library(dplyr)
require(randomForest)
require(xlsx)
require(bnlearn)
require(gRain)

Loading required package: RWeka
Loading required package: partykit
Loading required package: grid
Loading required package: libcoin
Error: package or namespace load failed for 'libcoin' in loadNamespace(j <- i[[1L]], c(lib.loc, .libPaths()), versionCheck = vI[[j]]):
 there is no package called 'mvtnorm'
"package 'dplyr' was built under R version 3.6.1"
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

Loading required package: randomForest
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.

Attaching package: 'randomForest'

The following object is masked from 'package:dplyr':

    combine

Loading required package: xlsx
Loading required package: bnlearn

Attaching package: 'bnlearn'

The following object is masked from 'package:stats':

    sigma

Loading required package: gRain
"package 'gRain' was built under R versi

## Load data

In [2]:
#discretizes a variable - each interval is based on quantiles, e.g., (2,3]
discretize_measure <-function(variable_values){
  discretize_measure<-cut(variable_values,unique(quantile(variable_values)),include.lowest=T)
  discretize_measure
}

uc_ttypes <- c("C", "R", "U", "D", "L", "DL", "AR", "DR", "T", "CO", "CIA", "CS", "C|D|R|U")
uc_ttypes_factor <- factor(uc_ttypes)

Load use cases:

In [3]:
use_cases <- read.csv(file="./input/use-cases.csv", header=TRUE,stringsAsFactors=FALSE, row.names=1)
head(use_cases)

Unnamed: 0_level_0,ProjectID,UC,TransTypes,UCType,Cfp,TitleTokens
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<int>,<chr>
0,P01,UC2-1-1,C|D|R|U,C|D|R|U,16,manage faculties crud
1,P01,UC2-1-10,DL|L|R,L,27,assign science olympiads major specialty edit delete
2,P01,UC2-1-11,CS|R,CS,7,manage ranking algorithms
3,P01,UC2-1-13,C|D|R|U,C|D|R|U,17,manage exams crud
4,P01,UC2-1-14,DL|L|R,L,27,manage assignments exams majors specialties
5,P01,UC2-1-16,C|D|R|U,C|D|R|U,18,manage courses crud


In [4]:
dim(use_cases)

Load features for predicting use-case type

In [5]:
use_case_types_pred_input <- read.csv("./input/uc_pred.csv", sep=";", header=TRUE, stringsAsFactors=TRUE)

In [6]:
use_case_types_pred_input <- merge(use_case_types_pred_input[,-c(dim(use_case_types_pred_input)[2])], 
      use_cases %>% select(ProjectID, UC, UCType),
      by=c("ProjectID", "UC"), sort=F)
use_case_types_pred_input$UCType <- factor(use_case_types_pred_input$UCType, levels=uc_ttypes)

In [7]:
dim(use_case_types_pred_input)

In [8]:
head(use_case_types_pred_input)

ProjectID,UC,C,R,U,D,CRUD,DR,AR,T,...,no_predicates,actors,NP,VP,words,no_parentheses,conjuctions,pronouns,uniques,UCType
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,...,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<fct>
P01,UC2-1-1,0.0,0.4789282,0.2903455,0,1,0,0.3296749,0.0,...,1,1,4,2,6,2,0,0,0,C|D|R|U
P01,UC2-1-10,0.714355,0.4891753,0.0,0,0,0,0.4891753,0.6117968,...,1,1,8,6,14,2,2,0,0,L
P01,UC2-1-11,0.0,0.4789282,0.2903455,0,1,0,0.3296749,0.0,...,1,1,2,2,4,0,0,0,1,CS
P01,UC2-1-13,0.0,0.4789282,0.2903455,0,1,0,0.3296749,0.0,...,1,1,4,2,6,2,0,0,0,C|D|R|U
P01,UC2-1-14,0.0,0.4789282,0.2903455,0,1,0,0.3296749,0.0,...,1,1,9,2,9,0,2,0,0,L
P01,UC2-1-16,0.0,0.4789282,0.2903455,0,1,0,0.3296749,0.0,...,1,1,4,2,6,2,0,0,0,C|D|R|U


## Validation framework

In [9]:
set.seed(100239)

In [10]:
train_ids <- read.csv("./input/10-fold-train-full.csv", header = T)[,-1]
val_ids <- read.csv("./input/10-fold-val-full.csv", header = T)[,-1]

In [22]:
runs <- max(train_ids$run) + 1
k <- max(train_ids$k) + 1

In [28]:
1:(k-1)

In [32]:
validate_model_per_use_case <- function(model_factory, pred_func, use_cases_df, use_case_types_pred_input, 
                                        runs, k, train_ids, val_ids, uc_type_col_name){
    print("Starting validation...")
    run_results <- NULL
    
    for (run in 0:(runs-1)){
        print(paste("Starting run", run+1))
        
        for (ki in 0:(k-1)){
            print(paste("Starting fold", ki+1))
            print("Preparing training data...")
            # we need to add +1 because in R indexing start from 1 instead of 0 as in Python 
            train_index <- na.omit(as.numeric((train_ids %>% filter(run == run & k == ki))[1, 3:dim(train_ids)[2]] + 1))
            use_cases_train <- use_cases_df[train_index, ]
            
            print(paste("Training data shape:", dim(use_cases_train)[1], "x",  dim(use_cases_train)[2]))
            print('Fitting the model...')
            model <- model_factory(use_cases_train)
            
            # we need to add +1 because in R indexing start from 1 instead of 0 as in Python 
            val_index <- na.omit(as.numeric((val_ids %>% filter(run == run & k == ki))[1, 3:dim(val_ids)[2]] + 1))
            use_cases_val <- use_cases_df[val_index, ]
            use_case_types_pred_input_val <- use_case_types_pred_input[val_index,]
            print(paste("Validation data shape:", dim(use_cases_val)[1], "x",  dim(use_cases_val)[2]))
            
            print("Predicting size...")
            pred_res <- pred_func(use_cases_val, use_case_types_pred_input_val, model, uc_type_col_name)
            pred_res <- data.frame(run=rep(run, length(val_index)), k=rep(ki, length(val_index)), 
                                   pred_res, y_true=use_cases_val[,'Cfp'])
            
            pred_res <- pred_res %>% mutate(ar=abs(y_true - y_pred), re=abs(y_true - y_pred) / y_true)
            
            if (run == 0 & k == 0){
                run_results <- pred_res
            }else{
                run_results <- rbind(run_results, pred_res)
            }  
        }
        print("Run results...")
        print(paste("MAR =", mean(run_results$ar), "; MdAR =", median(run_results$ar), "; MRE =", mean(run_results$re)))

    } 
    results_agg <- list()
    results_agg['MAR'] <- mean(run_results$ar)
    results_agg['MAR_SD'] <- sd(run_results$ar)
    results_agg['MdAR'] <- median(run_results$ar)
    results_agg['MdAR_SD'] <- sd(run_results$ar)
    
    return(list(run_results, results_agg))
}

## Use-case type classifier (manual rules + classifier)

In [13]:
number_of_crud_ops <- function(newdata,cut_point){
  count <- 0
  if (newdata$C[1] > cut_point) {
    count <- count + 1
  }
  if (newdata$R[1] > cut_point) {
    count <- count + 1
  }
  if (newdata$U[1] > cut_point) {
    count <- count + 1
  }
  
  if (newdata$D[1] > cut_point) {
    count <- count + 1
  }
  number_of_crud_ops <- count
  number_of_crud_ops
}



predict_with_rules <- function(object, newdata, ...){
  uctype <- NULL
  
  cut_point <- 0.95
  
  if (( (newdata$no_predicates[1] > 1 &&  number_of_crud_ops(newdata,cut_point) > 1 ) )   
      && is.null(uctype)){
    uctype <- "C|D|R|U"
  }
  
  #Link
  if ((newdata$L[1] > cut_point 
       || 
       (newdata$CRUD[1] > cut_point && newdata$assignment_word[1] == "True") ||
       (newdata$C[1] > cut_point && newdata$assignment_word[1] == "True"))
      && is.null(uctype)){
    uctype <- "L"
  }

  
  #Transfer
  if ((newdata$T[1] > cut_point )  
      && is.null(uctype)){
    uctype <- "T"
  }
  
  #CRUD
  if (( (newdata$CRUD[1] > cut_point || newdata$crud[1] == "True") )   
      && is.null(uctype)){
    uctype <- "C|D|R|U"
  }
  
  
  #Dynamic retrieve 
  if ((newdata$DR[1] > cut_point) 
      && is.null(uctype)){
    uctype <- "DR"
  }
  
  
  #Async Retrieve
  if ((newdata$AR[1] > cut_point)
      && is.null(uctype)){
    uctype <- "AR"
  }
  
  #CS
  if ((newdata$CS[1] > cut_point)
      && is.null(uctype)){
    uctype <- "CS"
  }
  
  #Retrieve
  if ((newdata$R[1] > cut_point )
      && is.null(uctype)){
    uctype <- "R"
  }
  
  #Create
  if ((newdata$C[1] > cut_point) 
      && is.null(uctype)){
    uctype <- "C"
  }
  
  #Transfer
  if ((newdata$T[1] > cut_point )  
      && is.null(uctype)){
    uctype <- "T"
  }
  
  #CO
  if ((newdata$CO[1] > cut_point) 
      && is.null(uctype)){
    uctype <- "CO"
  }  
  
  #Update
  if ((newdata$U[1] > cut_point )
      && is.null(uctype)){
    uctype <- "U"
  }
  
  #Delete Link
  if (((newdata$DL[1] > cut_point)  && 
       (newdata$assignment_word[1] == "True" || newdata$from[1] == "True")) 
      && is.null(uctype)){
    uctype <- "DL"
  }
  
  #Delete
  if ((newdata$D[1] > cut_point) 
      && is.null(uctype)){
    uctype <- "D"
  }
  
  #CIA
  if ((newdata$CIA[1] > cut_point) 
      && is.null(uctype)){
    uctype <- "CIA"
  } 
  
  
  #Change state (antonym)
  if ((newdata$antonyms[1] == "True") 
      && is.null(uctype)){
    uctype <- "CS"
  }
  
  
  
  if (is.null(uctype)){
    uctype <- predict(object, 
                      newdata=newdata, 
                      type=c("class"))
  }
  return(uctype)
}


## AUCG

In [30]:
# M1 - mean or median size per UCType
createModel_M1 <- function(use_cases){
  model <- list()
  column_name = "Cfp"
  
  for (i in 1:length(uc_ttypes)){
    uctype <- uc_ttypes[i]
    use_cases_with_uctype <- use_cases %>% filter(UCType == uctype)
    uctype_entry <- list()
    if (dim(use_cases_with_uctype)[1] > 0){
      uctype_entry[["mean"]] <- mean(use_cases_with_uctype[,column_name])
      uctype_entry[["median"]] <- median(use_cases_with_uctype[, column_name])
      uctype_entry[["Q1"]] <- as.numeric(quantile(use_cases_with_uctype[, column_name], 0.25))
      uctype_entry[["Q3"]] <- as.numeric(quantile(use_cases_with_uctype[, column_name], 0.75))
      
    }else{
      # find mean according to taxonomy
      use_cases_with_similar_uctype <- NULL
      if (uctype %in% c("T") ){
        use_cases_with_similar_uctype <- use_cases %>% filter(UCType == "C")
      } else if (uctype %in% c("CS")){
        use_cases_with_similar_uctype <- use_cases %>% filter(UCType == "U")
      } else if (uctype %in% c("L")){
        use_cases_with_similar_uctype <- use_cases %>% filter(UCType %in% c("C","U"))
      } else if (uctype %in% c("DL")){
        use_cases_with_similar_uctype <- use_cases %>% filter(UCType == "D")
      } else if (uctype %in% c("AR", "DR")){
        use_cases_with_similar_uctype <- use_cases %>% filter(UCType == "R")
      }
      if ( is.null(use_cases_with_similar_uctype) ) {
        use_cases_with_similar_uctype <- use_cases
      }
      
      uctype_entry[["mean"]] <- mean(use_cases_with_similar_uctype[,column_name])
      uctype_entry[["median"]] <- median(use_cases_with_similar_uctype[, column_name])
      uctype_entry[["Q1"]] <- as.numeric(quantile(use_cases_with_similar_uctype[, column_name], 0.25))
      uctype_entry[["Q3"]] <- as.numeric(quantile(use_cases_with_similar_uctype[, column_name], 0.75))
    }
    
    model[[uctype]] <- uctype_entry
  }
  
  return(model)
}

In [15]:
# Estimation function 
estimate_M1 <- function(use_cases, use_case_types_pred_input, model, uctype_column_name){
    result <- list()
    func <- 'mean'

    project_ids <- c()
    ids <- c()
    estimated <- c()

    if (uctype_column_name == "PredUCTypeAllPredicates"){
        types_all_j48 <- J48(UCType ~ ., data = use_case_types_pred_input[,c(-1,-2)])
    
    
        predicted_uctypes <- c()
        for (i in 1:dim(use_cases)[1]){
          use_case <- use_cases[i,]
          print(use_case$UC)
          print(use_case_types_pred_input$UC)
          uctype <- predict_with_rules(types_all_j48, 
                                       (use_case_types_pred_input %>% 
                                          filter(UC==use_case$UC, ProjectID==use_case$ProjectID)))
          predicted_uctypes <- c(predicted_uctypes, as.character(uctype))
        }
    
        use_cases$PredUCTypeAllPredicates <- predicted_uctypes
    }
  
    for (i in 1:dim(use_cases)[1]){
        use_case <- use_cases[i,]
        uctype <- use_case[1,uctype_column_name]
        project_ids <- c(project_ids, use_case$ProjectID)
        ids <- c(ids, use_case$UC)
        estimate <- model[[uctype]][[func]]
        estimated <- c(estimated,  estimate)
    }
  
    result <- data.frame(ProjectID=project_ids, UC=ids, y_pred=estimated)
  
    return(result)
}

In [33]:
aucg_results <- validate_model_per_use_case(createModel_M1, estimate_M1, use_cases, use_case_types_pred_input, 
                            runs, k, train_ids, val_ids, "PredUCTypeAllPredicates")

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [34]:
aucg_results

run,k,ProjectID,UC,y_pred,y_true,ar,re
<int>,<int>,<fct>,<fct>,<dbl>,<int>,<dbl>,<dbl>
0,0,P01,UC2-1-14,13.113636,27,13.8863636,0.51430976
0,0,P01,UC2-1-8,13.113636,27,13.8863636,0.51430976
0,0,P01,UC2-1-9,17.186047,14,3.1860465,0.22757475
0,0,P01,UC2-2-11,8.279070,36,27.7209302,0.77002584
0,0,P01,UC2-2-17,7.373333,19,11.6266667,0.61192982
0,0,P01,UC2-4-10,7.373333,18,10.6266667,0.59037037
0,0,P02,PP_UC3,7.033333,10,2.9666667,0.29666667
0,0,P02,PP_UC5,13.113636,19,5.8863636,0.30980861
0,0,P06,UC1.3,5.680000,6,0.3200000,0.05333333
0,0,P06,UC2.1,7.033333,5,2.0333333,0.40666667


In [35]:
marp0 <- read.csv(file="./output/validation-marp0.csv", header=TRUE,stringsAsFactors=FALSE, row.names=1)[1,1]

In [36]:
SA_aucg <- (1 - aucg_results[[2]][['MAR']] / marp0) * 100
paste("SA=", format(round(SA_aucg, 2), nsmall = 2), "%", sep="")

In [37]:
 write.csv(aucg_results[[1]], file="./output/validation-aucg.csv",)

## BN-UCG

In [38]:
# calculates Equal Bands for a vector of sizes 
calculate_equal_bands <- function(sizes_of_use_cases, no_bands = 4){
  
  # 1. sort asceding
  sorted_sizes_of_use_cases <- sort(sizes_of_use_cases)
  
  # 2. calculate band size
  band_size <- sum(sorted_sizes_of_use_cases) / no_bands
  
  lower_idx <- 1
  upper_idx <- 1
  lower_band_size <- 0
  upper_band_size <- 0
  remaining_fractial_membership <- 0
  
  result <- NULL
  
  for (band in 1:no_bands){
    sum_sizes <- 0
    missing_size <- 0
    fractial_membership <- 0
    idx <- lower_idx
    number_of_members <- 0
    lower_band_size <- upper_band_size
    
    # we have to handle the remaining fraction first
    if (remaining_fractial_membership > 0){
      number_of_members <- remaining_fractial_membership
      sum_sizes <- sum_sizes + remaining_fractial_membership * sorted_sizes_of_use_cases[idx]
      remaining_fractial_membership <- 0
    }
    
    while (sum_sizes < band_size & idx <= length(sorted_sizes_of_use_cases)){
      # add size of a use case if it will entirely fit into the band
      if (sum_sizes + sorted_sizes_of_use_cases[idx] <=  band_size){
        sum_sizes <- sum_sizes + sorted_sizes_of_use_cases[idx]
        number_of_members <- number_of_members + 1
      }else{
        upper_idx <- idx
        # check size of band was entirely covered
        if (sum_sizes < band_size){
          missing_size <- band_size - sum_sizes
          fractial_membership <- missing_size / sorted_sizes_of_use_cases[idx]
          remaining_fractial_membership <- 1 - fractial_membership
          number_of_members <- number_of_members + fractial_membership
        }
        break
      }
      idx <- idx + 1
    }
    average_size_of_band <- band_size / number_of_members
    
    if (!is.na(sorted_sizes_of_use_cases[idx]) &
        fractial_membership > sorted_sizes_of_use_cases[idx]){
      upper_band_size <- fractial_membership
    }else{
      upper_band_size <- sorted_sizes_of_use_cases[idx]
    }
    
    if (is.null(result)){
      result <- data.frame(band=c(band), members=c(number_of_members),
                           avg_size=c(average_size_of_band),
                           lower_bound_size=c(lower_band_size),
                           upper_bound_size=c(upper_band_size))
    }else{
      result <- rbind(result, c(band, number_of_members, average_size_of_band,
                                lower_band_size, upper_band_size))
    }
    
    lower_idx <- idx + 1
  }
  
  return(result)
}

# converts vector of sizes to the average size of a band:
# bands_summary is produced by calculate_equal_bands
convert_sizes_to_bands <- function(sizes_of_use_cases, bands_summary){
  result <- c()
  
  for (i in 1:length(sizes_of_use_cases)){
    actual_size <- sizes_of_use_cases[i]
    band <- NULL
    for (j in 1:dim(bands_summary)[1]){
      if (actual_size > bands_summary$lower_bound_size[j]){
        band <- bands_summary$band[j]
        avg_size <- bands_summary$avg_size[j]
      }
    }
    result <- c(result, avg_size)
  }
  return(result)
}

factor_to_number <- function(factor_value){
  return(as.numeric(levels(factor_value)[factor_value]))
}

In [39]:
# M2 - BNN model
createModel_M2 <- function(use_cases){
  model <- list()
  measure_name <- "Cfp"

    
  # BNN
  bnn_input <- data.frame(TransTypes=use_cases$TransTypes,
        UCType=use_cases$UCType,
        Cfp=convert_sizes_to_bands(use_cases$Cfp, 
            calculate_equal_bands(use_cases$Cfp, no_bands = 4)))    
  

  bnn_input <- data.frame(lapply(bnn_input, factor),  stringsAsFactors=TRUE)
  
  bnn <- empty.graph(names(bnn_input))

  arcs(bnn) <- matrix( 
    c("UCType", "TransTypes", 
      "TransTypes", "Cfp" 
  ),
  ncol = 2, byrow = TRUE, dimnames = list(c(), c("from", "to")))
  
  bn.fitted <- bn.fit(bnn, bnn_input, method="bayes") 
  model <- compile(as.grain(bn.fitted))

  
  return(model)
}

createModel_M2(use_cases)

Independence network: Compiled: TRUE Propagated: FALSE 
  Nodes: chr [1:3] "TransTypes" "UCType" "Cfp"

In [40]:
# Estimation function 
estimate_M2 <- function(use_cases, use_case_types_pred_input, model, uctype_column_name){
    result <- list()
    measure_column_name <- 'Cfp'

    project_ids <- c()
    ids <- c()
    estimated <- c()

    if (uctype_column_name == "PredUCTypeAllPredicates"){
        types_all_j48 <- J48(UCType ~ ., data = use_case_types_pred_input[,c(-1,-2)])
    
    
        predicted_uctypes <- c()
        for (i in 1:dim(use_cases)[1]){
          use_case <- use_cases[i,]
          uctype <- predict_with_rules(types_all_j48, 
                                       (use_case_types_pred_input %>% 
                                          filter(UC==use_case$UC, ProjectID==use_case$ProjectID)))
          predicted_uctypes <- c(predicted_uctypes, as.character(uctype))
        }
    
        use_cases$PredUCTypeAllPredicates <- predicted_uctypes
    }
  
    for (i in 1:dim(use_cases)[1]){
        use_case <- use_cases[i,]
        uctype <- use_case[1,uctype_column_name]
        project_ids <- c(project_ids, use_case$ProjectID)
        ids <- c(ids, use_case$UC)
        
        jprop <- setEvidence(model, evidence = list(UCType=as.character(uctype)))
        query_res <- querygrain(jprop, nodes = c(measure_column_name), result="data.frame", type="marginal")[[1]]
        estimate <- sum(as.numeric(as.character(query_res[,measure_column_name])) * query_res$Freq)
        if(is.na(estimate) | estimate == 0){
          # I did it because I'm not sure what would happen if a type that was not in training set would appear
          stop(paste("ERROR with the value of estimate ", estimate, " query res was ", query_res))
        }
        estimated <- c(estimated,  estimate)
    }
  
    result <- data.frame(ProjectID=project_ids, UC=ids, y_pred=estimated)
  
    return(result)
}

In [41]:
bn_ucgain_results = validate_model_per_use_case(createModel_M2, estimate_M2, use_cases, use_case_types_pred_input, 
                            runs, k, train_ids, val_ids, "PredUCTypeAllPredicates")

[1] "Starting validation..."
[1] "Starting run 1"
[1] "Starting fold 1"
[1] "Preparing training data..."
[1] "Training data shape: 393 x 6"
[1] "Fitting the model..."
[1] "Validation data shape: 44 x 6"
[1] "Predicting size..."
[1] "Starting fold 2"
[1] "Preparing training data..."
[1] "Training data shape: 393 x 6"
[1] "Fitting the model..."
[1] "Validation data shape: 44 x 6"
[1] "Predicting size..."
[1] "Starting fold 3"
[1] "Preparing training data..."
[1] "Training data shape: 393 x 6"
[1] "Fitting the model..."
[1] "Validation data shape: 44 x 6"
[1] "Predicting size..."
[1] "Starting fold 4"
[1] "Preparing training data..."
[1] "Training data shape: 393 x 6"
[1] "Fitting the model..."
[1] "Validation data shape: 44 x 6"
[1] "Predicting size..."
[1] "Starting fold 5"
[1] "Preparing training data..."
[1] "Training data shape: 393 x 6"
[1] "Fitting the model..."
[1] "Validation data shape: 44 x 6"
[1] "Predicting size..."
[1] "Starting fold 6"
[1] "Preparing training data..."
[1] "

In [42]:
bn_ucgain_results

run,k,ProjectID,UC,y_pred,y_true,ar,re
<int>,<int>,<fct>,<fct>,<dbl>,<int>,<dbl>,<dbl>
0,0,P01,UC2-1-14,11.975572,27,15.02442787,0.55646029
0,0,P01,UC2-1-8,11.975572,27,15.02442787,0.55646029
0,0,P01,UC2-1-9,16.551992,14,2.55199201,0.18228514
0,0,P01,UC2-2-11,7.369914,36,28.63008612,0.79528017
0,0,P01,UC2-2-17,7.704046,19,11.29595416,0.59452390
0,0,P01,UC2-4-10,7.704046,18,10.29595416,0.57199745
0,0,P02,PP_UC3,7.292398,10,2.70760216,0.27076022
0,0,P02,PP_UC5,11.975572,19,7.02442787,0.36970673
0,0,P06,UC1.3,5.928245,6,0.07175459,0.01195910
0,0,P06,UC2.1,7.292398,5,2.29239784,0.45847957


In [43]:
marp0 <- read.csv(file="./output/validation-marp0.csv", header=TRUE,stringsAsFactors=FALSE, row.names=1)[1,1]

In [46]:
SA_bn_ucgain <- (1 - bn_ucgain_results[[2]][['MAR']] / marp0) * 100
paste("SA=", format(round(SA_bn_ucgain, 2), nsmall = 2), "%", sep="")

In [47]:
 write.csv(bn_ucgain_results[[1]], file="./output/validation-bnucg.csv",)