# Loading Packages & Initialization

In [2]:
# Project root; the shared setup script is sourced from below it.
folder.path <- "C:/Users/paslanpatir/Desktop/TEZ_v2/"
source(paste0(folder.path, "pickleware/pickleware/TezV2_SetupCode.r"))

# 0 opens NetLogo with its GUI; any other value starts it without a GUI.
Is_Headless <- 1
nl.model <- "Segregation_Dummy"

nl.path <- "C:/Program Files/NetLogo 6.0.4/app"
# Assigned again after the setup script has been sourced (kept as in the
# original cell so the value is the same whatever the script did).
folder.path <- "C:/Users/paslanpatir/Desktop/TEZ_v2/"

# The model file is expected directly under the project root.
model.path <- paste0(folder.path, nl.model, ".nlogo")

# Start NetLogo and load the model. Without a GUI the instance is stored
# under the name held in nl.model (passed as nl.obj).
if (Is_Headless != 0) {
    NLStart(nl.path, gui = FALSE, nl.jarname = "netlogo-6.0.4.jar", nl.obj = nl.model)
    NLLoadModel(model.path, nl.obj = nl.model)
} else {
    NLStart(nl.path, gui = TRUE, nl.jarname = "netlogo-6.0.4.jar")
    NLLoadModel(model.path)
}

In [None]:
# "basic" for the plain Segregation model, "dummy" for any other model name.
model.type <- if (nl.model == "Segregation") "basic" else "dummy"

# Folder holding the input data sets.
data.path <- paste0(folder.path, "data/")

# Dated folder that receives all outputs of this run (created if missing;
# no warning when it already exists).
output.folder <- paste0("outputs_V2_StaticElim_PoolUpd_", model.type, "_", Sys.Date())
dir.create(file.path(folder.path, output.folder), showWarnings = FALSE)
outputs.path <- paste0(folder.path, output.folder, "/")

# Path of the read-me file that keeps info about the output folder.
ReadMe <- paste0(outputs.path, "ReadMe_", model.type, ".txt")

# Model Parameters & Functions

## Set model parameters

In [None]:
#### Model Parameters ####
# Number of replications for each instance
nofrep <- 10

# Feature names are listed in their definition order in run_model
if (model.type == "dummy") {
    feature_names <- c(
        "density", "%-similar-wanted", "budget-multiplier-dummy",
        "density-multiplier-dummy", "noise-dummy", "tick-limit"
    )
} else if (model.type == "basic") {
    feature_names <- c("density", "%-similar-wanted")
}

# Name of the model output
output_name <- "percent-similar"

# Number of input parameters of the agent-based model
nofparams <- length(feature_names)

# Random forest settings (passed to randomForest as ntree, mtry and nperm)
ntree <- 300
mtry <- 2
nperm <- 5

## Set user parameters

In [None]:
#### User parameters ####
# Error measure reported on the test sets (other options noted: MAPE, BIAS)
error_type <- "RMSE"

# Uncertainty measure used for sample selection (other option noted: 'range')
selection_metric <- "coefvar"

# Number of iterations and the replication ids to run
iteration_budget <- 11
metarep <- 1:5

# Number of instances
unlabeled_ins <- 100
test_ins <- 100
train_ins_oneshot <- 100
train_ins_Ad <- 50

# Number of instances selected in each step
selected_ins <- 5

# Number of variables eliminated in each step
h <- 1

# Seeds to run
seed.focus <- c(0, 4)

## !!!
unlabeled.type <- "refresh and ElimInducedSampling"
sample.type <- ""

# Called after every setting above has been assigned.
log_entry()

In [None]:
#######
# Sampling range (min_range, max_range) of every known model input, keyed by
# feature name and then restricted to the features of the active model.
# The ranges used to be matched to feature_names by position with six
# hard-coded values; with the two-feature "basic" model the names and the six
# values no longer line up. Keying by name gives the same table for the
# six-feature model and the correct two rows for the basic one.
feature_ranges <- data.table(
    feature = c("density", "%-similar-wanted", "budget-multiplier-dummy",
                "density-multiplier-dummy", "noise-dummy", "tick-limit"),
    min_range = c(10, 10, 1, 0.01, 0.00001, 90),
    max_range = c(90, 90, 10, 1, 0.0001, 110)
)
feature_ranges <- feature_ranges[feature %in% feature_names]

# Fail early if a model feature has no range defined above.
if (!all(feature_names %in% feature_ranges$feature)) {
    stop("No sampling range defined for: ",
         paste(setdiff(feature_names, feature_ranges$feature), collapse = ", "),
         call. = FALSE)
}

In [None]:
#' Draw a fresh unlabeled sample pool.
#'
#' Columns in `columns_left` are sampled with a maximin Latin hypercube
#' (`maximinLHS`) and rescaled to their range in the global `feature_ranges`
#' table. Every feature of the global `feature_names` that is not in
#' `columns_left` is filled with independent uniform draws over its range and
#' appended after the hypercube columns.
#'
#' @param selected.seed Seed passed to `set.seed()` before any sampling.
#' @param columns_left Character vector of feature names still in use;
#'   defaults to the global `feature_names`.
#' @return A data.table with the global `unlabeled_ins` rows: the
#'   `columns_left` columns first, then the eliminated features (if any).
refresh_sample_pool_Elim <- function(selected.seed, columns_left = feature_names) {
    # Every feature that will be sampled needs a range; a missing one used to
    # surface only as an obscure failure inside qunif()/runif().
    required_features <- union(columns_left, feature_names)
    missing_ranges <- setdiff(required_features, feature_ranges$feature)
    if (length(missing_ranges) > 0) {
        stop("No range defined in feature_ranges for: ",
             paste(missing_ranges, collapse = ", "), call. = FALSE)
    }

    set.seed(selected.seed)

    # Latin hypercube on the unit cube, one column per remaining feature.
    unlabeled_pool <- as.data.table(maximinLHS(n = unlabeled_ins, k = length(columns_left), dup = 5))
    setnames(unlabeled_pool, columns_left)

    # Rescale each unit-interval column to its feature range. match() picks
    # exactly one range row per feature, so a duplicated feature row cannot
    # turn the bounds into a recycled vector.
    range_row <- match(columns_left, feature_ranges$feature)
    for (col_idx in seq_along(columns_left)) {
        set(unlabeled_pool, j = col_idx,
            value = qunif(unlabeled_pool[[col_idx]],
                          feature_ranges$min_range[range_row[col_idx]],
                          feature_ranges$max_range[range_row[col_idx]]))
    }

    # Eliminated features get plain uniform draws, generated in the same
    # order as before so the random stream is unchanged.
    eliminated_columns <- setdiff(feature_names, columns_left)
    if (length(eliminated_columns) > 0) {
        elim_row <- match(eliminated_columns, feature_ranges$feature)
        random_pool_all <- as.data.table(lapply(seq_along(eliminated_columns), function(e) {
            runif(unlabeled_ins,
                  feature_ranges$min_range[elim_row[e]],
                  feature_ranges$max_range[elim_row[e]])
        }))
        setnames(random_pool_all, eliminated_columns)

        unlabeled_pool <- cbind(unlabeled_pool, random_pool_all)
    }

    return(unlabeled_pool)
}

# Test Set

In [None]:
#### Test Sets ####
# Read the test set of every size in test_ins from the data folder and keep
# it as a global object named test_set_<size>.
for (t in test_ins) {
    test_set.name <- paste0(data.path, "test_set_", model.type, "_", t, ".csv")
    test_set <- fread(test_set.name)

    assign(paste0("test_set_", t), test_set)
}

# Adaptive Training Set

In [None]:
# Initial training data for the adaptive runs, obtained from
# upload_training_set for the model type, the seeds and the initial size.
adaptive_initial_data <- upload_training_set(model.type, seed.focus, train_ins_Ad)

## Adaptive Sampling & Feature Elimination: Train & Test Metamodel

In [None]:
# Decide on strategy: eliminate features at the end of the iteration budget.
#
# sample_selection_iteration_order    : iterations in which new samples are selected
# feature_elimination_iteration_order : iterations in which features may be eliminated
# elimination_freq_schedule           : number of elimination passes per iteration
#                                       (one entry for each iteration before the last)

# New samples are selected in every iteration except the last one, for both
# model types.
sample_selection_iteration_order <- seq_len(max(iteration_budget - 1, 0))

if (model.type == "basic") {
    feature_elimination_iteration_order <- c(iteration_budget - 1)
} else if (model.type == "dummy") {
    feature_elimination_iteration_order <- (iteration_budget - 4):(iteration_budget - 1)
    # drop non-positive iteration numbers (budgets smaller than 5)
    feature_elimination_iteration_order <- feature_elimination_iteration_order[feature_elimination_iteration_order > 0]
}

# One pass in each of the last four iterations before the final one, none
# earlier. Derived from the budget instead of rep(0, iteration_budget - 5),
# which fails for budgets below 5; the result is identical for budgets >= 5.
elimination_freq_schedule <- as.numeric(
    seq_len(max(iteration_budget - 1, 0)) %in% ((iteration_budget - 4):(iteration_budget - 1))
)


In [None]:
#### Adaptive Feature Selection Training ####

# Test-set performance records, one row per trained model and test-set size
performance_table <- data.table(
    iter = numeric(), seed = numeric(), rep = numeric(), size = numeric(),
    mae = numeric(), rmse = numeric(), mape = numeric()
)
# Records of the value returned by obb_error_func, one row per trained model
obb_error <- data.table(
    iter = numeric(), obb_error = numeric(), seed = numeric(), rep = numeric()
)

## Empty containers for the collected results
predictedLabels_all <- data.table()
train_candidates_all <- data.table()
training_set_Ad_final <- data.table()
importance_table_AdFe_All <- data.table()
iteration_history_AdFe <- data.table()


# Start with every feature in use and nothing eliminated
columns_left <- feature_names
total_numof_eliminated_vars <- 0
    

In [None]:
# Label of this sampling strategy; it is part of every folder and file name.
sample.type <- paste0("AdFe_", selection_metric)

# Sub-folders of the output folder: result tables, saved models and
# predicted labels.
sample.folder <- paste0(sample.type, "/")
models.folder <- paste0("models_", sample.type, "/")
PL.folder <- paste0("PL_", sample.type, "/")

# Create the three sub-folders (no warning when they already exist).
invisible(lapply(
    c(sample.folder, models.folder, PL.folder),
    function(sub.folder) {
        dir.create(file.path(folder.path, output.folder, sub.folder), showWarnings = FALSE)
    }
))

# Adaptive sampling with feature elimination.
#
# For every seed in seed.focus and every replication in metarep the loop runs
# iteration_budget iterations. Each iteration:
#   1. trains a random forest on the current training set using columns_left
#      and saves it as an .rds file,
#   2. appends the value of obb_error_func and the test-set performance to
#      the in-memory tables and to the "_Append_" csv files,
#   3. writes the variable importance (write_importance.rf),
#   4. unless it is the last iteration: selects new candidates from a fresh
#      unlabeled pool, labels them with run_ABM and adds them to the training
#      set, then eliminates features according to the schedule.
# A history row is recorded for every iteration, including the last one, so
# the iteration-history csv never receives an all-NA row.
for (i in seed.focus) {

    print(paste0("seed : ",i,"  Adaptive Sampling with Feature Selection section start time : ",Sys.time()))

    for (r in metarep) { # replications
        set.seed(i + r)
        print(paste0("seed : ", i,"   rep : ", r, "  Adaptive Sampling with Feature Selection section start time : ", Sys.time()))

        # every replication starts from the initial training data of this seed
        training_set_Ad = copy(adaptive_initial_data[seed == i, .SD, .SDcols = -c("seed")])
        train_candidates_table = data.table()

        columns_left = feature_names # reset at the beginning of each replication
        total_numof_eliminated_vars <- 0 # reset at the beginning of each replication

        iteration_history = data.table("seed" = integer(),"rep" = integer(),"iter_no" = integer(), "IsFeatureEliminated" = logical(), "IsDataSelected" = logical())

        iter = 1
        while (iter <= iteration_budget) {
            print(iter)

            trainx = training_set_Ad[,.SD, .SDcols = columns_left]
            trainy = training_set_Ad$output

            # Train the model
            model_Sub <- randomForest( x = trainx, y =  trainy,importance = TRUE,ntree = ntree, mtry = mtry, nperm = nperm)
            model_Sub.name = paste0("model_",sample.type,"_", iter, "_seed_", i, "_rep_",r)
            assign(model_Sub.name,model_Sub)
            model_Sub.path = paste0(outputs.path,models.folder, paste0(model_Sub.name,"_size_",train_ins_Ad, ".rds"))  # to save the model
            saveRDS(model_Sub, model_Sub.path)

            # the feature ranking is refreshed only while no feature has been
            # eliminated; afterwards the last full-model ranking is reused
            if (length(columns_left) == length(feature_names)) {
                ranked_features = get_variable_importance(model_Sub)
            }

            # to append: the empty template fixes the column names of the row
            obb_err = rbind( data.table(iter = numeric() ,obb_error = numeric(),seed = numeric(),rep = numeric())
                            ,data.table(iter, obb_error_func(model_Sub), i, r), use.names = FALSE)
            fwrite(obb_err,paste0(outputs.path,sample.folder,model.type,"_Append_","obb_error_",sample.type,".csv") ,append = TRUE)
            ##
            obb_error = rbind(obb_error,data.table(iter,obb_error_func(model_Sub),i,r),use.names=FALSE)

            # test the model on every test set
            predictedLabels_Comb = data.table()
            for (t in test_ins) {
                test_set = get(paste0("test_set_", t))

                test_predictions_Sub = get_test_predictions(model_Sub, test_set,error_type)
                predictedLabels_Sub = test_predictions_Sub[[1]]
                # tag the prediction and error columns with the iteration number
                setnames(predictedLabels_Sub, c("pred_output", error_type), c(paste0("pred_output_", iter), paste0(error_type, "_", iter)))

                fwrite(predictedLabels_Sub
                       ,paste0(outputs.path,PL.folder,model.type
                               ,"_","predictedLabels.",sample.type,"_seed_",i,"_iter_",iter,"_rep_",r,"_size_",t,".csv") )

                predictedLabels_Comb = rbind(predictedLabels_Comb, data.table(size = t,predictedLabels_Sub))

                # Keep test set error records
                # to append: the empty template fixes the column names of the row
                perf_table = rbind(data.table(iter = numeric(),seed = numeric(),rep = numeric(),size = numeric(), mae = numeric(), rmse = numeric(), mape = numeric())
                                  ,data.table(iter, i, r, t, test_predictions_Sub[[2]]),use.names = FALSE )
                fwrite(perf_table,paste0(outputs.path,sample.folder,model.type,"_Append_","performance_table_",sample.type,".csv"),append = TRUE )
                ##
                performance_table = rbind(performance_table, data.table(iter, i, r, t, test_predictions_Sub[[2]]),use.names = FALSE)
            }
            if (iter == 1) {
                # first four columns of the combined table start the wide table
                predictedLabels_table = copy(predictedLabels_Comb[,1:4])
            }

            predictedLabels_table = cbind(predictedLabels_table, predictedLabels_Comb[,.SD ,.SDcols = c(paste0("pred_output_", iter), paste0(error_type, "_", iter))])

            write_importance.rf(i,r,iter,model_Sub,sample.type)#last one=sample_type

            # One history row per iteration, with both flags off by default.
            # It is added for the last iteration too, so that
            # iteration_history[iter] below always refers to an existing row.
            iteration_history = rbind(iteration_history,data.table(i,r,iter,0,0), use.names = FALSE)

            if (iter != iteration_budget) { # below efforts are unnecessary when the budget is reached.

                if (iter %in% sample_selection_iteration_order) { # select new data candidates before elimination
                    ## sample selection from unlabeled data select candidates
                    unlabeled_set <- refresh_sample_pool_Elim(i + r + iter, columns_left)
                    train_candidates = sample_selection(selected_ins, unlabeled_set, model_Sub)

                    # run ABM to find outputs of train candidates
                    print(paste0("ABM train_candidate run start time : ",Sys.time()))
                    train_candidates = run_ABM(nofrep, selected_ins, train_candidates)
                    print(paste0("ABM train_candidate run end time : ",Sys.time()))

                    fwrite(data.table(train_candidates, "iter" = iter, "seed" = i, "rep" = r)
                           ,paste0(outputs.path,sample.folder,model.type,"_train_candidates_table_",sample.type,".csv"),append = TRUE )

                    # add labeled candidates to the train data
                    training_set_Ad = rbind(training_set_Ad, train_candidates[, -c("idx")])

                    # update iteration_history (by reference)
                    iteration_history[iter, IsDataSelected := 1]
                }

                # run as many elimination passes as scheduled for this iteration
                elimination_freq = 1
                while (elimination_freq <= elimination_freq_schedule[iter]) {
                    if (iter %in% feature_elimination_iteration_order) { # feature elimination module

                        feature_elimination_result = feature_elimination(h, total_numof_eliminated_vars, ranked_features)

                        columns_left = feature_elimination_result[[1]]  #
                        eliminated_columns = feature_elimination_result[[4]]  #   not necessary
                        total_numof_eliminated_vars = as.numeric(feature_elimination_result[2])
                        numof_eliminated_vars = as.numeric(feature_elimination_result[3])  #   not necessary

                        # update iteration_history (by reference)
                        iteration_history[iter, IsFeatureEliminated := 1]
                    }
                    elimination_freq = elimination_freq + 1
                }
            }
            fwrite(iteration_history[iter],paste0(outputs.path,sample.folder,model.type,"_iteration_history_",sample.type,".csv"),append = TRUE )
            iter = iter + 1
        }
        fwrite(data.table(training_set_Ad, "seed" = i,"rep" = r),paste0(outputs.path,sample.folder,model.type,"_FinalTrainData_",sample.type,".csv") ,append = TRUE)
        fwrite(data.table(predictedLabels_table, "seed" = i, "rep" = r),paste0(outputs.path,sample.folder,model.type,"_predictedLabels_table_",sample.type,".csv"),append = TRUE )

        print(paste0("seed : ",i,"   rep : ", r,"  Adaptive Sampling with Feature Elimination section end time : ",Sys.time()))
    }
    print(paste0("seed : ",i,"  Adaptive Sampling with Feature Elimination section end time : ",Sys.time()))
    #rm(training_set_Ad,predictedLabels_table,train_candidates_table)
}

In [None]:
# Final records: keep copies of the two error tables under AdFe-specific
# names. The commented-out lines are copies that are currently disabled.
#FinalTrainData_AdFe = copy(training_set_Ad_final)
performance_table_AdFe <- copy(performance_table)
obb_error_AdFe <- copy(obb_error)
#predictedLabels_table_AdFe = copy(predictedLabels_all)
#train_candidates_table_AdFe  = copy(train_candidates_all)

In [None]:
# Drop the working tables of the adaptive run.
rm(list = c(
    "training_set_Ad_final",
    "obb_error",
    "performance_table",
    "predictedLabels_all",
    "train_candidates_all"
))

In [None]:
# Write the final performance and error tables into the sample folder.
# The commented-out calls are exports that are currently disabled.
#fwrite(importance_table_AdFe_All,paste0(outputs.path,model.type,"_","importance_table_AdFe","_",selection_metric,".csv") )
#fwrite(FinalTrainData_AdFe,paste0(outputs.path,model.type,"_","FinalTrainData_AdFe","_",selection_metric,".csv") )
fwrite(
    performance_table_AdFe,
    paste0(outputs.path, sample.folder, model.type, "_performance_table_AdFe_", selection_metric, ".csv")
)
#fwrite(train_candidates_table_AdFe,paste0(outputs.path,model.type,"_","train_candidates_table_AdFe","_",selection_metric,".csv") )
#fwrite(predictedLabels_table_AdFe,paste0(outputs.path,model.type,"_","predictedLabels_table_AdFe","_",selection_metric,".csv") )
fwrite(
    obb_error_AdFe,
    paste0(outputs.path, sample.folder, model.type, "_obb_error_AdFe_", selection_metric, ".csv")
)
#fwrite(iteration_history_AdFe,paste0(outputs.path,model.type,"_","iteration_history_AdFe","_",selection_metric,".csv") )

In [None]:
# Clean up the AdFe result objects. Some of the listed names may not exist
# (the assignments that would create several of them are commented out in
# this notebook), and rm() warns "object not found" for each missing name,
# so only the objects that are actually present are removed.
rm(list = intersect(
    c("importance_table_AdFe", "FinalTrainData_AdFe", "iteration_history_AdFe",
      "performance_table_AdFe", "train_candidates_table_AdFe",
      "predictedLabels_table_AdFe", "obb_error_AdFe"),
    ls()
))

# Quit NetLogo

In [None]:
# Quit the NetLogo instance referenced by nl.obj = nl.model.
NLQuit(nl.obj = nl.model)
# Alternative kept for reference: quit every NetLogo instance.
#NLQuit(all=TRUE)