# Loading Packages & Initialization

In [1]:
# Project root folder; the setup script is sourced relative to it.
# NOTE(review): the setup script presumably loads the packages and helper
# functions used by the later cells -- confirm against TezV3_SetupCode.r.
folder.path <- "C:/Users/paslanpatir/Desktop/TEZ_v2/"
source(paste0(folder.path, "pickleware/pickleware/TezV3_SetupCode.r"))

# 1 = run NetLogo without a GUI, 0 = open the GUI.
Is_Headless <- 1
# Model name; also used as the NetLogo instance id (nl.obj) in headless mode.
nl.model <- "Segregation_Dummy"

nl.path <- "C:/Program Files/NetLogo 6.0.4/app"
# Re-assigned after sourcing the setup script, so the value below is the one in effect.
folder.path <- "C:/Users/paslanpatir/Desktop/TEZ_v2/"

# Full path of the .nlogo file to load.
model.path <- paste0(folder.path, nl.model, ".nlogo")

if (Is_Headless != 0) {
    # Headless: the instance is registered under the model name.
    NLStart(nl.path, gui = FALSE, nl.jarname = "netlogo-6.0.4.jar", nl.obj = nl.model)
    NLLoadModel(model.path, nl.obj = nl.model)
} else {
    # GUI: started and loaded without an explicit nl.obj.
    NLStart(nl.path, gui = TRUE, nl.jarname = "netlogo-6.0.4.jar")
    NLLoadModel(model.path)
}

-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.2.1     v purrr   0.3.3
v tibble  2.1.3     v dplyr   0.8.3
v tidyr   1.0.0     v stringr 1.4.0
v readr   1.3.1     v forcats 0.4.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::between()   masks data.table::between()
x dplyr::filter()    masks stats::filter()
x dplyr::first()     masks data.table::first()
x purrr::flatten()   masks jsonlite::flatten()
x dplyr::lag()       masks stats::lag()
x dplyr::last()      masks data.table::last()
[31

In [2]:
# "Segregation" is the basic model; every other model name is treated as the
# dummy variant. The condition is a scalar, so a plain if/else is used.
model.type <- if (nl.model == "Segregation") "basic" else "dummy"

# Path of the data folder
data.path <- paste0(folder.path, "data/")

# Folder in which the outputs are recorded; its name carries the model type and
# today's date. dir.create() is silent when the folder already exists.
output.folder <- paste0("outputs_DENEME_", model.type, "_", Sys.Date())
dir.create(file.path(folder.path, output.folder), showWarnings = FALSE)

outputs.path <- paste0(folder.path, output.folder, "/")

# Path of the ReadMe file that keeps info about the output folder
ReadMe <- paste0(outputs.path, "ReadMe_", model.type, ".txt")

# Model Parameters & Functions

## Set model parameters

In [3]:
#### Model Parameters ####
# Number of replications for each instance
nofrep <- 1

# Feature names are listed in the order in which run_model defines them;
# feature_ranges holds the lower/upper bound of every feature in that same order.
if (model.type == "dummy") {
    feature_names <- c(
        "density", "%-similar-wanted", "budget-multiplier-dummy",
        "density-multiplier-dummy", "noise-dummy", "tick-limit"
    )

    feature_ranges <- data.table(
        feature   = feature_names,
        min_range = c(10, 10, 1, 0.01, 1e-05, 90),
        max_range = c(90, 90, 10, 1, 1e-04, 110)
    )
} else if (model.type == "basic") {
    feature_names <- c("density", "%-similar-wanted")

    feature_ranges <- data.table(
        feature   = feature_names,
        min_range = c(10, 10),
        max_range = c(90, 90)
    )
}

# Name of the model output
output_name <- "percent-similar"

# Number of input parameters of the agent-based model
nofparams <- length(feature_names)

# Random forest parameters
ntree <- 300
#mtry = 2
# Multiplier applied to the default mtry: 1 keeps the default, 2 allows at most twice the default
mtry.multiplier <- 1
nperm <- 5

## Set user parameters

In [5]:
#### User parameters ####
# Error measure (alternatives: MAPE, BIAS)
error_type <- "RMSE"

# Uncertainty measure used for sample selection (alternative: 'range')
selection_metric <- "coefvar"
# Label of this sampling strategy; used in output folder and file names
sample.type <- paste0("AdFe_", selection_metric)

# Elimination scheme: "NRFE" or "RFE"
elimination.type <- "NRFE"

# Number of iterations and the meta-replication ids to run
iteration_budget <- 6
metarep <- 1

# Number of instances
unlabeled_ins <- 100
test_ins <- c(100, 200)
train_ins_oneshot <- 100
train_ins_Ad <- 50

# Selection parameter: number of instances selected in each step
selected_ins <- 1

# Elimination parameters
p <- 0.5 # elimination proportion
# h = 1
oob_allowance <- 0.1

# Seeds to run
seed.focus <- 0

## !!!
unlabeled.type <- "refresh and ElimInducedSampling"


#log_entry()

# Test Set

In [6]:
#### Test Sets ####
# Read one CSV per requested test-set size and stack them into a single table.
# The `size` column records which test set each row belongs to.
# The tables are collected in a list and bound once instead of growing
# `test_set` with rbind() inside a loop; use.names = TRUE keeps the
# match-by-column-name behaviour of rbind() on data.tables.
test_set <- rbindlist(
    lapply(test_ins, function(test_size) {
        test_set.name <- paste0(data.path, "test_set", "_", model.type, "_", test_size, ".csv")
        data.table(size = test_size, fread(test_set.name))
    }),
    use.names = TRUE
)

# Adaptive Training Set

In [7]:
# Initial training data for the adaptive runs, obtained from upload_training_set
# for the current model type, seeds and initial training size.
# The main loop later filters this table by its `seed` column.
adaptive_initial_data <- upload_training_set(model.type, seed.focus, train_ins_Ad)

## Adaptive & Feature Elimination Train & Test Metamodel

In [9]:
# Strategy choice: the first iteration at which feature elimination may be attempted.
elimination_start_iter <- 2

In [10]:
#### Adaptive Feature Selection Training ####
# Initial state: nothing has been eliminated yet and every feature is still in use.
eliminated_columns <- NULL
total_numof_eliminated_vars <- 0
columns_left <- feature_names


In [None]:
# Names of the sub-folders (inside the output folder) used by this sampling strategy:
# one for result tables, one for saved models and one prefixed "PL_".
sample.type <- paste0("AdFe_", selection_metric)
sample.folder <- paste0(sample.type, "/")
models.folder <- paste0("models_", sample.type, "/")
PL.folder <- paste0("PL_", sample.type, "/")

# Create the three sub-folders; existing folders are left untouched without a warning.
invisible(lapply(
    c(sample.folder, models.folder, PL.folder),
    function(sub.folder) {
        dir.create(file.path(folder.path, output.folder, sub.folder), showWarnings = FALSE)
    }
))

# Adaptive sampling with feature elimination, run for every seed in `seed.focus`
# and every replication in `metarep`.
#
# Each (seed, rep) run starts from the rows of `adaptive_initial_data` with the
# matching seed and from the full `feature_names` set, and then performs
# `iteration_budget` iterations:
#   1. fit a random forest on the currently retained columns and save it as .rds;
#   2. append the OOB error, test accuracy and variable importance to the output
#      files (obb_error_func / write_test_accuracy / write_importance.rf);
#   3. on every iteration except the last one: draw candidates from a refreshed
#      unlabeled pool, label them with run_ABM, optionally eliminate features,
#      and append the labeled candidates to the training set.
# One row per iteration is kept in `iteration_history` and appended to a CSV.
for (i in seed.focus) {
    print(paste0("seed : ",i,"  Adaptive Sampling with Feature Selection section start time : ",Sys.time()))
    for (r in metarep) {
        print(paste0("seed : ", i,"   rep : ", r, "  Adaptive Sampling with Feature Selection section start time : ", Sys.time()))
        set.seed(i + r)

        training_set_Ad <- copy(adaptive_initial_data[seed == i, .SD, .SDcols = -c("seed")])
        train_candidates_table <- data.table()

        # Reset the elimination state at the beginning of each (seed, rep) run
        columns_left <- feature_names
        total_numof_eliminated_vars <- 0

        iteration_history <- data.table("seed" = integer(),"rep" = integer(),"iter_no" = integer()
                              ,"IsFeatureEliminated" = logical(), "IsDataSelected" = logical()
                              ,"NumOfEliminated" = integer())
        iter <- 1
        while (iter <= iteration_budget) {
            print(iter)

            trainx <- training_set_Ad[,.SD, .SDcols = columns_left]
            trainy <- training_set_Ad$output

            # Train the model on the columns that are still in use
            model_Sub <- randomForest( x = trainx, y =  trainy,importance = TRUE
                                      ,ntree = ntree, nperm = nperm
                                      ,mtry = mtry_default(columns_left) * mtry.multiplier)
            # Save the model.
            # NOTE(review): the file name records train_ins_Ad (the initial size),
            # not the current number of training rows -- confirm this is intended.
            model_Sub.name <- paste0("model_",sample.type,"_", iter, "_seed_", i, "_rep_",r)
            model_Sub.path <- paste0(outputs.path,models.folder, paste0(model_Sub.name,"_size_",train_ins_Ad, ".rds"))
            saveRDS(model_Sub, model_Sub.path)

            # Update the variable importance ranking: always under "RFE", but under
            # "NRFE" only while no feature has been eliminated yet.
            # Scalar conditions, hence the short-circuit operators.
            if (elimination.type == "RFE" || (elimination.type == "NRFE" && (length(columns_left) == length(feature_names)))) {
                ranked_features <- get_variable_importance(model_Sub)
            }

            # Write errors
            obb_err <- obb_error_func(model_Sub)
            fwrite(data.table(iter,obb_error = obb_err,seed = i,rep = r)
                   ,paste0(outputs.path,sample.folder,model.type,"_","obb_error_",sample.type,".csv") ,append = TRUE)

            write_test_accuracy(i,r,iter,model_Sub,test_set, error_type)
            write_importance.rf(i,r,iter,model_Sub,sample.type)#last one=sample_type

            # Register this iteration in the history before anything else happens.
            # This is done for EVERY iteration, including the last one, so that
            # iteration_history[iter] written below is always a real row; otherwise the
            # final iteration would index past the end and write an all-NA row.
            iteration_history <- rbind(iteration_history,data.table(i,r,iter,0,0,0), use.names = FALSE)

            if (iter != iteration_budget) { # the efforts below are unnecessary once the budget is reached

                ### SAMPLE SELECTION ###
                # Candidates are selected before elimination, but they are added to the
                # training set only after the eliminated features have been decided.
                unlabeled_set <- refresh_sample_pool(i + r + iter, columns_left)
                train_candidates <- sample_selection(selected_ins, unlabeled_set, model_Sub,selection_metric)

                # Run the ABM to find the outputs of the train candidates
                print(paste0("ABM train_candidate run start time : ",Sys.time()))
                train_candidates <- run_ABM(nofrep, selected_ins, train_candidates)
                print(paste0("ABM train_candidate run end time : ",Sys.time()))

                fwrite(data.table(train_candidates, "iter" = iter, "seed" = i, "rep" = r)
                       ,paste0(outputs.path,sample.folder,model.type,"_train_candidates_table_",sample.type,".csv"),append = TRUE )

                ### SAMPLE SELECTION ENDS ###

                ### FEATURE ELIMINATION ###
                if (elimination_start_iter <= iter && length(columns_left) > 2) {
                    apply_elim <- FALSE

                    ### FEATURE ELIMINATION PART I ###
                    # Decide how many features will be eliminated: start with the
                    # proportion p of the remaining features and shrink the proposal
                    # (p^2, p^3, ...) until the OOB error stays within the allowance.
                    elim_check_iter <- 1
                    h <- floor(length(columns_left) * (p^elim_check_iter))
                    # Nothing to try when the first proposal already rounds down to zero
                    check_elim <- h >= 1
                    while (check_elim) {

                        # Assume as if feature(s) will be eliminated
                        feature_elimination_result <- feature_elimination(h, columns_left, ranked_features)
                        planned_columns_left <- feature_elimination_result[[1]]

                        model_Sub_afterElim <- randomForest(  x = training_set_Ad[,.SD, .SDcols = planned_columns_left]
                                                             ,y =  training_set_Ad$output
                                                             ,importance = TRUE, nperm = nperm
                                                             ,ntree = ntree
                                                            , mtry = mtry_default(planned_columns_left) * mtry.multiplier)
                        # Save the trial model; the file name also carries h
                        model_Sub_afterElim.name <- paste0("model_afterElim_",sample.type,"_", iter, "_seed_", i, "_rep_",r,"_h_",h)
                        model_Sub_afterElim.path <- paste0(outputs.path,models.folder, paste0(model_Sub_afterElim.name,"_size_",train_ins_Ad, ".rds"))
                        saveRDS(model_Sub_afterElim, model_Sub_afterElim.path)

                        new_oob <- obb_error_func(model_Sub_afterElim)

                        if (new_oob < (obb_err + obb_err * oob_allowance)) {
                            # Error after elimination is within the allowance: accept
                            check_elim <- FALSE
                            apply_elim <- TRUE
                        } else {
                            elim_check_iter <- elim_check_iter + 1
                            h_upd <- floor(length(columns_left) * (p^elim_check_iter))
                            # Stop when the proposal no longer changes, or when it drops to
                            # zero: eliminating no feature would only retrain on the same
                            # columns and could be recorded as an elimination of 0 features.
                            if (h_upd == h || h_upd < 1) {
                                check_elim <- FALSE
                            }
                            h <- h_upd
                        }
                    }

                    ### FEATURE ELIMINATION PART II ###
                    # Really eliminate
                    if (apply_elim) {
                        # Update iteration_history
                        iteration_history[iter, IsFeatureEliminated := 1]
                        iteration_history[iter, NumOfEliminated := length(columns_left) - length(planned_columns_left)]

                        columns_left <- planned_columns_left
                        eliminated_columns <- feature_elimination_result[[4]]
                    }
                }
                ### FEATURE ELIMINATION ENDS ###

                # Add labeled candidates to the train data
                training_set_Ad <- rbind(training_set_Ad, train_candidates[, -c("idx")],use.names = TRUE)
                # Update iteration_history
                iteration_history[iter, IsDataSelected := 1]
            }
            fwrite(iteration_history[iter],paste0(outputs.path,sample.folder,model.type,"_iteration_history_",sample.type,".csv"),append = TRUE )
            iter <- iter + 1
        }
        fwrite(data.table(training_set_Ad, "seed" = i,"rep" = r),paste0(outputs.path,sample.folder,model.type,"_FinalTrainData_",sample.type,".csv") ,append = TRUE)

        print(paste0("seed : ",i,"   rep : ", r,"  Adaptive Sampling with Feature Elimination section end time : ",Sys.time()))
    }
    print(paste0("seed : ",i,"  Adaptive Sampling with Feature Elimination section end time : ",Sys.time()))
    #rm(training_set_Ad,predictedLabels_table,train_candidates_table)
}

In [None]:
# Remove leftover result objects from the workspace. Only names that currently
# exist are passed to rm(), so running this cell when some (or all) of them are
# absent does not raise "object not found" warnings.
rm(list = intersect(
    c("training_set_Ad_final", "obb_error", "performance_table",
      "predictedLabels_all", "train_candidates_all"),
    ls()
))

In [None]:
# Remove the AdFe result tables from the workspace. Only names that currently
# exist are passed to rm(), so running this cell when some (or all) of them are
# absent does not raise "object not found" warnings.
rm(list = intersect(
    c("importance_table_AdFe", "FinalTrainData_AdFe", "iteration_history_AdFe",
      "performance_table_AdFe", "train_candidates_table_AdFe",
      "predictedLabels_table_AdFe", "obb_error_AdFe"),
    ls()
))

# Quit NL

In [None]:
# Shut down the NetLogo instance, mirroring how it was started: the GUI instance
# (Is_Headless == 0) is started without an nl.obj, so it is closed without one;
# the headless instance is registered under nl.model and is closed by that name.
if (Is_Headless == 0) {
    NLQuit()
} else {
    NLQuit(nl.obj = nl.model)
}
#NLQuit(all=TRUE)