# Execute the Graph-Based Spatial Cross-Validation experiments from the ICMLA21 paper

## 1 - Initialize libraries

In [1]:
import os
import pandas as pd
from weka.core import jvm
from src import utils
from src.pipeline import Pipeline
from src.visualization.performance import VizMetrics
from src.visualization.dependence import VizDependence
# Pin this kernel process to CPU affinity mask 0xff (the eight lowest-numbered
# CPUs) through the `taskset` shell utility. The call is the last expression of
# the cell, so its shell exit status is what the cell displays.
os.system(f"taskset -p 0xff {os.getpid()}")

atual máscara de afinidade do pid 25980: ffffffff
nova máscara de afinidade do pid 25980: ff


You can install them with  `pip install urbanaccess pandana` or `conda install -c udst pandana urbanaccess`
  warn(


0

## 2 - Initialize loggers

In [2]:
# Run the project's logging set-up helpers from src.utils, in this order.
# NOTE(review): judging by their names these configure colored log output,
# rich tracebacks and the base logging configuration — confirm in src/utils,
# including whether the call order matters.
utils.initialize_coloredlog()
utils.initialize_rich_tracerback()
utils.initialize_logging()

## 3 - Initialize working path and environmental variables

In [3]:
# Absolute path of the current working directory (where the notebook runs).
working_dir = os.path.abspath('')
# Project path: the working directory with its last five characters removed.
# NOTE(review): the magic 5 presumably strips a trailing sub-directory name so
# that the result is the project root — confirm against the repository layout.
project_dir = working_dir[:-5]
# Load the environment variables for that project directory via src.utils.
env_var = utils.load_env_variables(project_dir)

## 4 - Set pipeline switchers (by default, all processes are set to True)

In [4]:
# Pipeline switchers: one boolean flag per pipeline process, handed to the
# Pipeline through its `switchers` argument.
# NOTE(review): presumably a False flag makes the Pipeline skip that process —
# confirm in src/pipeline.
switchers = dict(
    scv=False,
    fs=False,
    train=True,
    predict=True,
    evaluate=False,
)

## 5 - List all datasets

In [5]:
# Sampled Brazil election datasets (dec0.3, prob0.1), one per removed region.
brazil_removed_datasets = [
    f"Brazil_Election_2018_Sampled_dec0.3_prob0.1_removed_{region}"
    for region in ("north", "northeast", "south", "southeast", "centerwest")
]

# Sampled Brazil election datasets (dec0.3) for prob0.1 through prob0.9.
brazil_datasets = [
    f"Brazil_Election_2018_Sampled_dec0.3_prob0.{tenth}"
    for tenth in range(1, 10)
]

# Brazil election datasets, one per removed geoeconomical region.
brazil_geoeconomical_regions = [
    f"Brazil_Election_2018_removed_{region}"
    for region in ("AMAZONIA", "NORDESTE", "CENTRO_SUL")
]

# US crop-yield datasets.
us_datasets = [
    "US_Corn_Yield_2016",
    "US_Wheat_Yield_2014",
]

# Single-dataset list for quick test runs (the prob0.2 entry of brazil_datasets).
jsut_for_test = [brazil_datasets[1]]

## 6 - Run the pipeline for the Optimistic approach
Note: the results and files generated by the pipeline execution are written to a `Results` folder created in the data directory.

In [None]:

# Run the Optimistic pipeline for every (dataset, ML method) combination.
fs_method = "CFS"
ml_methods = ["KNN", "OLS", "Lasso", "Ridge", "ElasticNet", "DT", "LGBM", "RF", "MLP", "SVM"]
dataset_list = brazil_datasets

# The Weka JVM is only started when CFS feature selection is actually switched
# on. The decision is taken once, so start and stop stay paired even if the
# switchers change while the pipelines run.
needs_jvm = fs_method == "CFS" and switchers["fs"]
if needs_jvm:
    jvm.start()

try:
    # NOTE(review): hard-coded, machine-specific dataset root that overrides the
    # value returned by utils.load_env_variables — consider moving it to the
    # environment configuration instead.
    env_var["root_path"] = "/exp/tpinho/Datasets/"
    for dataset in dataset_list:
        # Work on a per-dataset copy: a plain assignment would alias the shared
        # `switchers` dict, so any change made for one dataset would leak into
        # every later dataset (and into the JVM check above).
        changing_switchers = dict(switchers)

        # Load the data
        dataset_root = os.path.join(env_var["root_path"], dataset)
        data = pd.read_csv(
            os.path.join(dataset_root, "data.csv"),
            index_col="INDEX",
            low_memory=False,
        )
        # Remove the coordinate columns when present; errors="ignore" replaces
        # the former try/except KeyError and also drops whichever of the two
        # columns exists when only one of them is present.
        data = data.drop(
            columns=["[GEO]_LATITUDE", "[GEO]_LONGITUDE"], errors="ignore"
        )

        for ml_method in ml_methods:
            Optimistic = Pipeline(
                root_path=dataset_root,
                data=data,
                index_col="INDEX",
                fold_col="INDEX_FOLDS",
                target_col="TARGET",
                scv_method="Optimistic",
                fs_method=fs_method,
                ml_method=ml_method,
                switchers=changing_switchers,
            )
            print(f"Running the Optimistic approach for dataset: {dataset} ML Method = {ml_method}")
            Optimistic.run()
            # NOTE(review): disabling "scv" and "fs" here after the first ML
            # method of a dataset was previously sketched in commented-out
            # code; with the per-dataset copy above it would now be safe.
finally:
    # Always shut the JVM down, even when a dataset or model fails mid-run.
    if needs_jvm:
        jvm.stop()

Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = KNN


Training model: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:12<00:00,  2.10it/s]
Predicting test set: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 28.30it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = OLS


Training model: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:02<00:00,  9.04it/s]
Predicting test set: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:03<00:00,  7.57it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = Lasso


Training model: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:12<00:00,  2.16it/s]
Predicting test set: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:04<00:00,  6.38it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = Ridge


Training model: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:05<00:00,  4.65it/s]
Predicting test set: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:03<00:00,  6.84it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = ElasticNet


Training model: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:05<00:00,  5.22it/s]
Predicting test set: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:03<00:00,  7.49it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = DT


Training model: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:18<00:00,  1.48it/s]
Predicting test set: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 60.36it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = LGBM


Training model: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:18<00:00,  1.43it/s]
Predicting test set: 100%|████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 49.48it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = RF


Training model:  74%|███████████████████████████████████████████████████████████████████████▊                         | 20/27 [23:11<08:07, 69.58s/it]
