# Execute the Graph-Based Spatial Cross-Validation experiments from ICMLA21 Paper

## 1 - Initialize libraries

In [7]:
import os
import pandas as pd
from weka.core import jvm
from pathlib import Path
from src import utils
from src.pipeline import Pipeline
from src.visualization.performance import VizMetrics
from src.visualization.dependence import VizDependence

## 2 - Initialize loggers

In [8]:
# Run the logging/traceback initializers from src.utils in their original order.
# What each helper configures is defined in src.utils, not in this notebook.
for _logging_initializer in (
    utils.initialize_coloredlog,
    utils.initialize_rich_tracerback,
    utils.initialize_logging,
):
    _logging_initializer()

## 3 - Initialize working path and environment variables

In [9]:
# Dataset whose validation parameters are loaded below
dataset = "Brazil_Election_2018"

# Project root: two levels above the current working directory
project_dir = str(Path.cwd().resolve().parents[1])

# Environment variables are loaded through the project's helper, given the project root
env_var = utils.load_env_variables(project_dir)

# Validation parameters live in <project_dir>/parameters/validation/<dataset>.json
parameters_path = os.path.join(project_dir, "parameters", "validation", f"{dataset}.json")
parameters = utils.load_json(parameters_path)

## 4 - Set pipeline switchers (the default is to set every process to True)

In [10]:
# Pipeline stage toggles, keyed by stage name. In this run only "fs" is enabled;
# every other stage is switched off.
switchers = dict(
    scv=False,
    fs=True,
    train=False,
    predict=False,
    evaluate=False,
)

## 5 - List all datasets

In [11]:
# Directory that holds one sub-folder per dataset variant
dataset_path = os.path.join(env_var["root_path"], dataset)

# Collect every sub-directory except "Original", sorted by name.
# Filtering while building the list (instead of calling list.remove afterwards)
# avoids a ValueError when no "Original" folder is present.
dataset_list = sorted(
    folder
    for folder in os.listdir(dataset_path)
    if folder != "Original" and os.path.isdir(os.path.join(dataset_path, folder))
)

## 6 - Run the pipeline for the Optimistic approach
Note: the results and files generated by the pipeline execution are written to a `Results` folder created in the data directory.

In [12]:
# Feature-selection method and the list of ML methods come from the loaded parameters
fs_method = parameters["fs_method"]
ml_methods = parameters["ml_methods"]

# The Weka JVM is started only when "CFS" appears in fs_method and the "fs" stage
# is switched on. The condition is evaluated once so start() and stop() stay paired.
needs_jvm = "CFS" in fs_method and switchers["fs"]
if needs_jvm:
    jvm.start()

# The loop variable is called `dataset_name` (not `dataset`) so the notebook-level
# `dataset` used to build `dataset_path` is not overwritten; otherwise re-running
# the dataset-listing cell after this one would resolve a wrong directory.
for dataset_name in dataset_list:
    dataset_dir = os.path.join(dataset_path, dataset_name)
    # Load the data
    data_path = os.path.join(dataset_dir, "data.csv")
    data = pd.read_csv(data_path, index_col="INDEX", low_memory=False)
    if parameters["cols_remove"]:
        # Non-mutating drop: rebinding keeps the cell free of in-place side effects
        data = data.drop(columns=parameters["cols_remove"])
    # One pipeline run per ML method, all sharing the same loaded data frame
    for ml_method in ml_methods:
        # NOTE(review): cols_remove is also forwarded to Pipeline although the columns
        # were already dropped above - confirm Pipeline tolerates missing columns.
        Optimistic = Pipeline(
            root_path=dataset_dir,
            data=data,
            index_col=parameters["index_col"],
            fold_col=parameters["fold_col"],
            target_col=parameters["target_col"],
            scv_method="Optimistic",
            fs_method=fs_method,
            ml_method=ml_method,
            switchers=switchers,
            cols_remove=parameters["cols_remove"]
        )
        print(f"Running the Optimistic approach for dataset: {dataset_name} ML Method = {ml_method}")
        Optimistic.run()

if needs_jvm:
    jvm.stop()

Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:01<00:00, 16.43it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 85.36it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 86.20it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 80.35it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 86.24it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 68.55it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 57.87it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:11<00:00,  2.45it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 48.15it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:03<00:00,  7.50it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 27.81it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 90.22it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 93.89it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 94.21it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 94.61it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 80.33it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 55.86it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:08<00:00,  3.33it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 31.28it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:02<00:00, 13.24it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 34.62it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 96.69it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 93.95it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 102.33it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 102.81it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 76.85it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 65.73it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:06<00:00,  3.95it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 40.08it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:01<00:00, 18.78it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 43.88it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 106.66it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 112.59it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 116.69it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 113.25it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 103.99it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 69.16it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:05<00:00,  4.95it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 33.47it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 36.33it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 62.50it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 102.54it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 114.79it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 125.84it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 128.18it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 117.65it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 94.57it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:03<00:00,  7.09it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 28.52it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 73.77it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 71.78it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 123.25it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 128.22it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 120.20it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 107.29it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 107.17it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 89.60it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:04<00:00,  6.51it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:01<00:00, 25.54it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 61.97it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 76.93it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 112.58it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 121.06it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 115.19it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 99.77it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 115.29it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 70.64it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:04<00:00,  6.50it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 28.31it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.7 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 66.68it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 80.59it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 131.56it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 124.51it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 130.27it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 130.62it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 125.91it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 87.63it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:03<00:00,  8.46it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 48.32it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 112.43it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = KNN


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 78.13it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = OLS


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 136.58it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = Lasso


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 130.76it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = Ridge


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 141.42it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = ElasticNet


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 128.32it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = DT


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 124.89it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = LGBM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 70.68it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = RF


Predicting test set: 100%|██████████| 27/27 [00:02<00:00,  9.09it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = MLP


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 46.81it/s]


Running the Optimistic approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.9 ML Method = SVM


Predicting test set: 100%|██████████| 27/27 [00:00<00:00, 73.42it/s]
