# Execute the Graph-Based Spatial Cross-Validation experiments from the ICMLA21 paper

## 1 - Initialize libraries

In [1]:
import os
from pathlib import Path
import pandas as pd
from weka.core import jvm
from src import utils
from src.pipeline import Pipeline
from src.visualization.performance import VizMetrics
#from src.visualization.dependence import VizDependence

## 2 - Initialize loggers

In [2]:
# Run the logging-setup helpers from src.utils, in this fixed order
for _init_logger in (
    utils.initialize_coloredlog,
    utils.initialize_rich_tracerback,
    utils.initialize_logging,
):
    _init_logger()

## 3 - Initialize working path and environment variables

In [3]:
# Project root: two levels above the (symlink-resolved) current working directory
project_dir = str(Path.cwd().resolve().parents[1])
# Load the environment variables defined for this project
env_var = utils.load_env_variables(project_dir)

## 4 - Load the data

In [4]:
# Both input files live under <root_path>/Brazil_Election_2018/Original
data_path, adj_path = (
    os.path.join(env_var["root_path"], "Brazil_Election_2018", "Original", file_name)
    for file_name in ("data.csv", "queen_matrix.csv")
)
# Main data table, indexed by its "INDEX" column
data = pd.read_csv(data_path, index_col="INDEX", low_memory=False)
# Adjacency matrix: read it as-is, then promote its first column to the index
adj_matrix = pd.read_csv(adj_path, low_memory=False)
adj_matrix = adj_matrix.set_index(adj_matrix.columns[0])

## 5 - Set pipeline switchers (the default is to set every process to True)

In [5]:
# Pipeline switchers: one boolean flag per pipeline step.
# Here only the "evaluate" step is enabled.
SWITCHERS = dict(
    scv=False,
    fs=False,
    train=False,
    predict=False,
    evaluate=True,
)

## 6 - Run the pipeline for each method
Note: the results and files generated by the pipeline execution are written to a `Results` folder created inside the data directory.

### 6.3 RBuffer

In [6]:
# Learning methods to run and the feature-selection configuration shared by every run
ml_methods = ["OLS", "Lasso"]
fs_method = "CFS_Local_Meta_All_OLS"
# Exclude the rows whose fold index is 53.
# NOTE(review): the reason for excluding this fold is not documented here — confirm.
data = data[data["INDEX_FOLDS"] != 53]
# The Weka JVM is started only when fs_method is exactly "CFS" and the "fs" step is on.
# NOTE(review): the fs_method configured above never equals "CFS" — confirm this is intended.
# The condition is evaluated once so that start and stop can never disagree.
use_jvm = fs_method == "CFS" and SWITCHERS["fs"]
if use_jvm:
    jvm.start()
try:
    for ml_method in ml_methods:
        # One pipeline per learning method; every other setting is identical
        RBuffer = Pipeline(
            root_path=os.path.join(env_var["root_path"], "Brazil_Election_2018", "Original"),
            data=data,
            adj_matrix=adj_matrix,
            index_col="INDEX",
            fold_col="INDEX_FOLDS",
            target_col="TARGET",
            scv_method="RBuffer",
            run_selection=False,
            kappa=20,
            fs_method=fs_method,
            ml_method=ml_method,
            paper=True,
            switchers=SWITCHERS
        )

        print(f"Running the RBuffer approach for {ml_method}")
        RBuffer.run()
finally:
    # Stop the JVM even if a pipeline run raised, so it is not left running
    if use_jvm:
        jvm.stop()

Running the RBuffer approach for OLS


Evaluating predictions: 100%|██████████| 26/26 [02:51<00:00,  6.60s/it]
