# Run the Graph-Based Spatial Cross-Validation experiments from the ICMLA21 paper

## 1 - Initialize libraries

In [1]:
import os
import pandas as pd
from src import utils
from src.pipeline import Pipeline
from src.visualization.performance import VizMetrics
from src.visualization.dependence import VizDependence

## 2 - Initialize loggers

In [2]:
# Set up logging through the project's `utils` helpers.
# NOTE(review): what each helper installs is defined in src/utils, which is not
# visible here — presumably coloured console output, rich tracebacks, and the
# base logging configuration. The call order is kept as-is in case it matters.
utils.initialize_coloredlog()
utils.initialize_rich_tracerback()
utils.initialize_logging()

## 3 - Initialize working path and environment variables

In [3]:
# Project path: the current working directory with its last five characters removed.
# NOTE(review): the fixed 5-character slice presumably strips the name of the
# sub-directory this notebook runs from — confirm it matches the folder layout.
notebook_dir = os.path.abspath('')
project_dir = notebook_dir[:-5]
# Load environment variables for the project
env_var = utils.load_env_variables(project_dir)

## 4 - Load the data

In [4]:
# Dataset: read data.csv from the configured root directory, using its
# "INDEX" column as the row index.
data_path = os.path.join(env_var["root_path"], "data.csv")
data = pd.read_csv(data_path, index_col="INDEX", low_memory=False)
# Disabled step, kept for reference. As written it would not modify `data`,
# because the frame returned by drop() is discarded.
#data.drop(columns=["[GEO]_LATITUDE", "[GEO]_LONGITUDE"])

# Adjacency matrix: read queen_matrix.csv, then promote its first column to
# the row index.
adj_path = os.path.join(env_var["root_path"], "queen_matrix.csv")
adj_matrix = pd.read_csv(adj_path, low_memory=False)
adj_matrix = adj_matrix.set_index(adj_matrix.columns[0])

## 5 - Set pipeline switchers (by default, every process is enabled)

In [5]:
# Pipeline switchers: one boolean flag per pipeline process, all enabled here.
# Set a flag to False to turn the corresponding process off.
# NOTE(review): presumably "scv" = spatial cross-validation and
# "fs" = feature selection — confirm against src.pipeline.
SWITCHERS = dict(
    scv=True,
    fs=True,
    train=True,
    predict=True,
    evaluate=True,
)

## 6 - Run the pipeline for each method
Note: the results and files generated by the pipeline execution are written to a `Results` folder created inside the data directory.

### 6.2 SRBuffer

In [None]:
# Configure the pipeline for the "SRBuffer" cross-validation method.
# NOTE(review): the meaning of each option is defined by src.pipeline.Pipeline,
# which is not visible here; the grouping notes below are assumptions to confirm.
SRBuffer = Pipeline(
    # Inputs prepared in the previous cells
    root_path=env_var["root_path"],
    data=data,
    adj_matrix=adj_matrix,
    # Column names
    index_col="INDEX",  # same column used as the index when reading data.csv
    fold_col="INDEX_FOLDS",
    target_col="TARGET",
    # Method selection and their settings
    # (presumably: SCV strategy, feature selection, ML model — TODO confirm)
    scv_method="SRBuffer",
    run_selection=True,
    kappa=20,
    fs_method="CFS",
    ml_method="LGBM",
    paper=True,
    # Per-process on/off flags from the SWITCHERS dict
    switchers=SWITCHERS
)

print("Running the SRBuffer approach...")
# Execute the configured pipeline
SRBuffer.run()
