# Execute the Graph-Based Spatial Cross-Validation experiments from ICMLA21 Paper

## 1 - Initialize libraries

In [1]:
import os
import pandas as pd
from weka.core import jvm
from src import utils
from src.pipeline import Pipeline
from src.visualization.performance import VizMetrics
from src.visualization.dependence import VizDependence

You can install them with  `pip install urbanaccess pandana` or `conda install -c udst pandana urbanaccess`
  warn(


## 2 - Initialize loggers

In [2]:
# Call each of the project's logging/traceback initializers once, in this fixed order.
for _initialize in (
    utils.initialize_coloredlog,
    utils.initialize_rich_tracerback,
    utils.initialize_logging,
):
    _initialize()

## 3 - Initialize working path and environmental variables

In [3]:
# Project path: the parent of the directory the notebook is executed from.
# Derived with os.path.dirname rather than by slicing off a fixed number of
# characters, so the result does not depend on the length of the notebook
# folder's name.
# NOTE(review): assumes the notebook lives exactly one level below the project
# root -- confirm against the repository layout.
project_dir = os.path.dirname(os.path.abspath(''))
# Load environmental variables
env_var = utils.load_env_variables(project_dir)

## 4 - Set pipeline switchers (by default, every process is set to True)

In [4]:
# Pipeline switchers: one boolean flag per pipeline stage key.
switchers = dict(
    scv=True,
    fs=False,
    train=False,
    predict=False,
    evaluate=False,
)

## 5 - List all datasets

In [5]:
# Shared name prefixes of the Brazilian election dataset folders.
_BRAZIL_PREFIX = "Brazil_Election_2018"
_BRAZIL_SAMPLED_PREFIX = f"{_BRAZIL_PREFIX}_Sampled_dec0.3_prob0."

# Sampled (prob0.1) datasets, one per removed region.
brazil_removed_datasets = [
    f"{_BRAZIL_SAMPLED_PREFIX}1_removed_{region}"
    for region in ("north", "northeast", "south", "southeast", "centerwest")
]

# Sampled datasets for prob0.1 through prob0.9.
brazil_datasets = [f"{_BRAZIL_SAMPLED_PREFIX}{tenth}" for tenth in range(1, 10)]

# Datasets with one geoeconomical region removed.
brazil_geoeconomical_regions = [
    f"{_BRAZIL_PREFIX}_removed_{region}"
    for region in ("AMAZONIA", "NORDESTE", "CENTRO_SUL")
]

us_datasets = [f"US_{crop}" for crop in ("Corn_Yield_2016", "Wheat_Yield_2014")]

# Single-dataset list for quick test runs.
jsut_for_test = [f"{_BRAZIL_SAMPLED_PREFIX}2"]

## 6 - Run the pipeline for the Optimistic approach
Note: the results and files generated by the pipeline execution are written to a `Results` folder created inside the data directory.

In [6]:

# Run the "CrossValidation" pipeline for every dataset / ML-method combination.
fs_method = "CFS"
# Full list of learners used elsewhere; only KNN is enabled for this run:
# ml_methods = ["KNN", "OLS", "Lasso", "Ridge", "ElasticNet", "DT", "LGBM", "RF", "MLP", "SVM"]
ml_methods = ["KNN"]
dataset_list = brazil_datasets

# NOTE(review): hard-coded, machine-specific absolute path that overrides the
# value returned by utils.load_env_variables(); prefer configuring root_path in
# the environment file so the notebook runs on other machines.
env_var["root_path"] = "/exp/tpinho/Datasets/"

# Decide once whether the JVM is required, so that start and stop always agree
# even if `switchers` is modified while the loop is running.
uses_jvm = fs_method == "CFS" and switchers["fs"]
if uses_jvm:
    jvm.start()
try:
    for dataset in dataset_list:
        # Per-dataset copy: toggling a stage here can never leak into the
        # global `switchers` or into the next dataset.
        changing_switchers = dict(switchers)
        # Load the data
        data_path = os.path.join(env_var["root_path"], dataset, "data.csv")
        data = pd.read_csv(data_path, index_col="INDEX", low_memory=False)
        # Remove the coordinate columns when present; errors="ignore" drops
        # whichever of the two exists instead of skipping both when one is missing.
        data = data.drop(
            columns=["[GEO]_LATITUDE", "[GEO]_LONGITUDE"], errors="ignore"
        )
        for ml_method in ml_methods:
            CrossValidation = Pipeline(
                root_path=os.path.join(env_var["root_path"], dataset),
                data=data,
                index_col="INDEX",
                fold_col="INDEX_FOLDS",
                target_col="TARGET",
                scv_method="CrossValidation",
                fs_method=fs_method,
                ml_method=ml_method,
                switchers=changing_switchers,
            )
            print(f"Running the Cross-Valdiation approach for dataset: {dataset} ML Method = {ml_method}")
            CrossValidation.run()
finally:
    # Always shut the JVM down, even when a pipeline run raises.
    if uses_jvm:
        jvm.stop()

Running the Cross-Valdiation approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.1 ML Method = KNN
Execution time: 3.0849246978759766 seconds
Running the Cross-Valdiation approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2 ML Method = KNN
Execution time: 1.6807022094726562 seconds
Running the Cross-Valdiation approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.3 ML Method = KNN
Execution time: 1.1494312286376953 seconds
Running the Cross-Valdiation approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4 ML Method = KNN
Execution time: 0.6052441596984863 seconds
Running the Cross-Valdiation approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.5 ML Method = KNN
Execution time: 0.4496133327484131 seconds
Running the Cross-Valdiation approach for dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6 ML Method = KNN
Execution time: 0.38397741317749023 seconds
Running the Cross-Valdiation approach for dataset: Brazil_Election_2018_Sam