# Execute the Graph-Based Spatial Cross-Validation experiments from ICMLA21 Paper

## 1 - Initialize libraries

In [7]:
import os
import pandas as pd
from weka.core import jvm
from src import utils
from src.pipeline import Pipeline
from src.visualization.performance import VizMetrics
from src.visualization.dependence import VizDependence
# Set the CPU affinity mask of this kernel process to 0xff through the
# `taskset` command-line utility; the shell exit status is the cell's value.
os.system(f"taskset -p 0xff {os.getpid():d}")

atual máscara de afinidade do pid 23709: ff
nova máscara de afinidade do pid 23709: ff


0

## 2 - Initialize loggers

In [8]:
# Run the project's logging/traceback initialisers, in this exact order.
for _initializer in (
    utils.initialize_coloredlog,
    utils.initialize_rich_tracerback,
    utils.initialize_logging,
):
    _initializer()

## 3 - Initialize working path and environment variables

In [9]:
# Project path: the current working directory with its last five characters
# stripped (presumably the notebook sub-folder name plus separator; TODO confirm).
_working_dir = os.path.abspath('')
project_dir = _working_dir[:-5]

# Load the environment variables defined for the project
env_var = utils.load_env_variables(project_dir)

## 4 - Set pipeline switchers (the default is True for all processes)

In [10]:
# Pipeline switchers: one boolean flag per pipeline step, keyed by step name.
switchers = dict(
    scv=False,
    fs=False,
    train=True,
    predict=False,
    evaluate=False,
)

## 5 - List all datasets

In [11]:
# Sampled Brazil election data (prob0.1) with one region removed per dataset.
brazil_removed_datasets = [
    f"Brazil_Election_2018_Sampled_dec0.3_prob0.1_removed_{region}"
    for region in ("north", "northeast", "south", "southeast", "centerwest")
]

# Sampled Brazil election data, one dataset per probability from 0.1 to 0.9.
brazil_datasets = [
    f"Brazil_Election_2018_Sampled_dec0.3_prob0.{tenth}"
    for tenth in range(1, 10)
]

# Brazil election data with one geoeconomical region removed per dataset.
brazil_geoeconomical_regions = [
    f"Brazil_Election_2018_removed_{region}"
    for region in ("AMAZONIA", "NORDESTE", "CENTRO_SUL")
]

# US corn yield data with one region removed per dataset.
us_corn_datasets = [
    f"US_Corn_Yield_2016_Removed_{region}"
    for region in ("Northeast", "Southeast", "Midwest", "Southwest", "West")
]

# Single-dataset list used for quick test runs.
jsut_for_test = ["Original"]

## 6 - Run the pipeline for the Optimistic approach
Note: The results and files generated by the pipeline execution will be placed in the `Results` folder, which is created in the data directory.

In [12]:

# Run the Optimistic pipeline once per (dataset, ML method) pair.
fs_method = "CFS_Local"
ml_methods = ["MLP", "SVM"]
dataset_list = jsut_for_test

# NOTE(review): machine-specific absolute path that overrides the root path
# returned by utils.load_env_variables(); change it before running elsewhere.
env_var["root_path"] = "/home/tpinho/IJGIS/Datasets/Brazil_Election_2018"

# Evaluate the JVM condition exactly once so that start and stop are always
# paired, even if the switchers are modified while the pipelines run.
needs_jvm = "CFS" in fs_method and switchers["fs"]
if needs_jvm:
    jvm.start()
try:
    for dataset in dataset_list:
        # Independent copy: per-dataset changes to the switchers must not leak
        # into the global `switchers` dict or into the next dataset.
        changing_switchers = dict(switchers)

        # Load the data
        data_path = os.path.join(env_var["root_path"], dataset, "data.csv")
        data = pd.read_csv(data_path, index_col="INDEX", low_memory=False)
        # Discard the rows assigned to fold 53.
        # NOTE(review): the reason for excluding this fold is not documented here.
        data = data[data["INDEX_FOLDS"] != 53]
        # Drop whichever coordinate columns are present; a missing column is
        # ignored instead of preventing the other one from being dropped.
        data = data.drop(
            columns=["[GEO]_LATITUDE", "[GEO]_LONGITUDE"], errors="ignore"
        )

        for ml_method in ml_methods:
            Optimistic = Pipeline(
                root_path=os.path.join(env_var["root_path"], dataset),
                data=data,
                index_col="INDEX",
                fold_col="INDEX_FOLDS",
                target_col="TARGET",
                scv_method="Optimistic",
                fs_method=fs_method,
                ml_method=ml_method,
                switchers=changing_switchers,
            )
            print(f"Running the Optimistic approach for dataset: {dataset} ML Method = {ml_method}")
            Optimistic.run()
finally:
    # Stop the JVM even when a pipeline run fails part-way through.
    if needs_jvm:
        jvm.stop()

Running the Optimistic approach for dataset: Original ML Method = MLP


Training model: 100%|██████████| 27/27 [8:33:11<00:00, 1140.44s/it]  


Running the Optimistic approach for dataset: Original ML Method = SVM


Training model: 100%|██████████| 27/27 [55:27<00:00, 123.23s/it]
