# Execute the Graph-Based Spatial Cross-Validation experiments from ICMLA21 Paper

## 1 - Initialize libraries

In [13]:
import os
import pandas as pd
from weka.core import jvm
from src import utils
from src.pipeline import Pipeline
from src.visualization.performance import VizMetrics
from src.visualization.dependence import VizDependence

## 2 - Initialize loggers

In [14]:
# Run the logging/traceback set-up helpers exposed by the project's `src.utils` module.
# NOTE(review): judging by their names these configure colored log output, rich
# tracebacks and the base logging configuration — confirm in src/utils.
# The call order is kept as written in case the helpers depend on each other.
utils.initialize_coloredlog()
utils.initialize_rich_tracerback()
utils.initialize_logging()

## 3 - Initialize working path and environment variables

In [15]:
# Project path: the current working directory (os.path.abspath('') resolves to it)
# with its last 5 characters sliced off.
# NOTE(review): presumably the slice removes the name of the folder that holds this
# notebook so that the result is the project root — this silently produces a wrong
# path if the notebook is launched from a different directory; confirm.
project_dir = os.path.abspath('')[:-5]
# Load environment variables
env_var = utils.load_env_variables(project_dir)

## 4 - Set pipeline switchers (by default, all processes are set to True)

In [16]:
# Set pipeline switchers
# One boolean flag per pipeline process name; in this run only "fs" is switched on.
switchers = dict(
    scv=False,
    fs=True,
    train=False,
    predict=False,
    evaluate=False,
)

## 5 - List all datasets

In [17]:
# Brazil election samples (dec0.3 / prob0.1) with one macro-region removed per dataset.
brazil_removed_datasets = [
    f"Brazil_Election_2018_Sampled_dec0.3_prob0.1_removed_{region}"
    for region in ("north", "northeast", "south", "southeast", "centerwest")
]

# Brazil election samples for prob0.1 through prob0.9 (dec fixed at 0.3).
brazil_datasets = [
    f"Brazil_Election_2018_Sampled_dec0.3_prob0.{tenth}"
    for tenth in range(1, 10)
]

# US corn yield datasets, one per removed state (names keep their inner spaces).
us_corn_datasets = [
    f"US_Corn_Yield_2016_Removed_{state}"
    for state in (
        "ALABAMA",
        "ARKANSAS",
        "CALIFORNIA",
        "COLORADO",
        "DELAWARE",
        "GEORGIA",
        "IDAHO",
        "ILLINOIS",
        "INDIANA",
        "IOWA",
        "KANSAS",
        "KENTUCKY",
        "LOUISIANA",
        "MARYLAND",
        "MICHIGAN",
        "MINNESOTA",
        "MISSISSIPPI",
        "MISSOURI",
        "MONTANA",
        "NEBRASKA",
        "NEW JERSEY",
        "NEW MEXICO",
        "NEW YORK",
        "NORTH CAROLINA",
        "NORTH DAKOTA",
        "OHIO",
        "OKLAHOMA",
        "PENNSYLVANIA",
        "SOUTH CAROLINA",
        "SOUTH DAKOTA",
        "TENNESSEE",
        "TEXAS",
        "VIRGINIA",
        "WEST VIRGINIA",
        "WISCONSIN",
        "WYOMING",
    )
]

# Single-dataset list for quick test runs (identifier spelling kept as-is).
jsut_for_test = ["Brazil_Election_2018"]

## 6 - Run the pipeline for the Optimistic approach
Note: the results and files generated by the pipeline execution are written to the `Results` folder created inside the data directory.

In [18]:

# Run the "CrossValidation" pipeline for every dataset in `dataset_list` and every
# method in `ml_methods`, using the stage flags defined in `switchers`.
fs_method = "All"
# Other ML method identifiers used with this pipeline:
#ml_methods  = ["KNN", "OLS", "Lasso", "Ridge", "ElasticNet", "DT", "LGBM", "RF", "MLP", "SVM"]
ml_methods = ["KNN"]
dataset_list = us_corn_datasets

# Decide once whether the JVM is required, so that start and stop always agree
# even if the switcher flags are modified while the loop is running.
needs_jvm = fs_method == "CFS" and switchers["fs"]
if needs_jvm:
    jvm.start()

# NOTE(review): machine-specific absolute path that overrides whatever
# `load_env_variables` provided — consider moving it to the environment file.
env_var["root_path"] = "/exp/tpinho/Datasets/US_Corn_Yield_2016"
try:
    for dataset in dataset_list:
        # Work on a per-dataset copy so that toggling flags below (or inside the
        # pipeline) can never leak into the global `switchers` configuration.
        changing_switchers = dict(switchers)
        # Load the data
        data_path = os.path.join(env_var["root_path"], dataset, "data.csv")
        data = pd.read_csv(data_path, index_col="INDEX", low_memory=False)
        # Drop the coordinate columns when present; errors="ignore" also handles
        # the case where only one of the two columns exists.
        data = data.drop(
            columns=["[GEO]_LATITUDE", "[GEO]_LONGITUDE"], errors="ignore"
        )
        for ml_method in ml_methods:
            CrossValidation = Pipeline(
                root_path=os.path.join(env_var["root_path"], dataset),
                data=data,
                index_col="INDEX",
                fold_col="INDEX_FOLDS",
                target_col="TARGET",
                scv_method="CrossValidation",
                fs_method=fs_method,
                ml_method=ml_method,
                switchers=changing_switchers,
            )
            print(f"Running the Cross-Valdiation approach for dataset: {dataset} ML Method = {ml_method}")
            CrossValidation.run()
            # NOTE(review): presumably meant to skip the scv/fs stages for the
            # remaining ML methods of the same dataset — left disabled as in the original.
            #changing_switchers["scv"] = False
            #changing_switchers["fs"] = False
finally:
    # Always shut the JVM down, even when a pipeline run raises.
    if needs_jvm:
        jvm.stop()

Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_ALABAMA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 172.01it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_ARKANSAS ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 111.97it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_CALIFORNIA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 142.46it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_COLORADO ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 147.29it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_DELAWARE ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 170.30it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_GEORGIA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 165.55it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_IDAHO ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 141.41it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_ILLINOIS ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 161.84it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_INDIANA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 165.86it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_IOWA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 160.46it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_KANSAS ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 139.49it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_KENTUCKY ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 140.54it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_LOUISIANA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 141.65it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_MARYLAND ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 142.24it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_MICHIGAN ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 135.94it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_MINNESOTA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 143.31it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_MISSISSIPPI ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 145.72it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_MISSOURI ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 136.62it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_MONTANA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 152.75it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_NEBRASKA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 137.82it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_NEW JERSEY ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 159.45it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_NEW MEXICO ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 185.08it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_NEW YORK ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 147.96it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_NORTH CAROLINA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 160.00it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_NORTH DAKOTA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 177.01it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_OHIO ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 128.47it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_OKLAHOMA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 129.34it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_PENNSYLVANIA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 146.14it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_SOUTH CAROLINA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 136.22it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_SOUTH DAKOTA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 143.13it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_TENNESSEE ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 140.46it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_TEXAS ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 141.10it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_VIRGINIA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 133.40it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_WEST VIRGINIA ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 142.31it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_WISCONSIN ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 142.32it/s]


Running the Cross-Valdiation approach for dataset: US_Corn_Yield_2016_Removed_WYOMING ML Method = KNN


Selecting Features: 100%|██████████| 10/10 [00:00<00:00, 132.68it/s]
