In [16]:
import contextlib
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
import json
from tqdm import tqdm
from src import utils

In [17]:
def _mkdir(root_path, folder_name):
    """Creates a folder at current path"""
    # logger = logging.getLogger(self.logger_name)
    cur_dir = os.path.join(root_path, folder_name)
    with contextlib.suppress(FileExistsError):
        os.mkdir(cur_dir)
        # logger.info(f"Entering folder: /{folder_name}")
def _make_folders(root_path, folders):
        """Make the initial folders"""
        for folder in folders:
            _mkdir(root_path, folder)
            root_path = os.path.join(root_path, folder)
        return root_path

In [18]:
# Project path
project_dir = os.path.abspath('')[:-9]
# Load enviromental variables

env_var = utils.load_env_variables(project_dir)
env_var["root_path"] = "/exp/tpinho/Datasets/US_Corn_Yield_2016"

In [19]:
datasets_single = ["Brazil_Election_2018"]
few_methods = ["CrossValidation", "Optimistic", "TraditionalSCV"]

datasets = ["Brazil_Election_2018_Sampled_dec0.3_prob0.1",      
            "Brazil_Election_2018_Sampled_dec0.3_prob0.2",
            "Brazil_Election_2018_Sampled_dec0.3_prob0.3",
            "Brazil_Election_2018_Sampled_dec0.3_prob0.4",
            "Brazil_Election_2018_Sampled_dec0.3_prob0.5",
            "Brazil_Election_2018_Sampled_dec0.3_prob0.6",
            "Brazil_Election_2018_Sampled_dec0.3_prob0.7",
            "Brazil_Election_2018_Sampled_dec0.3_prob0.8",
            "Brazil_Election_2018_Sampled_dec0.3_prob0.9",
            ]

us_corn_datasets = ["US_Corn_Yield_2016_Removed_ALABAMA",
                "US_Corn_Yield_2016_Removed_ARKANSAS",
                "US_Corn_Yield_2016_Removed_CALIFORNIA",
                "US_Corn_Yield_2016_Removed_COLORADO",
                "US_Corn_Yield_2016_Removed_DELAWARE",
                "US_Corn_Yield_2016_Removed_GEORGIA",
                "US_Corn_Yield_2016_Removed_IDAHO",
                "US_Corn_Yield_2016_Removed_ILLINOIS",
                "US_Corn_Yield_2016_Removed_INDIANA",
                "US_Corn_Yield_2016_Removed_IOWA",
                "US_Corn_Yield_2016_Removed_KANSAS",
                "US_Corn_Yield_2016_Removed_KENTUCKY",
                "US_Corn_Yield_2016_Removed_LOUISIANA",
                "US_Corn_Yield_2016_Removed_MARYLAND",
                "US_Corn_Yield_2016_Removed_MICHIGAN",
                "US_Corn_Yield_2016_Removed_MINNESOTA",
                "US_Corn_Yield_2016_Removed_MISSISSIPPI",
                "US_Corn_Yield_2016_Removed_MISSOURI",
                "US_Corn_Yield_2016_Removed_MONTANA",
                "US_Corn_Yield_2016_Removed_NEBRASKA",
                "US_Corn_Yield_2016_Removed_NEW JERSEY",
                "US_Corn_Yield_2016_Removed_NEW MEXICO",
                "US_Corn_Yield_2016_Removed_NEW YORK",
                "US_Corn_Yield_2016_Removed_NORTH CAROLINA",
                "US_Corn_Yield_2016_Removed_NORTH DAKOTA",
                "US_Corn_Yield_2016_Removed_OHIO",
                "US_Corn_Yield_2016_Removed_OKLAHOMA",
                "US_Corn_Yield_2016_Removed_PENNSYLVANIA",
                "US_Corn_Yield_2016_Removed_SOUTH CAROLINA",
                "US_Corn_Yield_2016_Removed_SOUTH DAKOTA",
                "US_Corn_Yield_2016_Removed_TENNESSEE",
                "US_Corn_Yield_2016_Removed_TEXAS",
                "US_Corn_Yield_2016_Removed_VIRGINIA",
                "US_Corn_Yield_2016_Removed_WEST VIRGINIA",
                "US_Corn_Yield_2016_Removed_WISCONSIN",
                "US_Corn_Yield_2016_Removed_WYOMING"]
                
us_corn_datasets = ["US_Corn_Yield_2016_Removed_Northeast",
                    "US_Corn_Yield_2016_Removed_Southeast",
                    "US_Corn_Yield_2016_Removed_Midwest",
                    "US_Corn_Yield_2016_Removed_Southwest",
                    "US_Corn_Yield_2016_Removed_West"]


scv_methods = ["CrossValidation",
               "Optimistic",
               "RegGBSCV_R_Kappa_0.0",
               "RegGBSCV_R_Kappa_0.1",
               "RegGBSCV_R_Kappa_0.2",
               "RegGBSCV_R_Kappa_0.3",  
               "RegGBSCV_R_Kappa_0.4",
               "RegGBSCV_R_Kappa_0.5",
               "RegGBSCV_R_Kappa_0.6",
               "RegGBSCV_R_Kappa_0.7", 
               "RegGBSCV_R_Kappa_0.8",
               "RegGBSCV_R_Kappa_0.9",
               "RegGBSCV_R_Kappa_1.0",
               "TraditionalSCV"]

In [20]:
fs_method = "All"
ml_methods = ["KNN", "OLS", "Lasso", "Ridge", "ElasticNet", "DT", "LGBM", "RF", "MLP", "SVM"]
data_id = "INDEX"

for dataset_folder in us_corn_datasets:
    rmse_mean = {}
    print(f"Dataset: {dataset_folder}")
    dataset_path = os.path.join(env_var["root_path"], dataset_folder)
    #scv_methods = list(os.listdir(os.path.join(dataset_path, "results")))
    #rmse_mean["Dataset"] = rmse_mean["Dataset"] + [dataset_folder] if rmse_mean.get("Dataset") else [dataset_folder]
    #scv_methods = ["RegGBSCV_R_Kappa_2.0"]
    for ml_method in tqdm(ml_methods):
        rmse_mean["Method"] = rmse_mean["Method"] + [ml_method] if rmse_mean.get("Method") else [ml_method]
        for scv_method in scv_methods:
            predictions_path = os.path.join(dataset_path, "results", scv_method, "predictions", fs_method, f"{ml_method}")
            predi_files = [os.path.join(predictions_path,c) for c in os.listdir(predictions_path)]
            predictions = pd.DataFrame()
            for file in predi_files:
                pred_fold = pd.read_csv(file)
                predictions = pd.concat([predictions, pred_fold])
            predictions["fold"] = predictions[data_id].apply(lambda cod: str(cod)[:2])
            predictions["error"] = (predictions["PREDICTIONS"] - predictions["GROUND_TRUTH"])**2
            predictions = predictions.groupby(by="fold").agg("mean")
            rmse_mean[f"{scv_method}"] = rmse_mean[f"{scv_method}"] +[predictions["error"].mean()] if rmse_mean.get(f"{scv_method}") else [predictions["error"].mean()]
            #rmse_mean[f"{scv_method}_std"] = rmse_mean[f"{scv_method}_std"] +[predictions["error"].std()] if rmse_mean.get(f"{scv_method}_std") else [predictions["error"].std()]
    _make_folders(os.path.join(env_var["root_path"], dataset_folder), ["comparison"])
    df_rmse = pd.DataFrame(rmse_mean)
    df_rmse.set_index("Method", inplace=True)

    df_rmse.to_csv(os.path.join(env_var["root_path"], dataset_folder, "comparison", "RMSE.csv"))
    df_rmse.rank().to_csv(os.path.join(env_var["root_path"], dataset_folder, "comparison", "Rank.csv"))

pd.DataFrame(rmse_mean).rank()



Dataset: US_Corn_Yield_2016_Removed_ALABAMA


100%|██████████| 10/10 [00:13<00:00,  1.34s/it]


Dataset: US_Corn_Yield_2016_Removed_ARKANSAS


100%|██████████| 10/10 [00:19<00:00,  1.96s/it]


Dataset: US_Corn_Yield_2016_Removed_CALIFORNIA


100%|██████████| 10/10 [00:19<00:00,  1.91s/it]


Dataset: US_Corn_Yield_2016_Removed_COLORADO


100%|██████████| 10/10 [00:17<00:00,  1.72s/it]


Dataset: US_Corn_Yield_2016_Removed_DELAWARE


100%|██████████| 10/10 [00:19<00:00,  1.94s/it]


Dataset: US_Corn_Yield_2016_Removed_GEORGIA


100%|██████████| 10/10 [00:18<00:00,  1.89s/it]


Dataset: US_Corn_Yield_2016_Removed_IDAHO


100%|██████████| 10/10 [00:20<00:00,  2.03s/it]


Dataset: US_Corn_Yield_2016_Removed_ILLINOIS


100%|██████████| 10/10 [00:17<00:00,  1.78s/it]


Dataset: US_Corn_Yield_2016_Removed_INDIANA


100%|██████████| 10/10 [00:18<00:00,  1.82s/it]


Dataset: US_Corn_Yield_2016_Removed_IOWA


100%|██████████| 10/10 [00:17<00:00,  1.79s/it]


Dataset: US_Corn_Yield_2016_Removed_KANSAS


100%|██████████| 10/10 [00:17<00:00,  1.77s/it]


Dataset: US_Corn_Yield_2016_Removed_KENTUCKY


100%|██████████| 10/10 [00:18<00:00,  1.88s/it]


Dataset: US_Corn_Yield_2016_Removed_LOUISIANA


100%|██████████| 10/10 [00:21<00:00,  2.11s/it]


Dataset: US_Corn_Yield_2016_Removed_MARYLAND


100%|██████████| 10/10 [00:18<00:00,  1.83s/it]


Dataset: US_Corn_Yield_2016_Removed_MICHIGAN


100%|██████████| 10/10 [00:18<00:00,  1.83s/it]


Dataset: US_Corn_Yield_2016_Removed_MINNESOTA


100%|██████████| 10/10 [00:18<00:00,  1.84s/it]


Dataset: US_Corn_Yield_2016_Removed_MISSISSIPPI


100%|██████████| 10/10 [00:18<00:00,  1.85s/it]


Dataset: US_Corn_Yield_2016_Removed_MISSOURI


100%|██████████| 10/10 [00:18<00:00,  1.84s/it]


Dataset: US_Corn_Yield_2016_Removed_MONTANA


100%|██████████| 10/10 [00:17<00:00,  1.72s/it]


Dataset: US_Corn_Yield_2016_Removed_NEBRASKA


100%|██████████| 10/10 [00:17<00:00,  1.70s/it]


Dataset: US_Corn_Yield_2016_Removed_NEW JERSEY


100%|██████████| 10/10 [00:17<00:00,  1.70s/it]


Dataset: US_Corn_Yield_2016_Removed_NEW MEXICO


100%|██████████| 10/10 [00:18<00:00,  1.80s/it]


Dataset: US_Corn_Yield_2016_Removed_NEW YORK


100%|██████████| 10/10 [00:17<00:00,  1.77s/it]


Dataset: US_Corn_Yield_2016_Removed_NORTH CAROLINA


100%|██████████| 10/10 [00:17<00:00,  1.77s/it]


Dataset: US_Corn_Yield_2016_Removed_NORTH DAKOTA


100%|██████████| 10/10 [00:18<00:00,  1.82s/it]


Dataset: US_Corn_Yield_2016_Removed_OHIO


100%|██████████| 10/10 [00:16<00:00,  1.68s/it]


Dataset: US_Corn_Yield_2016_Removed_OKLAHOMA


100%|██████████| 10/10 [00:19<00:00,  1.90s/it]


Dataset: US_Corn_Yield_2016_Removed_PENNSYLVANIA


100%|██████████| 10/10 [00:18<00:00,  1.81s/it]


Dataset: US_Corn_Yield_2016_Removed_SOUTH CAROLINA


100%|██████████| 10/10 [00:17<00:00,  1.75s/it]


Dataset: US_Corn_Yield_2016_Removed_SOUTH DAKOTA


100%|██████████| 10/10 [00:16<00:00,  1.67s/it]


Dataset: US_Corn_Yield_2016_Removed_TENNESSEE


100%|██████████| 10/10 [00:16<00:00,  1.69s/it]


Dataset: US_Corn_Yield_2016_Removed_TEXAS


100%|██████████| 10/10 [00:16<00:00,  1.70s/it]


Dataset: US_Corn_Yield_2016_Removed_VIRGINIA


100%|██████████| 10/10 [00:18<00:00,  1.84s/it]


Dataset: US_Corn_Yield_2016_Removed_WEST VIRGINIA


100%|██████████| 10/10 [00:16<00:00,  1.67s/it]


Dataset: US_Corn_Yield_2016_Removed_WISCONSIN


100%|██████████| 10/10 [00:18<00:00,  1.81s/it]


Dataset: US_Corn_Yield_2016_Removed_WYOMING


100%|██████████| 10/10 [00:17<00:00,  1.71s/it]


Unnamed: 0,Method,CrossValidation,Optimistic,RegGBSCV_R_Kappa_0.0,RegGBSCV_R_Kappa_0.1,RegGBSCV_R_Kappa_0.2,RegGBSCV_R_Kappa_0.3,RegGBSCV_R_Kappa_0.4,RegGBSCV_R_Kappa_0.5,RegGBSCV_R_Kappa_0.6,RegGBSCV_R_Kappa_0.7,RegGBSCV_R_Kappa_0.8,RegGBSCV_R_Kappa_0.9,RegGBSCV_R_Kappa_1.0,TraditionalSCV
0,3.0,6.0,4.0,3.0,3.0,3.0,3.0,3.0,2.0,3.0,2.0,1.0,4.0,3.0,4.0
1,7.0,5.0,6.0,9.0,9.0,9.0,9.0,9.0,10.0,10.0,10.0,10.0,10.0,10.0,8.0
2,5.0,9.0,8.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,2.0,3.0,4.0,6.0
3,9.0,4.0,7.0,10.0,10.0,10.0,10.0,10.0,9.0,9.0,9.0,9.0,9.0,9.0,7.0
4,2.0,7.0,5.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,5.0,5.0,5.0,5.0
5,1.0,8.0,9.0,5.0,5.0,8.0,8.0,8.0,8.0,7.0,8.0,7.0,6.0,6.0,9.0
6,4.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,1.0,1.0,1.0,3.0,1.0,2.0,1.0
7,8.0,2.0,2.0,1.0,2.0,2.0,1.0,2.0,3.0,2.0,3.0,4.0,2.0,1.0,2.0
8,6.0,10.0,10.0,7.0,7.0,5.0,5.0,5.0,5.0,5.0,5.0,6.0,7.0,8.0,10.0
9,10.0,3.0,3.0,8.0,8.0,7.0,7.0,7.0,7.0,8.0,7.0,8.0,8.0,7.0,3.0
