In [33]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
import json
from tqdm import tqdm
from src import utils

In [34]:
# Resolve the project root from the current working directory.
# NOTE(review): the slice drops the last five characters of the path, which
# presumably removes a 4-letter sub-folder name plus its separator -- confirm.
working_dir = os.path.abspath('')
project_dir = working_dir[:-5]

# Load the environment variables for this project through the shared helper.
env_var = utils.load_env_variables(project_dir)

In [35]:
# Dataset folder names to process.
datasets = [
    "Brazil_Election_2018_Sampled_dec0.3_prob0.2",
    "Brazil_Election_2018_Sampled_dec0.3_prob0.4",
    "Brazil_Election_2018_Sampled_dec0.3_prob0.6",
    "Brazil_Election_2018_Sampled_dec0.3_prob0.8",
]

# One-element variant holding only the first dataset (a new list, not an alias).
datasets_single = datasets[:1]

# Names of the validation methods to compare.
scv_methods = [
    "Optimistic",
    "RegGBSCV_R_Kappa_0.0",
    "RegGBSCV_R_Kappa_0.2",
    "RegGBSCV_R_Kappa_0.4",
    "RegGBSCV_R_Kappa_0.6",
    "RegGBSCV_R_Kappa_0.8",
    "TraditionalSCV",
]

In [36]:
fs_method = "CFS"
ml_methods = ["KNN_1", "OLS_1", "Lasso_1", "Ridge_1", "DT_1", "LGBM_1", "RF_1", "MLP_1", "SVM_1"]
data_id = "INDEX"


def _fold_averaged_squared_error(predictions_path, id_column):
    """Return the fold-averaged mean squared error of the CSVs in a folder.

    Every file found in ``predictions_path`` is read with ``pd.read_csv`` and
    the frames are concatenated. Rows are grouped by the first two characters
    of the stringified ``id_column`` value; the squared difference between the
    ``PREDICTIONS`` and ``GROUND_TRUTH`` columns is averaged within each group
    and the group means are then averaged.

    Args:
        predictions_path: Folder holding the prediction CSV files.
        id_column: Name of the column whose two-character prefix defines a fold.

    Returns:
        The averaged squared error, or ``None`` when the folder has no files.

    Raises:
        FileNotFoundError: If ``predictions_path`` does not exist.
    """
    fold_files = [os.path.join(predictions_path, name) for name in os.listdir(predictions_path)]
    if not fold_files:
        return None
    # Concatenate once instead of growing a frame inside the loop.
    predictions = pd.concat([pd.read_csv(path) for path in fold_files])
    predictions["fold"] = predictions[id_column].apply(lambda cod: str(cod)[:2])
    predictions["error"] = (predictions["PREDICTIONS"] - predictions["GROUND_TRUTH"]) ** 2
    # Aggregate only the error column so unrelated non-numeric columns in the
    # CSVs cannot break the mean.
    return predictions.groupby(by="fold")["error"].mean().mean()


for dataset_folder in datasets:
    print(f"Dataset: {dataset_folder}")
    dataset_path = os.path.join(env_var["root_path"], dataset_folder)
    results_path = os.path.join(dataset_path, "results")
    # The validation methods are discovered from disk: this rebinds the
    # module-level `scv_methods`, replacing whatever list it held before.
    # Sorting makes the column order deterministic, and plain files are
    # skipped because they cannot contain a "predictions" sub-folder.
    scv_methods = sorted(
        entry for entry in os.listdir(results_path)
        if os.path.isdir(os.path.join(results_path, entry))
    )

    scores = {scv_method: [] for scv_method in scv_methods}
    scored_methods = set()
    for ml_method in tqdm(ml_methods):
        for scv_method in scv_methods:
            predictions_path = os.path.join(results_path, scv_method, "predictions", fs_method, ml_method)
            try:
                score = _fold_averaged_squared_error(predictions_path, data_id)
            except FileNotFoundError:
                score = None
            if score is None:
                # Keep one entry per ML method so every column stays aligned
                # with the "Method" column even when predictions are missing.
                scores[scv_method].append(float("nan"))
            else:
                scores[scv_method].append(score)
                scored_methods.add(scv_method)

    # Only validation methods with at least one score become columns.
    rmse_mean = {"Method": list(ml_methods)}
    for scv_method in scv_methods:
        if scv_method in scored_methods:
            rmse_mean[scv_method] = scores[scv_method]

    # NOTE(review): the stored value is a mean of squared errors (no square
    # root is taken) although the output file is named RMSE.csv -- confirm
    # which metric is intended.
    comparison_dir = os.path.join(env_var["root_path"], dataset_folder, "comparison")
    os.makedirs(comparison_dir, exist_ok=True)
    pd.DataFrame(rmse_mean).to_csv(os.path.join(comparison_dir, "RMSE.csv"), index=False)

# Display the table of the last processed dataset.
pd.DataFrame(rmse_mean)



Dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.2


100%|██████████| 9/9 [00:05<00:00,  1.60it/s]


Dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.4


100%|██████████| 9/9 [00:05<00:00,  1.62it/s]


Dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.6


100%|██████████| 9/9 [00:06<00:00,  1.46it/s]


Dataset: Brazil_Election_2018_Sampled_dec0.3_prob0.8


100%|██████████| 9/9 [00:04<00:00,  1.82it/s]


Unnamed: 0,Method,Optimistic,RegGBSCV_R_Kappa_0.0,RegGBSCV_R_Kappa_0.2,RegGBSCV_R_Kappa_0.4,RegGBSCV_R_Kappa_0.6,RegGBSCV_R_Kappa_0.8,TraditionalSCV
0,KNN_1,263.456477,271.636821,273.538868,293.44203,301.125847,297.005325,471.171435
1,OLS_1,249.951111,271.686678,294.214713,408.082587,2154.965634,315.959657,2034.177643
2,Lasso_1,155.732538,162.479904,163.095783,177.011006,184.853304,182.58575,368.882354
3,Ridge_1,170.73958,164.292328,165.93698,182.819937,193.379822,188.699588,391.528515
4,DT_1,264.713089,261.196946,257.407169,280.582013,287.695285,270.102943,448.030854
5,LGBM_1,150.553607,165.717798,169.823072,170.738295,166.988706,191.421851,349.618321
6,RF_1,162.631692,169.522102,172.168981,174.46653,176.686161,184.97283,366.744847
7,MLP_1,289.599042,262.179108,259.971753,275.598672,309.481866,300.855127,468.202798
8,SVM_1,266.047462,263.590308,265.290629,276.431099,282.307888,275.112234,473.058276
