### ACTUAL

In [None]:
import os
import json

def delete_model_jsons(model_name):
    """
    Deletes the saved JSON files of the specified model across all indicators and countries.

    Country and indicator names are the keys of ``../countries.json`` and
    ``../indicators.json``. For every (indicator, country) pair the file
    ``../best_params/<indicator>/<model_name>_<country>.json`` is removed.
    Two spellings of the country are tried: the name exactly as stored in the
    JSON file and the name with spaces replaced by underscores, so files saved
    under either naming scheme are cleaned up.

    Missing files are skipped silently; any other OS error is printed and the
    loop continues with the next file.

    Args:
        model_name (str): The name of the model (e.g., "XGBoost", "Prophet").

    Returns:
        None
    """
    # Load country names and indicators (only the keys are used)
    with open("../countries.json", "r") as f:
        country_names = json.load(f)
    with open("../indicators.json", "r") as f:
        indicators = json.load(f)

    # Base path of the per-indicator parameter folders
    base_dir = "../best_params"

    for indicator in indicators:
        for country in country_names:
            # dict.fromkeys de-duplicates the two spellings (they are equal when
            # the name has no spaces) while keeping their order.
            name_variants = dict.fromkeys((country, country.replace(" ", "_")))

            for country_token in name_variants:
                json_file_path = os.path.join(base_dir, indicator, f"{model_name}_{country_token}.json")

                # Try the removal directly instead of checking for existence
                # first; a file that is not there is simply skipped.
                try:
                    os.remove(json_file_path)
                except (FileNotFoundError, NotADirectoryError):
                    continue
                except OSError as e:
                    print(f"Error deleting {json_file_path}: {e}")
                else:
                    print(f"Deleted: {json_file_path}")

# Example usage:
#delete_model_jsons("ARIMA")


In [None]:
# https://medium.com/@sandha.iitr/tuning-arima-for-forecasting-an-easy-approach-in-python-5f40d55184c4

In [None]:
import os
import json
import pandas as pd
import numpy as np
import xgboost as xgb
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

def save_plot(train, test_y, predictions, country, indicator, model_name):
    """Plot training data, actual test data and predictions, then save the figure twice.

    The same PNG is written to ``../images/model_plot/Indicators/<indicator>/``
    and ``../images/model_plot/Countries/<country>/``; both folders are created
    when missing. The file name is ``<model_name>_<country>_<indicator>.png``
    with spaces in the country and indicator replaced by underscores (the
    folder names keep the original spelling).

    Args:
        train: Training data. When ``model_name == "Prophet"`` it is read via
            ``train['ds']`` / ``train['y']``; otherwise it is plotted against
            ``train.index``.
        test_y: Actual hold-out data, accessed the same way as ``train``.
        predictions: Predicted values, plotted against the test x-axis.
        country (str): Country name used in the title, folder and file name.
        indicator (str): Indicator name used in the title, folder and file name.
        model_name (str): Must be one of the keys of ``model_colors``.

    Raises:
        KeyError: If ``model_name`` has no entry in ``model_colors``.
    """
    model_colors = {
        "ARIMA": "blue",
        "Holt_Winters": "yellow",
        "LSTM": "black",
        "XGBoost": "pink",
        "Prophet": "brown",
    }

    # Resolve the colour before a figure exists so an unsupported model name
    # fails without leaving an open figure behind.
    line_color = model_colors[model_name]

    # Prophet-style frames carry the dates/values in 'ds'/'y' columns; every
    # other model passes data whose index is the x-axis.
    if model_name == "Prophet":
        train_x, train_values = train['ds'], train['y']
        test_x, test_values = test_y['ds'], test_y['y']
    else:
        train_x, train_values = train.index, train
        test_x, test_values = test_y.index, test_y

    file_name = f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png'
    indicator_folder = os.path.join('../images', 'model_plot', 'Indicators', indicator)
    country_folder = os.path.join('../images', 'model_plot', 'Countries', country)

    plt.figure(figsize=(10, 6))
    try:
        # Plotting predicted vs actual
        plt.plot(train_x, train_values, label='Train Data', color='green', linestyle='--')
        plt.plot(test_x, test_values, label='Actual', color='red', linestyle='--')
        plt.plot(test_x, predictions, label=f'Predicted({model_name})', color=line_color,
                 linestyle='-', marker='o')

        plt.title(f'Predicted({model_name}) vs Actual for {country} - {indicator}')
        plt.xlabel('Year')
        plt.ylabel('Value')
        plt.legend()

        # Save the same figure under the indicator folder, then the country folder.
        for folder in (indicator_folder, country_folder):
            os.makedirs(folder, exist_ok=True)
            plt.savefig(os.path.join(folder, file_name))
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close()




def train_arima(train_y, test_y, country, indicator):
    """Fit an ARIMA model for one country/indicator pair and score it on ``test_y``.

    The (p, d, q) order is read from
    ``../best_params/<indicator>/ARIMA_<country>.json`` when that file exists.
    Otherwise a grid search over p in 0..14, d in 0..5 and q in 0..9 is run and
    the winning order is written to that file for later runs. The forecast of
    the chosen model is plotted through ``save_plot``.

    Note: the grid search picks the order with the lowest RMSE on ``test_y``
    itself, so the returned RMSE is measured on the same data that was used
    for selection.

    Args:
        train_y: Training series; re-indexed internally to a year-end date range
            starting at its first index value (assumes yearly data).
        test_y: Hold-out series, re-indexed the same way.
        country (str): Country name (used in the parameter file name and plot).
        indicator (str): Indicator name (used as parameter sub-folder and in the plot).

    Returns:
        tuple: ``(rmse, predictions)``. When no model could be fitted at all,
        ``(float('inf'), None)`` is returned and no plot is saved.
    """
    # Work on copies so the caller's series keep their original index.
    train_y = train_y.copy()
    test_y = test_y.copy()

    # Ensure the time index is correctly set
    train_y.index = pd.date_range(start=train_y.index[0], periods=len(train_y), freq='YE')  # Assuming yearly data
    test_y.index = pd.date_range(start=test_y.index[0], periods=len(test_y), freq='YE')

    # Define paths for saving/loading best parameters
    params_dir = os.path.join("../best_params", indicator)
    params_file = os.path.join(params_dir, f"ARIMA_{country}.json")
    best_params = None
    # Initialised up front so every path below can safely test it.
    best_predictions = None

    if os.path.exists(params_file):
        with open(params_file, "r") as f:
            best_params = json.load(f)
        print(f"Loaded best parameters for {country} - {indicator} from {params_file}: {best_params}")
    else:
        # Run the grid search if no parameters file exists
        print(f"No pre-existing parameters for {country} - {indicator}. Running grid search.")
        best_rmse = float('inf')
        best_order = None

        # Perform grid search over ARIMA orders (p, d, q)
        for p in range(15):
            for d in range(6):
                for q in range(10):
                    try:
                        model_fit = ARIMA(train_y, order=(p, d, q)).fit()
                        predictions = model_fit.forecast(steps=len(test_y))
                        rmse = np.sqrt(mean_squared_error(test_y, predictions))
                    except Exception as e:
                        # Many orders cannot be fitted on short series; report and move on.
                        print(f"Error with parameters: p={p}, d={d}, q={q}. Error: {e}")
                        continue

                    # Track the best parameters
                    if rmse < best_rmse:
                        best_rmse = rmse
                        best_order = (p, d, q)
                        best_predictions = predictions

        # Save the best parameters to JSON for future use
        if best_order is not None:
            best_params = {
                "p": best_order[0],
                "d": best_order[1],
                "q": best_order[2]
            }
            os.makedirs(params_dir, exist_ok=True)
            with open(params_file, "w") as f:
                json.dump(best_params, f, indent=4)
            print(f"Saved best parameters for {country} - {indicator} to {params_file}: {best_params}")

    # Parameters loaded from disk still need a fit; after a grid search the
    # winning forecast is already available, so the model is not fitted again.
    if best_params is not None and best_predictions is None:
        best_order = (best_params["p"], best_params["d"], best_params["q"])
        model_fit = ARIMA(train_y, order=best_order).fit()
        best_predictions = model_fit.forecast(steps=len(test_y))

    if best_predictions is None:
        # Nothing could be fitted: report it instead of failing inside the metric.
        print(f"No ARIMA model could be fitted for {country} - {indicator}.")
        return float('inf'), None

    # Save the predictions plot
    save_plot(train_y, test_y, best_predictions, country, indicator, model_name="ARIMA")

    return np.sqrt(mean_squared_error(test_y, best_predictions)), best_predictions






# Driver: train ARIMA for every country/indicator parquet file found in
# ../data/base and write the resulting RMSE values to a timestamped CSV log.
from datetime import datetime

with open("../countries.json", "r") as f:
    country_names = json.load(f)

with open("../indicators.json", "r") as f:
    indicators = json.load(f)

data_folder = "../data/base"
model_errors_rmse = {}
log_data = []
country_indicators_plots = {}
for country, country_code in country_names.items():
    for indicator, indicator_code in indicators.items():
        filename = f"{country.replace(' ', '_')}_{indicator.replace(' ', '_')}.parquet"
        filepath = os.path.join(data_folder, filename)

        # Pairs without a data file are skipped.
        if not os.path.exists(filepath):
            continue

        df = pd.read_parquet(filepath)
        # Only frames that carry both required columns are processed.
        if 'Year' not in df.columns or 'Value' not in df.columns:
            continue

        df = df.set_index('Year').sort_index()
        df.index = pd.to_datetime(df.index, format='%Y')
        df = df.dropna()
        # errors='ignore': a file without an 'Indicator' column must not abort the run.
        df = df.drop('Indicator', axis=1, errors='ignore')
        df_original = df.copy()

        # Chronological 80/20 split.
        train_size = int(len(df) * 0.8)
        if train_size == 0:
            # Too few rows to leave any training data for the model.
            print(f"Skipping {country} - {indicator}: not enough data to train.")
            continue

        model_errors_rmse[(country, indicator)] = {}

        model_errors_rmse[(country, indicator)]['ARIMA'], arime_pred = train_arima(
            df_original.iloc[:train_size]['Value'],
            df_original.iloc[train_size:]['Value'],
            country, indicator)

        # Rank the models of this pair by RMSE (lowest first).
        sorted_models = sorted(model_errors_rmse[(country, indicator)].items(), key=lambda x: x[1])
        log_current_data = []
        for rank, (model_name, rmse) in enumerate(sorted_models, start=1):
            log_data.append([country, indicator, model_name, rmse, rank])
            log_current_data.append([country, indicator, model_name, rmse, rank])


model = "ARIMA"
log_dir = f"../data/{model}_train"
os.makedirs(log_dir, exist_ok=True)

# One log file per run, named after the minute the run finished.
timestamp = datetime.now().strftime("%Y-%m-%d--%H-%M")
log_filename = os.path.join(log_dir, f"{model}_error_log_{timestamp}.csv")

log_df = pd.DataFrame(log_data, columns=['Country', 'Indicator', 'Model', 'RMSE', 'Rank'])
log_df.to_csv(log_filename, index=False)


### TESTY

In [None]:

import os
import json
import pandas as pd
import numpy as np
import xgboost as xgb
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

def save_plot(train, test_y, predictions, country, indicator, model_name):
    """Plot training data, actual test data and predictions, then save the figure twice.

    The same PNG is written to ``../images/model_plot/Indicators/<indicator>/``
    and ``../images/model_plot/Countries/<country>/``; both folders are created
    when missing. The file name is ``<model_name>_<country>_<indicator>.png``
    with spaces in the country and indicator replaced by underscores (the
    folder names keep the original spelling).

    Args:
        train: Training data. When ``model_name == "Prophet"`` it is read via
            ``train['ds']`` / ``train['y']``; otherwise it is plotted against
            ``train.index``.
        test_y: Actual hold-out data, accessed the same way as ``train``.
        predictions: Predicted values, plotted against the test x-axis.
        country (str): Country name used in the title, folder and file name.
        indicator (str): Indicator name used in the title, folder and file name.
        model_name (str): Must be one of the keys of ``model_colors``.

    Raises:
        KeyError: If ``model_name`` has no entry in ``model_colors``.
    """
    model_colors = {
        "ARIMA": "blue",
        "Holt_Winters": "yellow",
        "LSTM": "black",
        "XGBoost": "pink",
        "Prophet": "brown",
    }

    # Resolve the colour before a figure exists so an unsupported model name
    # fails without leaving an open figure behind.
    line_color = model_colors[model_name]

    # Prophet-style frames carry the dates/values in 'ds'/'y' columns; every
    # other model passes data whose index is the x-axis.
    if model_name == "Prophet":
        train_x, train_values = train['ds'], train['y']
        test_x, test_values = test_y['ds'], test_y['y']
    else:
        train_x, train_values = train.index, train
        test_x, test_values = test_y.index, test_y

    file_name = f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png'
    indicator_folder = os.path.join('../images', 'model_plot', 'Indicators', indicator)
    country_folder = os.path.join('../images', 'model_plot', 'Countries', country)

    plt.figure(figsize=(10, 6))
    try:
        # Plotting predicted vs actual
        plt.plot(train_x, train_values, label='Train Data', color='green', linestyle='--')
        plt.plot(test_x, test_values, label='Actual', color='red', linestyle='--')
        plt.plot(test_x, predictions, label=f'Predicted({model_name})', color=line_color,
                 linestyle='-', marker='o')

        plt.title(f'Predicted({model_name}) vs Actual for {country} - {indicator}')
        plt.xlabel('Year')
        plt.ylabel('Value')
        plt.legend()

        # Save the same figure under the indicator folder, then the country folder.
        for folder in (indicator_folder, country_folder):
            os.makedirs(folder, exist_ok=True)
            plt.savefig(os.path.join(folder, file_name))
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close()




def train_arima(train_y, test_y, country, indicator):
    """Fit an ARIMA model for one country/indicator pair and score it on ``test_y``.

    The (p, d, q) order is read from
    ``../best_params/<indicator>/ARIMA_<country>.json`` when that file exists.
    Otherwise a grid search over p in 0..14, d in 0..5 and q in 0..9 is run and
    the winning order is written to that file for later runs. The forecast of
    the chosen model is plotted through ``save_plot``.

    Note: the grid search picks the order with the lowest RMSE on ``test_y``
    itself, so the returned RMSE is measured on the same data that was used
    for selection.

    Args:
        train_y: Training series; re-indexed internally to a year-end date range
            starting at its first index value (assumes yearly data).
        test_y: Hold-out series, re-indexed the same way.
        country (str): Country name (used in the parameter file name and plot).
        indicator (str): Indicator name (used as parameter sub-folder and in the plot).

    Returns:
        tuple: ``(rmse, predictions)``. When no model could be fitted at all,
        ``(float('inf'), None)`` is returned and no plot is saved.
    """
    # Work on copies so the caller's series keep their original index.
    train_y = train_y.copy()
    test_y = test_y.copy()

    # Ensure the time index is correctly set
    train_y.index = pd.date_range(start=train_y.index[0], periods=len(train_y), freq='YE')  # Assuming yearly data
    test_y.index = pd.date_range(start=test_y.index[0], periods=len(test_y), freq='YE')

    # Define paths for saving/loading best parameters
    params_dir = os.path.join("../best_params", indicator)
    params_file = os.path.join(params_dir, f"ARIMA_{country}.json")
    best_params = None
    # Initialised up front so every path below can safely test it.
    best_predictions = None

    if os.path.exists(params_file):
        with open(params_file, "r") as f:
            best_params = json.load(f)
        print(f"Loaded best parameters for {country} - {indicator} from {params_file}: {best_params}")
    else:
        # Run the grid search if no parameters file exists
        print(f"No pre-existing parameters for {country} - {indicator}. Running grid search.")
        best_rmse = float('inf')
        best_order = None

        # Perform grid search over ARIMA orders (p, d, q)
        for p in range(15):
            for d in range(6):
                for q in range(10):
                    try:
                        model_fit = ARIMA(train_y, order=(p, d, q)).fit()
                        predictions = model_fit.forecast(steps=len(test_y))
                        rmse = np.sqrt(mean_squared_error(test_y, predictions))
                    except Exception as e:
                        # Many orders cannot be fitted on short series; report and move on.
                        print(f"Error with parameters: p={p}, d={d}, q={q}. Error: {e}")
                        continue

                    # Track the best parameters
                    if rmse < best_rmse:
                        best_rmse = rmse
                        best_order = (p, d, q)
                        best_predictions = predictions

        # Save the best parameters to JSON for future use
        if best_order is not None:
            best_params = {
                "p": best_order[0],
                "d": best_order[1],
                "q": best_order[2]
            }
            os.makedirs(params_dir, exist_ok=True)
            with open(params_file, "w") as f:
                json.dump(best_params, f, indent=4)
            print(f"Saved best parameters for {country} - {indicator} to {params_file}: {best_params}")

    # Parameters loaded from disk still need a fit; after a grid search the
    # winning forecast is already available, so the model is not fitted again.
    if best_params is not None and best_predictions is None:
        best_order = (best_params["p"], best_params["d"], best_params["q"])
        model_fit = ARIMA(train_y, order=best_order).fit()
        best_predictions = model_fit.forecast(steps=len(test_y))

    if best_predictions is None:
        # Nothing could be fitted: report it instead of failing inside the metric.
        print(f"No ARIMA model could be fitted for {country} - {indicator}.")
        return float('inf'), None

    # Save the predictions plot
    save_plot(train_y, test_y, best_predictions, country, indicator, model_name="ARIMA")

    return np.sqrt(mean_squared_error(test_y, best_predictions)), best_predictions






# Driver: train ARIMA for every country/indicator parquet file found in
# ../data/base and write the resulting RMSE values to a timestamped CSV log.
from datetime import datetime

with open("../countries.json", "r") as f:
    country_names = json.load(f)

with open("../indicators.json", "r") as f:
    indicators = json.load(f)

data_folder = "../data/base"
model_errors_rmse = {}
log_data = []
country_indicators_plots = {}
for country, country_code in country_names.items():
    for indicator, indicator_code in indicators.items():
        filename = f"{country.replace(' ', '_')}_{indicator.replace(' ', '_')}.parquet"
        filepath = os.path.join(data_folder, filename)

        # Pairs without a data file are skipped.
        if not os.path.exists(filepath):
            continue

        df = pd.read_parquet(filepath)
        # Only frames that carry both required columns are processed.
        if 'Year' not in df.columns or 'Value' not in df.columns:
            continue

        df = df.set_index('Year').sort_index()
        df.index = pd.to_datetime(df.index, format='%Y')
        df = df.dropna()
        # errors='ignore': a file without an 'Indicator' column must not abort the run.
        df = df.drop('Indicator', axis=1, errors='ignore')
        df_original = df.copy()

        # Chronological 80/20 split.
        train_size = int(len(df) * 0.8)
        if train_size == 0:
            # Too few rows to leave any training data for the model.
            print(f"Skipping {country} - {indicator}: not enough data to train.")
            continue

        model_errors_rmse[(country, indicator)] = {}

        model_errors_rmse[(country, indicator)]['ARIMA'], arime_pred = train_arima(
            df_original.iloc[:train_size]['Value'],
            df_original.iloc[train_size:]['Value'],
            country, indicator)

        # Rank the models of this pair by RMSE (lowest first).
        sorted_models = sorted(model_errors_rmse[(country, indicator)].items(), key=lambda x: x[1])
        log_current_data = []
        for rank, (model_name, rmse) in enumerate(sorted_models, start=1):
            log_data.append([country, indicator, model_name, rmse, rank])
            log_current_data.append([country, indicator, model_name, rmse, rank])


model = "ARIMA"
log_dir = f"../data/{model}_train"
os.makedirs(log_dir, exist_ok=True)

# One log file per run, named after the minute the run finished.
timestamp = datetime.now().strftime("%Y-%m-%d--%H-%M")
log_filename = os.path.join(log_dir, f"{model}_error_log_{timestamp}.csv")

log_df = pd.DataFrame(log_data, columns=['Country', 'Indicator', 'Model', 'RMSE', 'Rank'])
log_df.to_csv(log_filename, index=False)


Loaded best parameters for Italy - GDP growth (annual %) from ../best_params\GDP growth (annual %)\ARIMA_Italy.json: {'p': 13, 'd': 0, 'q': 6}


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  arime_pred.index = arime_pred.index.to_period('A').to_timestamp()


### OLD VERSIONS

In [None]:
# ARIMA_error_log_2025-02-16--05-58
import os
import json
import pandas as pd
import numpy as np
import xgboost as xgb
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

def save_plot(train, test_y, predictions, country, indicator, model_name):
    """Plot training data, actual test data and predictions, then save the figure twice.

    The same PNG is written to ``../images/model_plot/Indicators/<indicator>/``
    and ``../images/model_plot/Countries/<country>/``; both folders are created
    when missing. The file name is ``<model_name>_<country>_<indicator>.png``
    with spaces in the country and indicator replaced by underscores (the
    folder names keep the original spelling).

    Args:
        train: Training data. When ``model_name == "Prophet"`` it is read via
            ``train['ds']`` / ``train['y']``; otherwise it is plotted against
            ``train.index``.
        test_y: Actual hold-out data, accessed the same way as ``train``.
        predictions: Predicted values, plotted against the test x-axis.
        country (str): Country name used in the title, folder and file name.
        indicator (str): Indicator name used in the title, folder and file name.
        model_name (str): Must be one of the keys of ``model_colors``.

    Raises:
        KeyError: If ``model_name`` has no entry in ``model_colors``.
    """
    model_colors = {
        "ARIMA": "blue",
        "Holt_Winters": "yellow",
        "LSTM": "black",
        "XGBoost": "pink",
        "Prophet": "brown",
    }

    # Resolve the colour before a figure exists so an unsupported model name
    # fails without leaving an open figure behind.
    line_color = model_colors[model_name]

    # Prophet-style frames carry the dates/values in 'ds'/'y' columns; every
    # other model passes data whose index is the x-axis.
    if model_name == "Prophet":
        train_x, train_values = train['ds'], train['y']
        test_x, test_values = test_y['ds'], test_y['y']
    else:
        train_x, train_values = train.index, train
        test_x, test_values = test_y.index, test_y

    file_name = f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png'
    indicator_folder = os.path.join('../images', 'model_plot', 'Indicators', indicator)
    country_folder = os.path.join('../images', 'model_plot', 'Countries', country)

    plt.figure(figsize=(10, 6))
    try:
        # Plotting predicted vs actual
        plt.plot(train_x, train_values, label='Train Data', color='green', linestyle='--')
        plt.plot(test_x, test_values, label='Actual', color='red', linestyle='--')
        plt.plot(test_x, predictions, label=f'Predicted({model_name})', color=line_color,
                 linestyle='-', marker='o')

        plt.title(f'Predicted({model_name}) vs Actual for {country} - {indicator}')
        plt.xlabel('Year')
        plt.ylabel('Value')
        plt.legend()

        # Save the same figure under the indicator folder, then the country folder.
        for folder in (indicator_folder, country_folder):
            os.makedirs(folder, exist_ok=True)
            plt.savefig(os.path.join(folder, file_name))
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close()




def train_arima(train_y, test_y, country, indicator):
    """Fit an ARIMA model for one country/indicator pair and score it on ``test_y``.

    The (p, d, q) order is read from
    ``../best_params/<indicator>/ARIMA_<country>.json`` when that file exists.
    Otherwise a grid search over p in 0..14, d in 0..5 and q in 0..9 is run and
    the winning order is written to that file for later runs. The forecast of
    the chosen model is plotted through ``save_plot``.

    Note: the grid search picks the order with the lowest RMSE on ``test_y``
    itself, so the returned RMSE is measured on the same data that was used
    for selection.

    Args:
        train_y: Training series; re-indexed internally to a year-end date range
            starting at its first index value (assumes yearly data).
        test_y: Hold-out series, re-indexed the same way.
        country (str): Country name (used in the parameter file name and plot).
        indicator (str): Indicator name (used as parameter sub-folder and in the plot).

    Returns:
        tuple: ``(rmse, predictions)``. When no model could be fitted at all,
        ``(float('inf'), None)`` is returned and no plot is saved.
    """
    # Work on copies so the caller's series keep their original index.
    train_y = train_y.copy()
    test_y = test_y.copy()

    # Ensure the time index is correctly set
    train_y.index = pd.date_range(start=train_y.index[0], periods=len(train_y), freq='YE')  # Assuming yearly data
    test_y.index = pd.date_range(start=test_y.index[0], periods=len(test_y), freq='YE')

    # Define paths for saving/loading best parameters
    params_dir = os.path.join("../best_params", indicator)
    params_file = os.path.join(params_dir, f"ARIMA_{country}.json")
    best_params = None
    # Initialised up front so every path below can safely test it.
    best_predictions = None

    if os.path.exists(params_file):
        with open(params_file, "r") as f:
            best_params = json.load(f)
        print(f"Loaded best parameters for {country} - {indicator} from {params_file}: {best_params}")
    else:
        # Run the grid search if no parameters file exists
        print(f"No pre-existing parameters for {country} - {indicator}. Running grid search.")
        best_rmse = float('inf')
        best_order = None

        # Perform grid search over ARIMA orders (p, d, q)
        for p in range(15):
            for d in range(6):
                for q in range(10):
                    try:
                        model_fit = ARIMA(train_y, order=(p, d, q)).fit()
                        predictions = model_fit.forecast(steps=len(test_y))
                        rmse = np.sqrt(mean_squared_error(test_y, predictions))
                    except Exception as e:
                        # Many orders cannot be fitted on short series; report and move on.
                        print(f"Error with parameters: p={p}, d={d}, q={q}. Error: {e}")
                        continue

                    # Track the best parameters
                    if rmse < best_rmse:
                        best_rmse = rmse
                        best_order = (p, d, q)
                        best_predictions = predictions

        # Save the best parameters to JSON for future use
        if best_order is not None:
            best_params = {
                "p": best_order[0],
                "d": best_order[1],
                "q": best_order[2]
            }
            os.makedirs(params_dir, exist_ok=True)
            with open(params_file, "w") as f:
                json.dump(best_params, f, indent=4)
            print(f"Saved best parameters for {country} - {indicator} to {params_file}: {best_params}")

    # Parameters loaded from disk still need a fit; after a grid search the
    # winning forecast is already available, so the model is not fitted again.
    if best_params is not None and best_predictions is None:
        best_order = (best_params["p"], best_params["d"], best_params["q"])
        model_fit = ARIMA(train_y, order=best_order).fit()
        best_predictions = model_fit.forecast(steps=len(test_y))

    if best_predictions is None:
        # Nothing could be fitted: report it instead of failing inside the metric.
        print(f"No ARIMA model could be fitted for {country} - {indicator}.")
        return float('inf'), None

    # Save the predictions plot
    save_plot(train_y, test_y, best_predictions, country, indicator, model_name="ARIMA")

    return np.sqrt(mean_squared_error(test_y, best_predictions)), best_predictions






# Driver: train ARIMA for every country/indicator parquet file found in
# ../data/base and write the resulting RMSE values to a timestamped CSV log.
from datetime import datetime

with open("../countries.json", "r") as f:
    country_names = json.load(f)

with open("../indicators.json", "r") as f:
    indicators = json.load(f)

data_folder = "../data/base"
model_errors_rmse = {}
log_data = []
country_indicators_plots = {}
for country, country_code in country_names.items():
    for indicator, indicator_code in indicators.items():
        filename = f"{country.replace(' ', '_')}_{indicator.replace(' ', '_')}.parquet"
        filepath = os.path.join(data_folder, filename)

        # Pairs without a data file are skipped.
        if not os.path.exists(filepath):
            continue

        df = pd.read_parquet(filepath)
        # Only frames that carry both required columns are processed.
        if 'Year' not in df.columns or 'Value' not in df.columns:
            continue

        df = df.set_index('Year').sort_index()
        df.index = pd.to_datetime(df.index, format='%Y')
        df = df.dropna()
        # errors='ignore': a file without an 'Indicator' column must not abort the run.
        df = df.drop('Indicator', axis=1, errors='ignore')
        df_original = df.copy()

        # Chronological 80/20 split.
        train_size = int(len(df) * 0.8)
        if train_size == 0:
            # Too few rows to leave any training data for the model.
            print(f"Skipping {country} - {indicator}: not enough data to train.")
            continue

        model_errors_rmse[(country, indicator)] = {}

        model_errors_rmse[(country, indicator)]['ARIMA'], arime_pred = train_arima(
            df_original.iloc[:train_size]['Value'],
            df_original.iloc[train_size:]['Value'],
            country, indicator)

        # Rank the models of this pair by RMSE (lowest first).
        sorted_models = sorted(model_errors_rmse[(country, indicator)].items(), key=lambda x: x[1])
        log_current_data = []
        for rank, (model_name, rmse) in enumerate(sorted_models, start=1):
            log_data.append([country, indicator, model_name, rmse, rank])
            log_current_data.append([country, indicator, model_name, rmse, rank])


model = "ARIMA"
log_dir = f"../data/{model}_train"
os.makedirs(log_dir, exist_ok=True)

# One log file per run, named after the minute the run finished.
timestamp = datetime.now().strftime("%Y-%m-%d--%H-%M")
log_filename = os.path.join(log_dir, f"{model}_error_log_{timestamp}.csv")

log_df = pd.DataFrame(log_data, columns=['Country', 'Indicator', 'Model', 'RMSE', 'Rank'])
log_df.to_csv(log_filename, index=False)


In [None]:
#ARIMA_error_log_2025-02-11--22-03
import os
import json
import pandas as pd
import numpy as np
import xgboost as xgb
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import RFE
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

def save_plot(train, test_y, predictions, country, indicator, model_name):
    """Plot training data, actual test data and predictions, then save the figure twice.

    The same PNG is written to ``../images/model_plot/Indicators/<indicator>/``
    and ``../images/model_plot/Countries/<country>/``; both folders are created
    when missing. The file name is ``<model_name>_<country>_<indicator>.png``
    with spaces in the country and indicator replaced by underscores (the
    folder names keep the original spelling).

    Args:
        train: Training data. When ``model_name == "Prophet"`` it is read via
            ``train['ds']`` / ``train['y']``; otherwise it is plotted against
            ``train.index``.
        test_y: Actual hold-out data, accessed the same way as ``train``.
        predictions: Predicted values, plotted against the test x-axis.
        country (str): Country name used in the title, folder and file name.
        indicator (str): Indicator name used in the title, folder and file name.
        model_name (str): Must be one of the keys of ``model_colors``.

    Raises:
        KeyError: If ``model_name`` has no entry in ``model_colors``.
    """
    model_colors = {
        "ARIMA": "blue",
        "Holt_Winters": "yellow",
        "LSTM": "black",
        "XGBoost": "pink",
        "Prophet": "brown",
    }

    # Resolve the colour before a figure exists so an unsupported model name
    # fails without leaving an open figure behind.
    line_color = model_colors[model_name]

    # Prophet-style frames carry the dates/values in 'ds'/'y' columns; every
    # other model passes data whose index is the x-axis.
    if model_name == "Prophet":
        train_x, train_values = train['ds'], train['y']
        test_x, test_values = test_y['ds'], test_y['y']
    else:
        train_x, train_values = train.index, train
        test_x, test_values = test_y.index, test_y

    file_name = f'{model_name}_{country.replace(" ", "_")}_{indicator.replace(" ", "_")}.png'
    indicator_folder = os.path.join('../images', 'model_plot', 'Indicators', indicator)
    country_folder = os.path.join('../images', 'model_plot', 'Countries', country)

    plt.figure(figsize=(10, 6))
    try:
        # Plotting predicted vs actual
        plt.plot(train_x, train_values, label='Train Data', color='green', linestyle='--')
        plt.plot(test_x, test_values, label='Actual', color='red', linestyle='--')
        plt.plot(test_x, predictions, label=f'Predicted({model_name})', color=line_color,
                 linestyle='-', marker='o')

        plt.title(f'Predicted({model_name}) vs Actual for {country} - {indicator}')
        plt.xlabel('Year')
        plt.ylabel('Value')
        plt.legend()

        # Save the same figure under the indicator folder, then the country folder.
        for folder in (indicator_folder, country_folder):
            os.makedirs(folder, exist_ok=True)
            plt.savefig(os.path.join(folder, file_name))
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close()




def train_arima(train_y, test_y, country, indicator):
    """Grid-search ARIMA orders and return the best RMSE with its forecast.

    Every (p, d, q) with each component in 0..3 is fitted on ``train_y`` and
    scored by RMSE of its forecast against ``test_y``; the lowest-RMSE forecast
    is plotted through ``save_plot``. Orders that fail to fit are reported and
    skipped.

    Note: the order is selected on ``test_y`` itself, so the returned RMSE is
    measured on the same data that was used for selection.

    Args:
        train_y: Training series passed to ``ARIMA``.
        test_y: Hold-out series; its length sets the forecast horizon.
        country (str): Country name, forwarded to ``save_plot``.
        indicator (str): Indicator name, forwarded to ``save_plot``.

    Returns:
        tuple: ``(best_rmse, best_predictions)``; ``(float('inf'), None)`` when
        no order could be fitted (no plot is saved in that case).
    """
    best_rmse = float('inf')
    best_predictions = None

    for p in range(4):
        for d in range(4):
            for q in range(4):
                try:
                    # Fit the ARIMA model and score its forecast
                    model_fit = ARIMA(train_y, order=(p, d, q)).fit()
                    predictions = model_fit.forecast(steps=len(test_y))
                    rmse = np.sqrt(mean_squared_error(test_y, predictions))
                except Exception as e:
                    # Report the failing order instead of dropping it silently.
                    print(f"Error with parameters: p={p}, d={d}, q={q}. Error: {e}")
                    continue

                if rmse < best_rmse:
                    best_rmse = rmse
                    best_predictions = predictions

    # After finding the best model, save the predictions plot
    if best_predictions is not None:
        save_plot(train_y, test_y, best_predictions, country, indicator, model_name="ARIMA")

    return best_rmse, best_predictions





# Driver: train ARIMA for every country/indicator parquet file found in
# ../data/base and write the resulting RMSE values to a timestamped CSV log.
from datetime import datetime

with open("../countries.json", "r") as f:
    country_names = json.load(f)

with open("../indicators.json", "r") as f:
    indicators = json.load(f)

data_folder = "../data/base"
model_errors_rmse = {}
log_data = []
country_indicators_plots = {}
for country, country_code in country_names.items():
    for indicator, indicator_code in indicators.items():
        filename = f"{country.replace(' ', '_')}_{indicator.replace(' ', '_')}.parquet"
        filepath = os.path.join(data_folder, filename)

        # Pairs without a data file are skipped.
        if not os.path.exists(filepath):
            continue

        df = pd.read_parquet(filepath)
        # Only frames that carry both required columns are processed.
        if 'Year' not in df.columns or 'Value' not in df.columns:
            continue

        df = df.set_index('Year').sort_index()
        df.index = pd.to_datetime(df.index, format='%Y')
        df = df.dropna()
        # errors='ignore': a file without an 'Indicator' column must not abort the run.
        df = df.drop('Indicator', axis=1, errors='ignore')
        df_original = df.copy()

        # Chronological 80/20 split.
        train_size = int(len(df) * 0.8)
        if train_size == 0:
            # Too few rows to leave any training data for the model.
            print(f"Skipping {country} - {indicator}: not enough data to train.")
            continue

        model_errors_rmse[(country, indicator)] = {}

        model_errors_rmse[(country, indicator)]['ARIMA'], arime_pred = train_arima(
            df_original.iloc[:train_size]['Value'],
            df_original.iloc[train_size:]['Value'],
            country, indicator)

        # Rank the models of this pair by RMSE (lowest first).
        sorted_models = sorted(model_errors_rmse[(country, indicator)].items(), key=lambda x: x[1])
        log_current_data = []
        for rank, (model_name, rmse) in enumerate(sorted_models, start=1):
            log_data.append([country, indicator, model_name, rmse, rank])
            log_current_data.append([country, indicator, model_name, rmse, rank])


model = "ARIMA"
log_dir = f"../data/{model}_train"
os.makedirs(log_dir, exist_ok=True)

# One log file per run, named after the minute the run finished.
timestamp = datetime.now().strftime("%Y-%m-%d--%H-%M")
log_filename = os.path.join(log_dir, f"{model}_error_log_{timestamp}.csv")

log_df = pd.DataFrame(log_data, columns=['Country', 'Indicator', 'Model', 'RMSE', 'Rank'])
log_df.to_csv(log_filename, index=False)
